pf.c revision 145030
1/*	$FreeBSD: head/sys/contrib/pf/net/pf.c 145030 2005-04-13 21:05:55Z glebius $	*/
2/*	$OpenBSD: pf.c,v 1.433.2.2 2004/07/17 03:22:34 brad Exp $ */
3/* add	$OpenBSD: pf.c,v 1.448 2004/05/11 07:34:11 dhartmei Exp $ */
4/* add	$OpenBSD: pf.c,v 1.483 2005/03/15 17:38:43 dhartmei Exp $ */
5
6/*
7 * Copyright (c) 2001 Daniel Hartmeier
8 * Copyright (c) 2002,2003 Henning Brauer
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 *
15 *    - Redistributions of source code must retain the above copyright
16 *      notice, this list of conditions and the following disclaimer.
17 *    - Redistributions in binary form must reproduce the above
18 *      copyright notice, this list of conditions and the following
19 *      disclaimer in the documentation and/or other materials provided
20 *      with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 *
35 * Effort sponsored in part by the Defense Advanced Research Projects
36 * Agency (DARPA) and Air Force Research Laboratory, Air Force
37 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
38 *
39 */
40
41#ifdef __FreeBSD__
42#include "opt_inet.h"
43#include "opt_inet6.h"
44#endif
45
46#ifdef __FreeBSD__
47#include "opt_bpf.h"
48#include "opt_pf.h"
49#define	NBPFILTER	DEV_BPF
50#define	NPFLOG		DEV_PFLOG
51#define	NPFSYNC		DEV_PFSYNC
52#else
53#include "bpfilter.h"
54#include "pflog.h"
55#include "pfsync.h"
56#endif
57
58#include <sys/param.h>
59#include <sys/systm.h>
60#include <sys/mbuf.h>
61#include <sys/filio.h>
62#include <sys/socket.h>
63#include <sys/socketvar.h>
64#include <sys/kernel.h>
65#include <sys/time.h>
66#ifdef __FreeBSD__
67#include <sys/sysctl.h>
68#include <sys/endian.h>
69#else
70#include <sys/pool.h>
71#endif
72
73#include <net/if.h>
74#include <net/if_types.h>
75#include <net/bpf.h>
76#include <net/route.h>
77
78#include <netinet/in.h>
79#include <netinet/in_var.h>
80#include <netinet/in_systm.h>
81#include <netinet/ip.h>
82#include <netinet/ip_var.h>
83#include <netinet/tcp.h>
84#include <netinet/tcp_seq.h>
85#include <netinet/udp.h>
86#include <netinet/ip_icmp.h>
87#include <netinet/in_pcb.h>
88#include <netinet/tcp_timer.h>
89#include <netinet/tcp_var.h>
90#include <netinet/udp_var.h>
91#include <netinet/icmp_var.h>
92
93#ifndef __FreeBSD__
94#include <dev/rndvar.h>
95#endif
96#include <net/pfvar.h>
97#include <net/if_pflog.h>
98
99#if NPFSYNC > 0
100#include <net/if_pfsync.h>
101#endif /* NPFSYNC > 0 */
102
103#ifdef INET6
104#include <netinet/ip6.h>
105#include <netinet/in_pcb.h>
106#include <netinet/icmp6.h>
107#include <netinet6/nd6.h>
108#ifdef __FreeBSD__
109#include <netinet6/ip6_var.h>
110#include <netinet6/in6_pcb.h>
111#endif
112#endif /* INET6 */
113
114#ifdef __FreeBSD__
115#include <machine/in_cksum.h>
116#include <sys/limits.h>
117#include <sys/ucred.h>
118
119extern int ip_optcopy(struct ip *, struct ip *);
120#endif
121
/*
 * Debug printf.  Emits the parenthesised printf argument list `x' when the
 * global pf_status.debug level is at least `n', e.g.
 *
 *	DPFPRINTF(PF_DEBUG_MISC, ("pf: something %d\n", value));
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: used as the unbraced body of an if, it can no longer
 * capture the caller's following "else".
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
123
124/*
125 * Global variables
126 */
127
/* Rule, anchor, ALTQ and pool-address containers shared across pf. */
struct pf_anchorqueue	 pf_anchors;
struct pf_ruleset	 pf_main_ruleset;
struct pf_altqqueue	 pf_altqs[2];
struct pf_palist	 pf_pabuf;
struct pf_altqqueue	*pf_altqs_active;
struct pf_altqqueue	*pf_altqs_inactive;
/*
 * Global status block; this file reads its debug level and host id and
 * updates its state/source-node counters and the running state id.
 */
struct pf_status	 pf_status;

/* Tickets for the ALTQ lists and the pool-address buffer. */
/* NOTE(review): ticket semantics are defined by the ioctl code -- confirm. */
u_int32_t		 ticket_altqs_active;
u_int32_t		 ticket_altqs_inactive;
int			 altqs_inactive_open;
u_int32_t		 ticket_pabuf;

/* Timer handle passed back to pf_purge_timeout() so it can re-arm itself. */
#ifdef __FreeBSD__
struct callout	 	 pf_expire_to;			/* expire timeout */
#else
struct timeout		 pf_expire_to;			/* expire timeout */
#endif


/*
 * Allocators for pf objects: UMA zones on FreeBSD, pool(9) pools otherwise.
 * In this file pf_src_tree_pl backs source nodes and pf_state_pl backs
 * states (see the pool_get()/pool_put() calls below).
 */
#ifdef __FreeBSD__
uma_zone_t		 pf_src_tree_pl, pf_rule_pl;
uma_zone_t		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
#else
struct pool		 pf_src_tree_pl, pf_rule_pl;
struct pool		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
#endif

/*
 * Forward declarations for functions defined later in this file.
 * On FreeBSD several of them take an extra struct inpcb * argument.
 */

/* Debug printing helpers. */
void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
void			 pf_print_state(struct pf_state *);
void			 pf_print_flags(u_int8_t);

/* Address/port rewriting helpers. */
void			 pf_change_ap(struct pf_addr *, u_int16_t *,
			    u_int16_t *, u_int16_t *, struct pf_addr *,
			    u_int16_t, u_int8_t, sa_family_t);
#ifdef INET6
void			 pf_change_a6(struct pf_addr *, u_int16_t *,
			    struct pf_addr *, u_int8_t);
#endif /* INET6 */
void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
			    struct pf_addr *, struct pf_addr *, u_int16_t,
			    u_int16_t *, u_int16_t *, u_int16_t *,
			    u_int16_t *, u_int8_t, sa_family_t);
/* Packet generation (used e.g. by pf_purge_expired_states()). */
void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
			    const struct pf_addr *, const struct pf_addr *,
			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
			    u_int8_t, u_int16_t, u_int16_t, u_int8_t);
void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
			    sa_family_t, struct pf_rule *);
/* Translation rule lookup. */
struct pf_rule		*pf_match_translation(struct pf_pdesc *, struct mbuf *,
			    int, int, struct pfi_kif *,
			    struct pf_addr *, u_int16_t, struct pf_addr *,
			    u_int16_t, int);
struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
			    int, int, struct pfi_kif *, struct pf_src_node **,
			    struct pf_addr *, u_int16_t,
			    struct pf_addr *, u_int16_t,
			    struct pf_addr *, u_int16_t *);
/* Per-protocol rule evaluation. */
int			 pf_test_tcp(struct pf_rule **, struct pf_state **,
			    int, struct pfi_kif *, struct mbuf *, int,
			    void *, struct pf_pdesc *, struct pf_rule **,
#ifdef __FreeBSD__
			    struct pf_ruleset **, struct inpcb *);
#else
			    struct pf_ruleset **);
#endif
int			 pf_test_udp(struct pf_rule **, struct pf_state **,
			    int, struct pfi_kif *, struct mbuf *, int,
			    void *, struct pf_pdesc *, struct pf_rule **,
#ifdef __FreeBSD__
			    struct pf_ruleset **, struct inpcb *);
#else
			    struct pf_ruleset **);
#endif
int			 pf_test_icmp(struct pf_rule **, struct pf_state **,
			    int, struct pfi_kif *, struct mbuf *, int,
			    void *, struct pf_pdesc *, struct pf_rule **,
			    struct pf_ruleset **);
int			 pf_test_other(struct pf_rule **, struct pf_state **,
			    int, struct pfi_kif *, struct mbuf *, int, void *,
			    struct pf_pdesc *, struct pf_rule **,
			    struct pf_ruleset **);
int			 pf_test_fragment(struct pf_rule **, int,
			    struct pfi_kif *, struct mbuf *, void *,
			    struct pf_pdesc *, struct pf_rule **,
			    struct pf_ruleset **);
/* Per-protocol matching against existing states. */
int			 pf_test_state_tcp(struct pf_state **, int,
			    struct pfi_kif *, struct mbuf *, int,
			    void *, struct pf_pdesc *, u_short *);
int			 pf_test_state_udp(struct pf_state **, int,
			    struct pfi_kif *, struct mbuf *, int,
			    void *, struct pf_pdesc *);
int			 pf_test_state_icmp(struct pf_state **, int,
			    struct pfi_kif *, struct mbuf *, int,
			    void *, struct pf_pdesc *);
int			 pf_test_state_other(struct pf_state **, int,
			    struct pfi_kif *, struct pf_pdesc *);
/* Packet tags, address pools and routing. */
struct pf_tag		*pf_get_tag(struct mbuf *);
int			 pf_match_tag(struct mbuf *, struct pf_rule *,
			     struct pf_rule *, struct pf_tag *, int *);
void			 pf_hash(struct pf_addr *, struct pf_addr *,
			    struct pf_poolhashkey *, sa_family_t);
int			 pf_map_addr(u_int8_t, struct pf_rule *,
			    struct pf_addr *, struct pf_addr *,
			    struct pf_addr *, struct pf_src_node **);
int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
			    struct pf_addr *, struct pf_addr *, u_int16_t,
			    struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
			    struct pf_src_node **);
void			 pf_route(struct mbuf **, struct pf_rule *, int,
			    struct ifnet *, struct pf_state *);
void			 pf_route6(struct mbuf **, struct pf_rule *, int,
			    struct ifnet *, struct pf_state *);
#ifdef __FreeBSD__
int			 pf_socket_lookup(uid_t *, gid_t *,
			    int, struct pf_pdesc *, struct inpcb *);
#else
int			 pf_socket_lookup(uid_t *, gid_t *,
			    int, struct pf_pdesc *);
#endif
/* TCP option parsing, checksums and miscellaneous helpers. */
u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
			    sa_family_t);
u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
			    sa_family_t);
u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
				u_int16_t);
void			 pf_set_rt_ifp(struct pf_state *,
			    struct pf_addr *);
int			 pf_check_proto_cksum(struct mbuf *, int, int,
			    u_int8_t, sa_family_t);
int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
static int		 pf_add_mbuf_tag(struct mbuf *, u_int);
/* Walks kif and its pfik_parent chain looking for a state matching a key. */
struct pf_state		*pf_find_state_recurse(struct pfi_kif *,
			    struct pf_state *, u_int8_t);

/*
 * Per-limit table, PF_LIMIT_MAX entries.  The non-FreeBSD build initialises
 * it statically with a pool pointer and a high-water mark for states, source
 * nodes and fragment entries; the FreeBSD build only reserves the array here.
 * NOTE(review): presumably FreeBSD fills the table at module init, where the
 * UMA zones are created -- confirm against the ioctl/init code.
 */
#ifdef __FreeBSD__
int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);

struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
#else
struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl, PFSTATE_HIWAT },
	{ &pf_src_tree_pl, PFSNODE_HIWAT },
	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT }
};
#endif
275
/*
 * Look up the state matching `key' and store it in *state.
 *
 * This macro is not hygienic: it uses the enclosing function's locals
 * `direction', `state', `kif' and `key', and it contains return statements
 * that leave the enclosing function:
 *   - inbound packets search the ext_gwy tree, all others the lan_ext tree
 *     (via pf_find_state_recurse(), which also walks kif's parents);
 *   - if no state is found the caller returns PF_DROP;
 *   - for an outbound packet whose state was created by an outbound
 *     route-to rule or an inbound reply-to rule, and whose recorded rt_kif
 *     is set and differs from `kif', the caller returns PF_PASS without
 *     further state processing.
 */
#define STATE_LOOKUP()							\
	do {								\
		if (direction == PF_IN)					\
			*state = pf_find_state_recurse(		\
			    kif, &key, PF_EXT_GWY);			\
		else							\
			*state = pf_find_state_recurse(		\
			    kif, &key, PF_LAN_EXT);			\
		if (*state == NULL)					\
			return (PF_DROP);				\
		if (direction == PF_OUT &&				\
		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
		    (*state)->rule.ptr->direction == PF_OUT) ||		\
		    ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
		    (*state)->rule.ptr->direction == PF_IN)) &&		\
		    (*state)->rt_kif != NULL &&				\
		    (*state)->rt_kif != kif)				\
			return (PF_PASS);				\
	} while (0)
295
/*
 * Evaluates to non-zero when the lan and gwy endpoints of state `s' (a
 * pointer) differ, i.e. the first address word differs, or -- for AF_INET6
 * states only -- any of the remaining three address words differ, or the
 * ports differ.
 *
 * The whole expansion is parenthesised so that the macro can be combined
 * safely with other operators (e.g. !STATE_TRANSLATE(s) or
 * x && STATE_TRANSLATE(s)).
 */
#define	STATE_TRANSLATE(s) \
	((s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \
	((s)->af == AF_INET6 && \
	((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \
	(s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \
	(s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
	(s)->lan.port != (s)->gwy.port)
303
/*
 * Select the interface object for rule `r' starting from kif `k':
 *   PFRULE_IFBOUND set      -> k itself
 *   else PFRULE_GRBOUND set -> k's parent
 *   otherwise               -> k's grandparent
 */
#define BOUND_IFACE(r, k)						\
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) :			\
	    (((r)->rule_flag & PFRULE_GRBOUND) ? (k)->pfik_parent :	\
		(k)->pfik_parent->pfik_parent))
307
/*
 * Comparison functions for the red-black trees generated below.  They are
 * declared inline everywhere except on FreeBSD, where they are plain static
 * functions.
 */
#ifndef __FreeBSD__
static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_lan_ext(struct pf_state *,
	struct pf_state *);
static __inline int pf_state_compare_ext_gwy(struct pf_state *,
	struct pf_state *);
static __inline int pf_state_compare_id(struct pf_state *,
	struct pf_state *);
#else
static int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static int pf_state_compare_lan_ext(struct pf_state *,
	struct pf_state *);
static int pf_state_compare_ext_gwy(struct pf_state *,
	struct pf_state *);
static int pf_state_compare_id(struct pf_state *,
	struct pf_state *);
#endif

/* Global tree of source-tracking nodes. */
struct pf_src_tree tree_src_tracking;

/* Global tree of all states keyed by (id, creatorid), and the update queue. */
struct pf_state_tree_id tree_id;
struct pf_state_queue state_updates;

/*
 * Instantiate the tree operations.  The lan_ext and ext_gwy trees are kept
 * per interface (kif->pfik_lan_ext / kif->pfik_ext_gwy); the source-node and
 * id trees are the globals declared above.
 */
RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree_lan_ext, pf_state,
    u.s.entry_lan_ext, pf_state_compare_lan_ext);
RB_GENERATE(pf_state_tree_ext_gwy, pf_state,
    u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
RB_GENERATE(pf_state_tree_id, pf_state,
    u.s.entry_id, pf_state_compare_id);
338
339#ifdef __FreeBSD__
340static int
341#else
342static __inline int
343#endif
344pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
345{
346	int	diff;
347
348	if (a->rule.ptr > b->rule.ptr)
349		return (1);
350	if (a->rule.ptr < b->rule.ptr)
351		return (-1);
352	if ((diff = a->af - b->af) != 0)
353		return (diff);
354	switch (a->af) {
355#ifdef INET
356	case AF_INET:
357		if (a->addr.addr32[0] > b->addr.addr32[0])
358			return (1);
359		if (a->addr.addr32[0] < b->addr.addr32[0])
360			return (-1);
361		break;
362#endif /* INET */
363#ifdef INET6
364	case AF_INET6:
365		if (a->addr.addr32[3] > b->addr.addr32[3])
366			return (1);
367		if (a->addr.addr32[3] < b->addr.addr32[3])
368			return (-1);
369		if (a->addr.addr32[2] > b->addr.addr32[2])
370			return (1);
371		if (a->addr.addr32[2] < b->addr.addr32[2])
372			return (-1);
373		if (a->addr.addr32[1] > b->addr.addr32[1])
374			return (1);
375		if (a->addr.addr32[1] < b->addr.addr32[1])
376			return (-1);
377		if (a->addr.addr32[0] > b->addr.addr32[0])
378			return (1);
379		if (a->addr.addr32[0] < b->addr.addr32[0])
380			return (-1);
381		break;
382#endif /* INET6 */
383	}
384	return (0);
385}
386
387#ifdef __FreeBSD__
388static int
389#else
390static __inline int
391#endif
392pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b)
393{
394	int	diff;
395
396	if ((diff = a->proto - b->proto) != 0)
397		return (diff);
398	if ((diff = a->af - b->af) != 0)
399		return (diff);
400	switch (a->af) {
401#ifdef INET
402	case AF_INET:
403		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
404			return (1);
405		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
406			return (-1);
407		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
408			return (1);
409		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
410			return (-1);
411		break;
412#endif /* INET */
413#ifdef INET6
414	case AF_INET6:
415		if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
416			return (1);
417		if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
418			return (-1);
419		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
420			return (1);
421		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
422			return (-1);
423		if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
424			return (1);
425		if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
426			return (-1);
427		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
428			return (1);
429		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
430			return (-1);
431		if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
432			return (1);
433		if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
434			return (-1);
435		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
436			return (1);
437		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
438			return (-1);
439		if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
440			return (1);
441		if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
442			return (-1);
443		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
444			return (1);
445		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
446			return (-1);
447		break;
448#endif /* INET6 */
449	}
450
451	if ((diff = a->lan.port - b->lan.port) != 0)
452		return (diff);
453	if ((diff = a->ext.port - b->ext.port) != 0)
454		return (diff);
455
456	return (0);
457}
458
459#ifdef __FreeBSD__
460static int
461#else
462static __inline int
463#endif
464pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b)
465{
466	int	diff;
467
468	if ((diff = a->proto - b->proto) != 0)
469		return (diff);
470	if ((diff = a->af - b->af) != 0)
471		return (diff);
472	switch (a->af) {
473#ifdef INET
474	case AF_INET:
475		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
476			return (1);
477		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
478			return (-1);
479		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
480			return (1);
481		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
482			return (-1);
483		break;
484#endif /* INET */
485#ifdef INET6
486	case AF_INET6:
487		if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
488			return (1);
489		if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
490			return (-1);
491		if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
492			return (1);
493		if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
494			return (-1);
495		if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
496			return (1);
497		if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
498			return (-1);
499		if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
500			return (1);
501		if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
502			return (-1);
503		if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
504			return (1);
505		if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
506			return (-1);
507		if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
508			return (1);
509		if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
510			return (-1);
511		if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
512			return (1);
513		if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
514			return (-1);
515		if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
516			return (1);
517		if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
518			return (-1);
519		break;
520#endif /* INET6 */
521	}
522
523	if ((diff = a->ext.port - b->ext.port) != 0)
524		return (diff);
525	if ((diff = a->gwy.port - b->gwy.port) != 0)
526		return (diff);
527
528	return (0);
529}
530
531#ifdef __FreeBSD__
532static int
533#else
534static __inline int
535#endif
536pf_state_compare_id(struct pf_state *a, struct pf_state *b)
537{
538	if (a->id > b->id)
539		return (1);
540	if (a->id < b->id)
541		return (-1);
542	if (a->creatorid > b->creatorid)
543		return (1);
544	if (a->creatorid < b->creatorid)
545		return (-1);
546
547	return (0);
548}
549
#ifdef INET6
/*
 * Copy an address of family `af' from src to dst: one 32-bit word for
 * AF_INET (when INET is compiled in), all four words for AF_INET6.
 * Any other family leaves dst untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	int	w;

#ifdef INET
	if (af == AF_INET) {
		dst->addr32[0] = src->addr32[0];
		return;
	}
#endif /* INET */
	if (af == AF_INET6) {
		for (w = 0; w < 4; w++)
			dst->addr32[w] = src->addr32[w];
	}
}
#endif
569
570struct pf_state *
571pf_find_state_byid(struct pf_state *key)
572{
573	pf_status.fcounters[FCNT_STATE_SEARCH]++;
574	return (RB_FIND(pf_state_tree_id, &tree_id, key));
575}
576
577struct pf_state *
578pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree)
579{
580	struct pf_state *s;
581
582	pf_status.fcounters[FCNT_STATE_SEARCH]++;
583
584	switch (tree) {
585	case PF_LAN_EXT:
586		for (; kif != NULL; kif = kif->pfik_parent) {
587			s = RB_FIND(pf_state_tree_lan_ext,
588			    &kif->pfik_lan_ext, key);
589			if (s != NULL)
590				return (s);
591		}
592		return (NULL);
593	case PF_EXT_GWY:
594		for (; kif != NULL; kif = kif->pfik_parent) {
595			s = RB_FIND(pf_state_tree_ext_gwy,
596			    &kif->pfik_ext_gwy, key);
597			if (s != NULL)
598				return (s);
599		}
600		return (NULL);
601	default:
602		panic("pf_find_state_recurse");
603	}
604}
605
606struct pf_state *
607pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more)
608{
609	struct pf_state *s, *ss = NULL;
610	struct pfi_kif	*kif;
611
612	pf_status.fcounters[FCNT_STATE_SEARCH]++;
613
614	switch (tree) {
615	case PF_LAN_EXT:
616		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
617			s = RB_FIND(pf_state_tree_lan_ext,
618			    &kif->pfik_lan_ext, key);
619			if (s == NULL)
620				continue;
621			if (more == NULL)
622				return (s);
623			ss = s;
624			(*more)++;
625		}
626		return (ss);
627	case PF_EXT_GWY:
628		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
629			s = RB_FIND(pf_state_tree_ext_gwy,
630			    &kif->pfik_ext_gwy, key);
631			if (s == NULL)
632				continue;
633			if (more == NULL)
634				return (s);
635			ss = s;
636			(*more)++;
637		}
638		return (ss);
639	default:
640		panic("pf_find_state_all");
641	}
642}
643
644int
645pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
646    struct pf_addr *src, sa_family_t af)
647{
648	struct pf_src_node	k;
649
650	if (*sn == NULL) {
651		k.af = af;
652		PF_ACPY(&k.addr, src, af);
653		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
654		    rule->rpool.opts & PF_POOL_STICKYADDR)
655			k.rule.ptr = rule;
656		else
657			k.rule.ptr = NULL;
658		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
659		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
660	}
661	if (*sn == NULL) {
662		if (!rule->max_src_nodes ||
663		    rule->src_nodes < rule->max_src_nodes)
664			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
665		if ((*sn) == NULL)
666			return (-1);
667		bzero(*sn, sizeof(struct pf_src_node));
668		(*sn)->af = af;
669		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
670		    rule->rpool.opts & PF_POOL_STICKYADDR)
671			(*sn)->rule.ptr = rule;
672		else
673			(*sn)->rule.ptr = NULL;
674		PF_ACPY(&(*sn)->addr, src, af);
675		if (RB_INSERT(pf_src_tree,
676		    &tree_src_tracking, *sn) != NULL) {
677			if (pf_status.debug >= PF_DEBUG_MISC) {
678				printf("pf: src_tree insert failed: ");
679				pf_print_host(&(*sn)->addr, 0, af);
680				printf("\n");
681			}
682			pool_put(&pf_src_tree_pl, *sn);
683			return (-1);
684		}
685#ifdef __FreeBSD__
686		(*sn)->creation = time_second;
687#else
688		(*sn)->creation = time.tv_sec;
689#endif
690		(*sn)->ruletype = rule->action;
691		if ((*sn)->rule.ptr != NULL)
692			(*sn)->rule.ptr->src_nodes++;
693		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
694		pf_status.src_nodes++;
695	} else {
696		if (rule->max_src_states &&
697		    (*sn)->states >= rule->max_src_states)
698			return (-1);
699	}
700	return (0);
701}
702
703int
704pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
705{
706	/* Thou MUST NOT insert multiple duplicate keys */
707	state->u.s.kif = kif;
708	if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) {
709		if (pf_status.debug >= PF_DEBUG_MISC) {
710			printf("pf: state insert failed: tree_lan_ext");
711			printf(" lan: ");
712			pf_print_host(&state->lan.addr, state->lan.port,
713			    state->af);
714			printf(" gwy: ");
715			pf_print_host(&state->gwy.addr, state->gwy.port,
716			    state->af);
717			printf(" ext: ");
718			pf_print_host(&state->ext.addr, state->ext.port,
719			    state->af);
720			if (state->sync_flags & PFSTATE_FROMSYNC)
721				printf(" (from sync)");
722			printf("\n");
723		}
724		return (-1);
725	}
726
727	if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) {
728		if (pf_status.debug >= PF_DEBUG_MISC) {
729			printf("pf: state insert failed: tree_ext_gwy");
730			printf(" lan: ");
731			pf_print_host(&state->lan.addr, state->lan.port,
732			    state->af);
733			printf(" gwy: ");
734			pf_print_host(&state->gwy.addr, state->gwy.port,
735			    state->af);
736			printf(" ext: ");
737			pf_print_host(&state->ext.addr, state->ext.port,
738			    state->af);
739			if (state->sync_flags & PFSTATE_FROMSYNC)
740				printf(" (from sync)");
741			printf("\n");
742		}
743		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
744		return (-1);
745	}
746
747	if (state->id == 0 && state->creatorid == 0) {
748		state->id = htobe64(pf_status.stateid++);
749		state->creatorid = pf_status.hostid;
750	}
751	if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) {
752		if (pf_status.debug >= PF_DEBUG_MISC) {
753#ifdef __FreeBSD__
754			printf("pf: state insert failed: "
755			    "id: %016llx creatorid: %08x",
756			    (long long)be64toh(state->id),
757			    ntohl(state->creatorid));
758#else
759			printf("pf: state insert failed: "
760			    "id: %016llx creatorid: %08x",
761			    betoh64(state->id), ntohl(state->creatorid));
762#endif
763			if (state->sync_flags & PFSTATE_FROMSYNC)
764				printf(" (from sync)");
765			printf("\n");
766		}
767		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
768		RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
769		return (-1);
770	}
771	TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates);
772
773	pf_status.fcounters[FCNT_STATE_INSERT]++;
774	pf_status.states++;
775	pfi_attach_state(kif);
776#if NPFSYNC
777	pfsync_insert_state(state);
778#endif
779	return (0);
780}
781
782void
783pf_purge_timeout(void *arg)
784{
785#ifdef __FreeBSD__
786	struct callout  *to = arg;
787#else
788	struct timeout	*to = arg;
789#endif
790	int		 s;
791
792#ifdef __FreeBSD__
793	PF_LOCK();
794#endif
795	s = splsoftnet();
796	pf_purge_expired_states();
797	pf_purge_expired_fragments();
798	pf_purge_expired_src_nodes();
799	splx(s);
800#ifdef __FreeBSD__
801	PF_UNLOCK();
802#endif
803
804#ifdef __FreeBSD__
805	callout_reset(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz,
806	    pf_purge_timeout, to);
807#else
808	timeout_add(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz);
809#endif
810}
811
812u_int32_t
813pf_state_expires(const struct pf_state *state)
814{
815	u_int32_t	timeout;
816	u_int32_t	start;
817	u_int32_t	end;
818	u_int32_t	states;
819
820	/* handle all PFTM_* > PFTM_MAX here */
821	if (state->timeout == PFTM_PURGE)
822#ifdef __FreeBSD__
823		return (time_second);
824#else
825		return (time.tv_sec);
826#endif
827	if (state->timeout == PFTM_UNTIL_PACKET)
828		return (0);
829#ifdef __FreeBSD__
830	KASSERT((state->timeout < PFTM_MAX),
831	    ("pf_state_expires: timeout > PFTM_MAX"));
832#else
833	KASSERT(state->timeout < PFTM_MAX);
834#endif
835	timeout = state->rule.ptr->timeout[state->timeout];
836	if (!timeout)
837		timeout = pf_default_rule.timeout[state->timeout];
838	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
839	if (start) {
840		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
841		states = state->rule.ptr->states;
842	} else {
843		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
844		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
845		states = pf_status.states;
846	}
847	if (end && states > start && start < end) {
848		if (states < end)
849			return (state->expire + timeout * (end - states) /
850			    (end - start));
851		else
852#ifdef __FreeBSD__
853			return (time_second);
854#else
855			return (time.tv_sec);
856#endif
857	}
858	return (state->expire + timeout);
859}
860
861void
862pf_purge_expired_src_nodes(void)
863{
864	 struct pf_src_node		*cur, *next;
865
866	 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
867		 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
868
869#ifdef __FreeBSD__
870		 if (cur->states <= 0 && cur->expire <= time_second) {
871#else
872		 if (cur->states <= 0 && cur->expire <= time.tv_sec) {
873#endif
874			 if (cur->rule.ptr != NULL) {
875				 cur->rule.ptr->src_nodes--;
876				 if (cur->rule.ptr->states <= 0 &&
877				     cur->rule.ptr->max_src_nodes <= 0)
878					 pf_rm_rule(NULL, cur->rule.ptr);
879			 }
880			 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
881			 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
882			 pf_status.src_nodes--;
883			 pool_put(&pf_src_tree_pl, cur);
884		 }
885	 }
886}
887
888void
889pf_src_tree_remove_state(struct pf_state *s)
890{
891	u_int32_t timeout;
892
893	if (s->src_node != NULL) {
894		if (--s->src_node->states <= 0) {
895			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
896			if (!timeout)
897				timeout =
898				    pf_default_rule.timeout[PFTM_SRC_NODE];
899#ifdef __FreeBSD__
900			s->src_node->expire = time_second + timeout;
901#else
902			s->src_node->expire = time.tv_sec + timeout;
903#endif
904		}
905	}
906	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
907		if (--s->nat_src_node->states <= 0) {
908			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
909			if (!timeout)
910				timeout =
911				    pf_default_rule.timeout[PFTM_SRC_NODE];
912#ifdef __FreeBSD__
913			s->nat_src_node->expire = time_second + timeout;
914#else
915			s->nat_src_node->expire = time.tv_sec + timeout;
916#endif
917		}
918	}
919	s->src_node = s->nat_src_node = NULL;
920}
921
922void
923pf_purge_expired_states(void)
924{
925	struct pf_state		*cur, *next;
926
927	for (cur = RB_MIN(pf_state_tree_id, &tree_id);
928	    cur; cur = next) {
929		next = RB_NEXT(pf_state_tree_id, &tree_id, cur);
930
931#ifdef __FreeBSD__
932		if (pf_state_expires(cur) <= time_second) {
933#else
934		if (pf_state_expires(cur) <= time.tv_sec) {
935#endif
936			if (cur->src.state == PF_TCPS_PROXY_DST)
937				pf_send_tcp(cur->rule.ptr, cur->af,
938				    &cur->ext.addr, &cur->lan.addr,
939				    cur->ext.port, cur->lan.port,
940				    cur->src.seqhi, cur->src.seqlo + 1, 0,
941				    TH_RST|TH_ACK, 0, 0);
942			RB_REMOVE(pf_state_tree_ext_gwy,
943			    &cur->u.s.kif->pfik_ext_gwy, cur);
944			RB_REMOVE(pf_state_tree_lan_ext,
945			    &cur->u.s.kif->pfik_lan_ext, cur);
946			RB_REMOVE(pf_state_tree_id, &tree_id, cur);
947#if NPFSYNC
948			pfsync_delete_state(cur);
949#endif
950			pf_src_tree_remove_state(cur);
951			if (--cur->rule.ptr->states <= 0 &&
952			    cur->rule.ptr->src_nodes <= 0)
953				pf_rm_rule(NULL, cur->rule.ptr);
954			if (cur->nat_rule.ptr != NULL)
955				if (--cur->nat_rule.ptr->states <= 0 &&
956					cur->nat_rule.ptr->src_nodes <= 0)
957					pf_rm_rule(NULL, cur->nat_rule.ptr);
958			if (cur->anchor.ptr != NULL)
959				if (--cur->anchor.ptr->states <= 0)
960					pf_rm_rule(NULL, cur->anchor.ptr);
961			pf_normalize_tcp_cleanup(cur);
962			pfi_detach_state(cur->u.s.kif);
963			TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates);
964			pool_put(&pf_state_pl, cur);
965			pf_status.fcounters[FCNT_STATE_REMOVALS]++;
966			pf_status.states--;
967		}
968	}
969}
970
971int
972pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
973{
974	if (aw->type != PF_ADDR_TABLE)
975		return (0);
976	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
977		return (1);
978	return (0);
979}
980
981void
982pf_tbladdr_remove(struct pf_addr_wrap *aw)
983{
984	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
985		return;
986	pfr_detach_table(aw->p.tbl);
987	aw->p.tbl = NULL;
988}
989
990void
991pf_tbladdr_copyout(struct pf_addr_wrap *aw)
992{
993	struct pfr_ktable *kt = aw->p.tbl;
994
995	if (aw->type != PF_ADDR_TABLE || kt == NULL)
996		return;
997	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
998		kt = kt->pfrkt_root;
999	aw->p.tbl = NULL;
1000	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1001		kt->pfrkt_cnt : -1;
1002}
1003
1004void
1005pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1006{
1007	switch (af) {
1008#ifdef INET
1009	case AF_INET: {
1010		u_int32_t a = ntohl(addr->addr32[0]);
1011		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
1012		    (a>>8)&255, a&255);
1013		if (p) {
1014			p = ntohs(p);
1015			printf(":%u", p);
1016		}
1017		break;
1018	}
1019#endif /* INET */
1020#ifdef INET6
1021	case AF_INET6: {
1022		u_int16_t b;
1023		u_int8_t i, curstart = 255, curend = 0,
1024		    maxstart = 0, maxend = 0;
1025		for (i = 0; i < 8; i++) {
1026			if (!addr->addr16[i]) {
1027				if (curstart == 255)
1028					curstart = i;
1029				else
1030					curend = i;
1031			} else {
1032				if (curstart) {
1033					if ((curend - curstart) >
1034					    (maxend - maxstart)) {
1035						maxstart = curstart;
1036						maxend = curend;
1037						curstart = 255;
1038					}
1039				}
1040			}
1041		}
1042		for (i = 0; i < 8; i++) {
1043			if (i >= maxstart && i <= maxend) {
1044				if (maxend != 7) {
1045					if (i == maxstart)
1046						printf(":");
1047				} else {
1048					if (i == maxend)
1049						printf(":");
1050				}
1051			} else {
1052				b = ntohs(addr->addr16[i]);
1053				printf("%x", b);
1054				if (i < 7)
1055					printf(":");
1056			}
1057		}
1058		if (p) {
1059			p = ntohs(p);
1060			printf("[%u]", p);
1061		}
1062		break;
1063	}
1064#endif /* INET6 */
1065	}
1066}
1067
1068void
1069pf_print_state(struct pf_state *s)
1070{
1071	switch (s->proto) {
1072	case IPPROTO_TCP:
1073		printf("TCP ");
1074		break;
1075	case IPPROTO_UDP:
1076		printf("UDP ");
1077		break;
1078	case IPPROTO_ICMP:
1079		printf("ICMP ");
1080		break;
1081	case IPPROTO_ICMPV6:
1082		printf("ICMPV6 ");
1083		break;
1084	default:
1085		printf("%u ", s->proto);
1086		break;
1087	}
1088	pf_print_host(&s->lan.addr, s->lan.port, s->af);
1089	printf(" ");
1090	pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
1091	printf(" ");
1092	pf_print_host(&s->ext.addr, s->ext.port, s->af);
1093	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
1094	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
1095	if (s->src.wscale && s->dst.wscale)
1096		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
1097	printf("]");
1098	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
1099	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
1100	if (s->src.wscale && s->dst.wscale)
1101		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
1102	printf("]");
1103	printf(" %u:%u", s->src.state, s->dst.state);
1104}
1105
1106void
1107pf_print_flags(u_int8_t f)
1108{
1109	if (f)
1110		printf(" ");
1111	if (f & TH_FIN)
1112		printf("F");
1113	if (f & TH_SYN)
1114		printf("S");
1115	if (f & TH_RST)
1116		printf("R");
1117	if (f & TH_PUSH)
1118		printf("P");
1119	if (f & TH_ACK)
1120		printf("A");
1121	if (f & TH_URG)
1122		printf("U");
1123	if (f & TH_ECE)
1124		printf("E");
1125	if (f & TH_CWR)
1126		printf("W");
1127}
1128
/*
 * For skip criterion i, walk from head[i] up to (not including) cur and
 * make each visited rule's skip[i] pointer refer to cur.  On exit
 * head[i] == cur.  Expects 'head' and 'cur' to be in scope where the
 * macro is expanded.
 */
#define	PF_SET_SKIP_STEPS(i)						\
	do {								\
		for (; head[i] != cur;					\
		    head[i] = TAILQ_NEXT(head[i], entries))		\
			head[i]->skip[i].ptr = cur;			\
	} while (0)
1136
1137void
1138pf_calc_skip_steps(struct pf_rulequeue *rules)
1139{
1140	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1141	int i;
1142
1143	cur = TAILQ_FIRST(rules);
1144	prev = cur;
1145	for (i = 0; i < PF_SKIP_COUNT; ++i)
1146		head[i] = cur;
1147	while (cur != NULL) {
1148
1149		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1150			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1151		if (cur->direction != prev->direction)
1152			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1153		if (cur->af != prev->af)
1154			PF_SET_SKIP_STEPS(PF_SKIP_AF);
1155		if (cur->proto != prev->proto)
1156			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1157		if (cur->src.not != prev->src.not ||
1158		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1159			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1160		if (cur->src.port[0] != prev->src.port[0] ||
1161		    cur->src.port[1] != prev->src.port[1] ||
1162		    cur->src.port_op != prev->src.port_op)
1163			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1164		if (cur->dst.not != prev->dst.not ||
1165		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1166			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1167		if (cur->dst.port[0] != prev->dst.port[0] ||
1168		    cur->dst.port[1] != prev->dst.port[1] ||
1169		    cur->dst.port_op != prev->dst.port_op)
1170			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1171
1172		prev = cur;
1173		cur = TAILQ_NEXT(cur, entries);
1174	}
1175	for (i = 0; i < PF_SKIP_COUNT; ++i)
1176		PF_SET_SKIP_STEPS(i);
1177}
1178
1179int
1180pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1181{
1182	if (aw1->type != aw2->type)
1183		return (1);
1184	switch (aw1->type) {
1185	case PF_ADDR_ADDRMASK:
1186		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1187			return (1);
1188		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1189			return (1);
1190		return (0);
1191	case PF_ADDR_DYNIFTL:
1192		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1193	case PF_ADDR_NOROUTE:
1194		return (0);
1195	case PF_ADDR_TABLE:
1196		return (aw1->p.tbl != aw2->p.tbl);
1197	default:
1198		printf("invalid address type: %d\n", aw1->type);
1199		return (1);
1200	}
1201}
1202
1203void
1204pf_update_anchor_rules()
1205{
1206	struct pf_rule	*rule;
1207	int		 i;
1208
1209	for (i = 0; i < PF_RULESET_MAX; ++i)
1210		TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr,
1211		    entries)
1212			if (rule->anchorname[0])
1213				rule->anchor = pf_find_anchor(rule->anchorname);
1214			else
1215				rule->anchor = NULL;
1216}
1217
/*
 * Incrementally adjust a 16-bit checksum after one 16-bit word of the
 * covered data changed from 'old' to 'new'.
 *
 * When 'udp' is non-zero, an incoming checksum of 0 is returned unchanged
 * and a computed result of 0 is returned as 0xFFFF instead.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	sum = cksum + old - new;
	/* fold the carry/borrow in the upper half back into 16 bits */
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp && sum == 0)
		return (0xFFFF);
	return (sum);
}
1232
1233void
1234pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1235    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1236{
1237	struct pf_addr	ao;
1238	u_int16_t	po = *p;
1239
1240	PF_ACPY(&ao, a, af);
1241	PF_ACPY(a, an, af);
1242
1243	*p = pn;
1244
1245	switch (af) {
1246#ifdef INET
1247	case AF_INET:
1248		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1249		    ao.addr16[0], an->addr16[0], 0),
1250		    ao.addr16[1], an->addr16[1], 0);
1251		*p = pn;
1252		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1253		    ao.addr16[0], an->addr16[0], u),
1254		    ao.addr16[1], an->addr16[1], u),
1255		    po, pn, u);
1256		break;
1257#endif /* INET */
1258#ifdef INET6
1259	case AF_INET6:
1260		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1261		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1262		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1263		    ao.addr16[0], an->addr16[0], u),
1264		    ao.addr16[1], an->addr16[1], u),
1265		    ao.addr16[2], an->addr16[2], u),
1266		    ao.addr16[3], an->addr16[3], u),
1267		    ao.addr16[4], an->addr16[4], u),
1268		    ao.addr16[5], an->addr16[5], u),
1269		    ao.addr16[6], an->addr16[6], u),
1270		    ao.addr16[7], an->addr16[7], u),
1271		    po, pn, u);
1272		break;
1273#endif /* INET6 */
1274	}
1275}
1276
1277
/*
 * Overwrite the 32-bit value at 'a' with 'an' and adjust checksum *c for
 * the change, upper 16 bits first, then lower 16 bits.  'a' is accessed
 * with memcpy() only, so it does not need to be aligned.  'u' is the udp
 * flag passed on to pf_cksum_fixup().
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	old;
	u_int16_t	sum;

	memcpy(&old, a, sizeof(old));
	memcpy(a, &an, sizeof(u_int32_t));

	sum = pf_cksum_fixup(*c, old >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, old & 0xffff, an & 0xffff, u);
}
1289
1290#ifdef INET6
1291void
1292pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
1293{
1294	struct pf_addr	ao;
1295
1296	PF_ACPY(&ao, a, AF_INET6);
1297	PF_ACPY(a, an, AF_INET6);
1298
1299	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1300	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1301	    pf_cksum_fixup(pf_cksum_fixup(*c,
1302	    ao.addr16[0], an->addr16[0], u),
1303	    ao.addr16[1], an->addr16[1], u),
1304	    ao.addr16[2], an->addr16[2], u),
1305	    ao.addr16[3], an->addr16[3], u),
1306	    ao.addr16[4], an->addr16[4], u),
1307	    ao.addr16[5], an->addr16[5], u),
1308	    ao.addr16[6], an->addr16[6], u),
1309	    ao.addr16[7], an->addr16[7], u);
1310}
1311#endif /* INET6 */
1312
1313void
1314pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1315    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1316    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1317{
1318	struct pf_addr	oia, ooa;
1319
1320	PF_ACPY(&oia, ia, af);
1321	PF_ACPY(&ooa, oa, af);
1322
1323	/* Change inner protocol port, fix inner protocol checksum. */
1324	if (ip != NULL) {
1325		u_int16_t	oip = *ip;
1326		u_int32_t	opc = 0;	/* make the compiler happy */
1327
1328		if (pc != NULL)
1329			opc = *pc;
1330		*ip = np;
1331		if (pc != NULL)
1332			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
1333		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1334		if (pc != NULL)
1335			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1336	}
1337	/* Change inner ip address, fix inner ip and icmp checksums. */
1338	PF_ACPY(ia, na, af);
1339	switch (af) {
1340#ifdef INET
1341	case AF_INET: {
1342		u_int32_t	 oh2c = *h2c;
1343
1344		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1345		    oia.addr16[0], ia->addr16[0], 0),
1346		    oia.addr16[1], ia->addr16[1], 0);
1347		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1348		    oia.addr16[0], ia->addr16[0], 0),
1349		    oia.addr16[1], ia->addr16[1], 0);
1350		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1351		break;
1352	}
1353#endif /* INET */
1354#ifdef INET6
1355	case AF_INET6:
1356		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1357		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1358		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1359		    oia.addr16[0], ia->addr16[0], u),
1360		    oia.addr16[1], ia->addr16[1], u),
1361		    oia.addr16[2], ia->addr16[2], u),
1362		    oia.addr16[3], ia->addr16[3], u),
1363		    oia.addr16[4], ia->addr16[4], u),
1364		    oia.addr16[5], ia->addr16[5], u),
1365		    oia.addr16[6], ia->addr16[6], u),
1366		    oia.addr16[7], ia->addr16[7], u);
1367		break;
1368#endif /* INET6 */
1369	}
1370	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
1371	PF_ACPY(oa, na, af);
1372	switch (af) {
1373#ifdef INET
1374	case AF_INET:
1375		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1376		    ooa.addr16[0], oa->addr16[0], 0),
1377		    ooa.addr16[1], oa->addr16[1], 0);
1378		break;
1379#endif /* INET */
1380#ifdef INET6
1381	case AF_INET6:
1382		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1383		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1384		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1385		    ooa.addr16[0], oa->addr16[0], u),
1386		    ooa.addr16[1], oa->addr16[1], u),
1387		    ooa.addr16[2], oa->addr16[2], u),
1388		    ooa.addr16[3], oa->addr16[3], u),
1389		    ooa.addr16[4], oa->addr16[4], u),
1390		    ooa.addr16[5], oa->addr16[5], u),
1391		    ooa.addr16[6], oa->addr16[6], u),
1392		    ooa.addr16[7], oa->addr16[7], u);
1393		break;
1394#endif /* INET6 */
1395	}
1396}
1397
1398void
1399pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1400    const struct pf_addr *saddr, const struct pf_addr *daddr,
1401    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
1402    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl)
1403{
1404	struct mbuf	*m;
1405#ifdef ALTQ
1406	struct m_tag	*mtag;
1407#endif
1408	int		 len = 0, tlen;		/* make the compiler happy */
1409#ifdef INET
1410	struct ip	*h = NULL;		/* make the compiler happy */
1411#endif /* INET */
1412#ifdef INET6
1413	struct ip6_hdr	*h6 = NULL;		/* make the compiler happy */
1414#endif /* INET6 */
1415	struct tcphdr	*th = NULL;		/* make the compiler happy */
1416#ifdef __FreeBSD__
1417	struct ip 	*ip;
1418#endif
1419	char *opt;
1420
1421	/* maximum segment size tcp option */
1422	tlen = sizeof(struct tcphdr);
1423	if (mss)
1424		tlen += 4;
1425
1426	switch (af) {
1427#ifdef INET
1428	case AF_INET:
1429		len = sizeof(struct ip) + tlen;
1430		break;
1431#endif /* INET */
1432#ifdef INET6
1433	case AF_INET6:
1434		len = sizeof(struct ip6_hdr) + tlen;
1435		break;
1436#endif /* INET6 */
1437	}
1438
1439	/* create outgoing mbuf */
1440#ifdef __FreeBSD__
1441	m = m_gethdr(M_DONTWAIT, MT_HEADER);
1442	if (m == NULL)
1443		return;
1444	m->m_flags |= M_SKIP_FIREWALL;
1445#else
1446	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
1447	if (mtag == NULL)
1448		return;
1449	m = m_gethdr(M_DONTWAIT, MT_HEADER);
1450	if (m == NULL) {
1451		m_tag_free(mtag);
1452		return;
1453	}
1454	m_tag_prepend(m, mtag);
1455#endif
1456#ifdef ALTQ
1457	if (r != NULL && r->qid) {
1458		struct altq_tag *atag;
1459
1460		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
1461		if (mtag != NULL) {
1462			atag = (struct altq_tag *)(mtag + 1);
1463			atag->qid = r->qid;
1464			/* add hints for ecn */
1465			atag->af = af;
1466			atag->hdr = mtod(m, struct ip *);
1467			m_tag_prepend(m, mtag);
1468		}
1469	}
1470#endif
1471	m->m_data += max_linkhdr;
1472	m->m_pkthdr.len = m->m_len = len;
1473	m->m_pkthdr.rcvif = NULL;
1474	bzero(m->m_data, len);
1475	switch (af) {
1476#ifdef INET
1477	case AF_INET:
1478		h = mtod(m, struct ip *);
1479
1480		/* IP header fields included in the TCP checksum */
1481		h->ip_p = IPPROTO_TCP;
1482		h->ip_len = htons(tlen);
1483		h->ip_src.s_addr = saddr->v4.s_addr;
1484		h->ip_dst.s_addr = daddr->v4.s_addr;
1485
1486		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
1487		break;
1488#endif /* INET */
1489#ifdef INET6
1490	case AF_INET6:
1491		h6 = mtod(m, struct ip6_hdr *);
1492
1493		/* IP header fields included in the TCP checksum */
1494		h6->ip6_nxt = IPPROTO_TCP;
1495		h6->ip6_plen = htons(tlen);
1496		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1497		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1498
1499		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
1500		break;
1501#endif /* INET6 */
1502	}
1503
1504	/* TCP header */
1505	th->th_sport = sport;
1506	th->th_dport = dport;
1507	th->th_seq = htonl(seq);
1508	th->th_ack = htonl(ack);
1509	th->th_off = tlen >> 2;
1510	th->th_flags = flags;
1511	th->th_win = htons(win);
1512
1513	if (mss) {
1514		opt = (char *)(th + 1);
1515		opt[0] = TCPOPT_MAXSEG;
1516		opt[1] = 4;
1517		HTONS(mss);
1518		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
1519	}
1520
1521	switch (af) {
1522#ifdef INET
1523	case AF_INET:
1524		/* TCP checksum */
1525		th->th_sum = in_cksum(m, len);
1526
1527		/* Finish the IP header */
1528		h->ip_v = 4;
1529		h->ip_hl = sizeof(*h) >> 2;
1530		h->ip_tos = IPTOS_LOWDELAY;
1531#ifdef __FreeBSD__
1532		h->ip_off = path_mtu_discovery ? IP_DF : 0;
1533		h->ip_len = len;
1534#else
1535		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
1536		h->ip_len = htons(len);
1537#endif
1538		h->ip_ttl = ttl ? ttl : ip_defttl;
1539		h->ip_sum = 0;
1540#ifdef __FreeBSD__
1541		ip = mtod(m, struct ip *);
1542		PF_UNLOCK();
1543		ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
1544			(void *)NULL);
1545		PF_LOCK();
1546#else /* ! __FreeBSD__ */
1547		ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
1548		    (void *)NULL);
1549#endif
1550		break;
1551#endif /* INET */
1552#ifdef INET6
1553	case AF_INET6:
1554		/* TCP checksum */
1555		th->th_sum = in6_cksum(m, IPPROTO_TCP,
1556		    sizeof(struct ip6_hdr), tlen);
1557
1558		h6->ip6_vfc |= IPV6_VERSION;
1559		h6->ip6_hlim = IPV6_DEFHLIM;
1560
1561#ifdef __FreeBSD__
1562		PF_UNLOCK();
1563		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
1564		PF_LOCK();
1565#else
1566		ip6_output(m, NULL, NULL, 0, NULL, NULL);
1567#endif
1568		break;
1569#endif /* INET6 */
1570	}
1571}
1572
1573void
1574pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1575    struct pf_rule *r)
1576{
1577#ifdef ALTQ
1578	struct m_tag	*mtag;
1579#endif
1580	struct mbuf	*m0;
1581#ifdef __FreeBSD__
1582	struct ip *ip;
1583#endif
1584
1585#ifdef __FreeBSD__
1586	m0 = m_copypacket(m, M_DONTWAIT);
1587	if (m0 == NULL)
1588		return;
1589	m0->m_flags |= M_SKIP_FIREWALL;
1590#else
1591	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
1592	if (mtag == NULL)
1593		return;
1594	m0 = m_copy(m, 0, M_COPYALL);
1595	if (m0 == NULL) {
1596		m_tag_free(mtag);
1597		return;
1598	}
1599	m_tag_prepend(m0, mtag);
1600#endif
1601
1602#ifdef ALTQ
1603	if (r->qid) {
1604		struct altq_tag *atag;
1605
1606		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
1607		if (mtag != NULL) {
1608			atag = (struct altq_tag *)(mtag + 1);
1609			atag->qid = r->qid;
1610			/* add hints for ecn */
1611			atag->af = af;
1612			atag->hdr = mtod(m0, struct ip *);
1613			m_tag_prepend(m0, mtag);
1614		}
1615	}
1616#endif
1617
1618	switch (af) {
1619#ifdef INET
1620	case AF_INET:
1621#ifdef __FreeBSD__
1622		/* icmp_error() expects host byte ordering */
1623		ip = mtod(m0, struct ip *);
1624		NTOHS(ip->ip_len);
1625		NTOHS(ip->ip_off);
1626		PF_UNLOCK();
1627#endif
1628		icmp_error(m0, type, code, 0, (void *)NULL);
1629#ifdef __FreeBSD__
1630		PF_LOCK();
1631#endif
1632		break;
1633#endif /* INET */
1634#ifdef INET6
1635	case AF_INET6:
1636#ifdef __FreeBSD__
1637		PF_UNLOCK();
1638#endif
1639		icmp6_error(m0, type, code, 0);
1640#ifdef __FreeBSD__
1641		PF_LOCK();
1642#endif
1643		break;
1644#endif /* INET6 */
1645	}
1646}
1647
1648/*
1649 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
1650 * If n is 0, they match if they are equal. If n is != 0, they match if they
1651 * are different.
1652 */
1653int
1654pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
1655    struct pf_addr *b, sa_family_t af)
1656{
1657	int	match = 0;
1658
1659	switch (af) {
1660#ifdef INET
1661	case AF_INET:
1662		if ((a->addr32[0] & m->addr32[0]) ==
1663		    (b->addr32[0] & m->addr32[0]))
1664			match++;
1665		break;
1666#endif /* INET */
1667#ifdef INET6
1668	case AF_INET6:
1669		if (((a->addr32[0] & m->addr32[0]) ==
1670		     (b->addr32[0] & m->addr32[0])) &&
1671		    ((a->addr32[1] & m->addr32[1]) ==
1672		     (b->addr32[1] & m->addr32[1])) &&
1673		    ((a->addr32[2] & m->addr32[2]) ==
1674		     (b->addr32[2] & m->addr32[2])) &&
1675		    ((a->addr32[3] & m->addr32[3]) ==
1676		     (b->addr32[3] & m->addr32[3])))
1677			match++;
1678		break;
1679#endif /* INET6 */
1680	}
1681	if (match) {
1682		if (n)
1683			return (0);
1684		else
1685			return (1);
1686	} else {
1687		if (n)
1688			return (1);
1689		else
1690			return (0);
1691	}
1692}
1693
1694int
1695pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1696{
1697	switch (op) {
1698	case PF_OP_IRG:
1699		return ((p > a1) && (p < a2));
1700	case PF_OP_XRG:
1701		return ((p < a1) || (p > a2));
1702	case PF_OP_RRG:
1703		return ((p >= a1) && (p <= a2));
1704	case PF_OP_EQ:
1705		return (p == a1);
1706	case PF_OP_NE:
1707		return (p != a1);
1708	case PF_OP_LT:
1709		return (p < a1);
1710	case PF_OP_LE:
1711		return (p <= a1);
1712	case PF_OP_GT:
1713		return (p > a1);
1714	case PF_OP_GE:
1715		return (p >= a1);
1716	}
1717	return (0); /* never reached */
1718}
1719
/*
 * Port flavour of pf_match(): all three port values are converted with
 * ntohs() before the comparison is made.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p)));
}
1728
1729int
1730pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1731{
1732	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1733		return (0);
1734	return (pf_match(op, a1, a2, u));
1735}
1736
1737int
1738pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
1739{
1740	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1741		return (0);
1742	return (pf_match(op, a1, a2, g));
1743}
1744
1745struct pf_tag *
1746pf_get_tag(struct mbuf *m)
1747{
1748	struct m_tag	*mtag;
1749
1750	if ((mtag = m_tag_find(m, PACKET_TAG_PF_TAG, NULL)) != NULL)
1751		return ((struct pf_tag *)(mtag + 1));
1752	else
1753		return (NULL);
1754}
1755
1756int
1757pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_rule *nat_rule,
1758    struct pf_tag *pftag, int *tag)
1759{
1760	if (*tag == -1) {	/* find mbuf tag */
1761		pftag = pf_get_tag(m);
1762		if (pftag != NULL)
1763			*tag = pftag->tag;
1764		else
1765			*tag = 0;
1766		if (nat_rule != NULL && nat_rule->tag)
1767			*tag = nat_rule->tag;
1768	}
1769
1770	return ((!r->match_tag_not && r->match_tag == *tag) ||
1771	    (r->match_tag_not && r->match_tag != *tag));
1772}
1773
1774int
1775pf_tag_packet(struct mbuf *m, struct pf_tag *pftag, int tag)
1776{
1777	struct m_tag	*mtag;
1778
1779	if (tag <= 0)
1780		return (0);
1781
1782	if (pftag == NULL) {
1783		mtag = m_tag_get(PACKET_TAG_PF_TAG, sizeof(*pftag), M_NOWAIT);
1784		if (mtag == NULL)
1785			return (1);
1786		((struct pf_tag *)(mtag + 1))->tag = tag;
1787		m_tag_prepend(m, mtag);
1788	} else
1789		pftag->tag = tag;
1790
1791	return (0);
1792}
1793
/*
 * Descend from anchor rule r into the rulesets of its anchor.
 *
 * On entry r must be a rule with an anchor and a and s must be NULL,
 * otherwise the macro panics.  a remembers the anchor rule, s becomes the
 * first ruleset of the anchor whose active list n is non-empty, and r the
 * first rule of that list.  If no ruleset has rules, r is set to the rule
 * following the anchor rule and a is reset to NULL.
 */
#define PF_STEP_INTO_ANCHOR(r, a, s, n)					\
	do {								\
		if (!((r) != NULL && (r)->anchor != NULL &&		\
		    (s) == NULL && (a) == NULL))			\
			panic("PF_STEP_INTO_ANCHOR");			\
		(a) = (r);						\
		(s) = TAILQ_FIRST(&(a)->anchor->rulesets);		\
		(r) = NULL;						\
		for (; (s) != NULL; (s) = TAILQ_NEXT((s), entries)) {	\
			(r) = TAILQ_FIRST((s)->rules[n].active.ptr);	\
			if ((r) != NULL)				\
				break;					\
		}							\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1810
/*
 * Continue after the last rule of ruleset s inside an anchor.
 *
 * On entry r must be NULL and a and s non-NULL, otherwise the macro
 * panics.  s advances to the next ruleset whose active list n is
 * non-empty and r becomes its first rule.  If there is none, r is set to
 * the rule following anchor rule a and a is reset to NULL.
 */
#define PF_STEP_OUT_OF_ANCHOR(r, a, s, n)				\
	do {								\
		if (!((r) == NULL && (a) != NULL && (s) != NULL))	\
			panic("PF_STEP_OUT_OF_ANCHOR");			\
		for ((s) = TAILQ_NEXT((s), entries); (s) != NULL;	\
		    (s) = TAILQ_NEXT((s), entries)) {			\
			(r) = TAILQ_FIRST((s)->rules[n].active.ptr);	\
			if ((r) != NULL)				\
				break;					\
		}							\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1824
1825#ifdef INET6
1826void
1827pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
1828    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
1829{
1830	switch (af) {
1831#ifdef INET
1832	case AF_INET:
1833		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1834		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1835		break;
1836#endif /* INET */
1837	case AF_INET6:
1838		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1839		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1840		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
1841		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
1842		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
1843		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
1844		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
1845		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
1846		break;
1847	}
1848}
1849
1850void
1851pf_addr_inc(struct pf_addr *addr, sa_family_t af)
1852{
1853	switch (af) {
1854#ifdef INET
1855	case AF_INET:
1856		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
1857		break;
1858#endif /* INET */
1859	case AF_INET6:
1860		if (addr->addr32[3] == 0xffffffff) {
1861			addr->addr32[3] = 0;
1862			if (addr->addr32[2] == 0xffffffff) {
1863				addr->addr32[2] = 0;
1864				if (addr->addr32[1] == 0xffffffff) {
1865					addr->addr32[1] = 0;
1866					addr->addr32[0] =
1867					    htonl(ntohl(addr->addr32[0]) + 1);
1868				} else
1869					addr->addr32[1] =
1870					    htonl(ntohl(addr->addr32[1]) + 1);
1871			} else
1872				addr->addr32[2] =
1873				    htonl(ntohl(addr->addr32[2]) + 1);
1874		} else
1875			addr->addr32[3] =
1876			    htonl(ntohl(addr->addr32[3]) + 1);
1877		break;
1878	}
1879}
1880#endif /* INET6 */
1881
/*
 * Mix three 32-bit values in place.  All three arguments must be
 * modifiable lvalues; each one is read and written several times.
 */
#define mix(a,b,c) \
	do {								\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);		\
	} while (0)
1894
1895/*
1896 * hash function based on bridge_hash in if_bridge.c
1897 */
1898void
1899pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
1900    struct pf_poolhashkey *key, sa_family_t af)
1901{
1902	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
1903
1904	switch (af) {
1905#ifdef INET
1906	case AF_INET:
1907		a += inaddr->addr32[0];
1908		b += key->key32[1];
1909		mix(a, b, c);
1910		hash->addr32[0] = c + key->key32[2];
1911		break;
1912#endif /* INET */
1913#ifdef INET6
1914	case AF_INET6:
1915		a += inaddr->addr32[0];
1916		b += inaddr->addr32[2];
1917		mix(a, b, c);
1918		hash->addr32[0] = c;
1919		a += inaddr->addr32[1];
1920		b += inaddr->addr32[3];
1921		c += key->key32[1];
1922		mix(a, b, c);
1923		hash->addr32[1] = c;
1924		a += inaddr->addr32[2];
1925		b += inaddr->addr32[1];
1926		c += key->key32[2];
1927		mix(a, b, c);
1928		hash->addr32[2] = c;
1929		a += inaddr->addr32[3];
1930		b += inaddr->addr32[0];
1931		c += key->key32[3];
1932		mix(a, b, c);
1933		hash->addr32[3] = c;
1934		break;
1935#endif /* INET6 */
1936	}
1937}
1938
1939int
1940pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
1941    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
1942{
1943	unsigned char		 hash[16];
1944	struct pf_pool		*rpool = &r->rpool;
1945	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
1946	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
1947	struct pf_pooladdr	*acur = rpool->cur;
1948	struct pf_src_node	 k;
1949
1950	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
1951	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
1952		k.af = af;
1953		PF_ACPY(&k.addr, saddr, af);
1954		if (r->rule_flag & PFRULE_RULESRCTRACK ||
1955		    r->rpool.opts & PF_POOL_STICKYADDR)
1956			k.rule.ptr = r;
1957		else
1958			k.rule.ptr = NULL;
1959		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
1960		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
1961		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
1962			PF_ACPY(naddr, &(*sn)->raddr, af);
1963			if (pf_status.debug >= PF_DEBUG_MISC) {
1964				printf("pf_map_addr: src tracking maps ");
1965				pf_print_host(&k.addr, 0, af);
1966				printf(" to ");
1967				pf_print_host(naddr, 0, af);
1968				printf("\n");
1969			}
1970			return (0);
1971		}
1972	}
1973
1974	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
1975		return (1);
1976	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
1977		if (af == AF_INET) {
1978			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
1979			    (rpool->opts & PF_POOL_TYPEMASK) !=
1980			    PF_POOL_ROUNDROBIN)
1981				return (1);
1982			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
1983			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
1984		} else {
1985			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
1986			    (rpool->opts & PF_POOL_TYPEMASK) !=
1987			    PF_POOL_ROUNDROBIN)
1988				return (1);
1989			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
1990			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
1991		}
1992	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
1993		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
1994			return (1); /* unsupported */
1995	} else {
1996		raddr = &rpool->cur->addr.v.a.addr;
1997		rmask = &rpool->cur->addr.v.a.mask;
1998	}
1999
2000	switch (rpool->opts & PF_POOL_TYPEMASK) {
2001	case PF_POOL_NONE:
2002		PF_ACPY(naddr, raddr, af);
2003		break;
2004	case PF_POOL_BITMASK:
2005		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
2006		break;
2007	case PF_POOL_RANDOM:
2008		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
2009			switch (af) {
2010#ifdef INET
2011			case AF_INET:
2012				rpool->counter.addr32[0] = arc4random();
2013				break;
2014#endif /* INET */
2015#ifdef INET6
2016			case AF_INET6:
2017				if (rmask->addr32[3] != 0xffffffff)
2018					rpool->counter.addr32[3] = arc4random();
2019				else
2020					break;
2021				if (rmask->addr32[2] != 0xffffffff)
2022					rpool->counter.addr32[2] = arc4random();
2023				else
2024					break;
2025				if (rmask->addr32[1] != 0xffffffff)
2026					rpool->counter.addr32[1] = arc4random();
2027				else
2028					break;
2029				if (rmask->addr32[0] != 0xffffffff)
2030					rpool->counter.addr32[0] = arc4random();
2031				break;
2032#endif /* INET6 */
2033			}
2034			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2035			PF_ACPY(init_addr, naddr, af);
2036
2037		} else {
2038			PF_AINC(&rpool->counter, af);
2039			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2040		}
2041		break;
2042	case PF_POOL_SRCHASH:
2043		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
2044		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
2045		break;
2046	case PF_POOL_ROUNDROBIN:
2047		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2048			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
2049			    &rpool->tblidx, &rpool->counter,
2050			    &raddr, &rmask, af))
2051				goto get_addr;
2052		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2053			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2054			    &rpool->tblidx, &rpool->counter,
2055			    &raddr, &rmask, af))
2056				goto get_addr;
2057		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
2058			goto get_addr;
2059
2060	try_next:
2061		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
2062			rpool->cur = TAILQ_FIRST(&rpool->list);
2063		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2064			rpool->tblidx = -1;
2065			if (pfr_pool_get(rpool->cur->addr.p.tbl,
2066			    &rpool->tblidx, &rpool->counter,
2067			    &raddr, &rmask, af)) {
2068				/* table contains no address of type 'af' */
2069				if (rpool->cur != acur)
2070					goto try_next;
2071				return (1);
2072			}
2073		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2074			rpool->tblidx = -1;
2075			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2076			    &rpool->tblidx, &rpool->counter,
2077			    &raddr, &rmask, af)) {
2078				/* table contains no address of type 'af' */
2079				if (rpool->cur != acur)
2080					goto try_next;
2081				return (1);
2082			}
2083		} else {
2084			raddr = &rpool->cur->addr.v.a.addr;
2085			rmask = &rpool->cur->addr.v.a.mask;
2086			PF_ACPY(&rpool->counter, raddr, af);
2087		}
2088
2089	get_addr:
2090		PF_ACPY(naddr, &rpool->counter, af);
2091		if (init_addr != NULL && PF_AZERO(init_addr, af))
2092			PF_ACPY(init_addr, naddr, af);
2093		PF_AINC(&rpool->counter, af);
2094		break;
2095	}
2096	if (*sn != NULL)
2097		PF_ACPY(&(*sn)->raddr, naddr, af);
2098
2099	if (pf_status.debug >= PF_DEBUG_MISC &&
2100	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2101		printf("pf_map_addr: selected address ");
2102		pf_print_host(naddr, 0, af);
2103		printf("\n");
2104	}
2105
2106	return (0);
2107}
2108
2109int
2110pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2111    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2112    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2113    struct pf_src_node **sn)
2114{
2115	struct pf_state		key;
2116	struct pf_addr		init_addr;
2117	u_int16_t		cut;
2118
2119	bzero(&init_addr, sizeof(init_addr));
2120	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2121		return (1);
2122
2123	do {
2124		key.af = af;
2125		key.proto = proto;
2126		PF_ACPY(&key.ext.addr, daddr, key.af);
2127		PF_ACPY(&key.gwy.addr, naddr, key.af);
2128		key.ext.port = dport;
2129
2130		/*
2131		 * port search; start random, step;
2132		 * similar 2 portloop in in_pcbbind
2133		 */
2134		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
2135			key.gwy.port = dport;
2136			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2137				return (0);
2138		} else if (low == 0 && high == 0) {
2139			key.gwy.port = *nport;
2140			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2141				return (0);
2142		} else if (low == high) {
2143			key.gwy.port = htons(low);
2144			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
2145				*nport = htons(low);
2146				return (0);
2147			}
2148		} else {
2149			u_int16_t tmp;
2150
2151			if (low > high) {
2152				tmp = low;
2153				low = high;
2154				high = tmp;
2155			}
2156			/* low < high */
2157			cut = arc4random() % (1 + high - low) + low;
2158			/* low <= cut <= high */
2159			for (tmp = cut; tmp <= high; ++(tmp)) {
2160				key.gwy.port = htons(tmp);
2161				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2162				    NULL) {
2163					*nport = htons(tmp);
2164					return (0);
2165				}
2166			}
2167			for (tmp = cut - 1; tmp >= low; --(tmp)) {
2168				key.gwy.port = htons(tmp);
2169				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2170				    NULL) {
2171					*nport = htons(tmp);
2172					return (0);
2173				}
2174			}
2175		}
2176
2177		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2178		case PF_POOL_RANDOM:
2179		case PF_POOL_ROUNDROBIN:
2180			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2181				return (1);
2182			break;
2183		case PF_POOL_NONE:
2184		case PF_POOL_SRCHASH:
2185		case PF_POOL_BITMASK:
2186		default:
2187			return (1);
2188		}
2189	} while (! PF_AEQ(&init_addr, naddr, af) );
2190
2191	return (1);					/* none available */
2192}
2193
2194struct pf_rule *
2195pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
2196    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
2197    struct pf_addr *daddr, u_int16_t dport, int rs_num)
2198{
2199	struct pf_rule		*r, *rm = NULL, *anchorrule = NULL;
2200	struct pf_ruleset	*ruleset = NULL;
2201
2202	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
2203	while (r && rm == NULL) {
2204		struct pf_rule_addr	*src = NULL, *dst = NULL;
2205		struct pf_addr_wrap	*xdst = NULL;
2206
2207		if (r->action == PF_BINAT && direction == PF_IN) {
2208			src = &r->dst;
2209			if (r->rpool.cur != NULL)
2210				xdst = &r->rpool.cur->addr;
2211		} else {
2212			src = &r->src;
2213			dst = &r->dst;
2214		}
2215
2216		r->evaluations++;
2217		if (r->kif != NULL &&
2218		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
2219			r = r->skip[PF_SKIP_IFP].ptr;
2220		else if (r->direction && r->direction != direction)
2221			r = r->skip[PF_SKIP_DIR].ptr;
2222		else if (r->af && r->af != pd->af)
2223			r = r->skip[PF_SKIP_AF].ptr;
2224		else if (r->proto && r->proto != pd->proto)
2225			r = r->skip[PF_SKIP_PROTO].ptr;
2226		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->not))
2227			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2228			    PF_SKIP_DST_ADDR].ptr;
2229		else if (src->port_op && !pf_match_port(src->port_op,
2230		    src->port[0], src->port[1], sport))
2231			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2232			    PF_SKIP_DST_PORT].ptr;
2233		else if (dst != NULL &&
2234		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->not))
2235			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2236		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0))
2237			r = TAILQ_NEXT(r, entries);
2238		else if (dst != NULL && dst->port_op &&
2239		    !pf_match_port(dst->port_op, dst->port[0],
2240		    dst->port[1], dport))
2241			r = r->skip[PF_SKIP_DST_PORT].ptr;
2242		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2243		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2244		    off, pd->hdr.tcp), r->os_fingerprint)))
2245			r = TAILQ_NEXT(r, entries);
2246		else if (r->anchorname[0] && r->anchor == NULL)
2247			r = TAILQ_NEXT(r, entries);
2248		else if (r->anchor == NULL)
2249				rm = r;
2250		else
2251			PF_STEP_INTO_ANCHOR(r, anchorrule, ruleset, rs_num);
2252		if (r == NULL && anchorrule != NULL)
2253			PF_STEP_OUT_OF_ANCHOR(r, anchorrule, ruleset,
2254			    rs_num);
2255	}
2256	if (rm != NULL && (rm->action == PF_NONAT ||
2257	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2258		return (NULL);
2259	return (rm);
2260}
2261
2262struct pf_rule *
2263pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2264    struct pfi_kif *kif, struct pf_src_node **sn,
2265    struct pf_addr *saddr, u_int16_t sport,
2266    struct pf_addr *daddr, u_int16_t dport,
2267    struct pf_addr *naddr, u_int16_t *nport)
2268{
2269	struct pf_rule	*r = NULL;
2270
2271	if (direction == PF_OUT) {
2272		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2273		    sport, daddr, dport, PF_RULESET_BINAT);
2274		if (r == NULL)
2275			r = pf_match_translation(pd, m, off, direction, kif,
2276			    saddr, sport, daddr, dport, PF_RULESET_NAT);
2277	} else {
2278		r = pf_match_translation(pd, m, off, direction, kif, saddr,
2279		    sport, daddr, dport, PF_RULESET_RDR);
2280		if (r == NULL)
2281			r = pf_match_translation(pd, m, off, direction, kif,
2282			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
2283	}
2284
2285	if (r != NULL) {
2286		switch (r->action) {
2287		case PF_NONAT:
2288		case PF_NOBINAT:
2289		case PF_NORDR:
2290			return (NULL);
2291		case PF_NAT:
2292			if (pf_get_sport(pd->af, pd->proto, r, saddr,
2293			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2294			    r->rpool.proxy_port[1], sn)) {
2295				DPFPRINTF(PF_DEBUG_MISC,
2296				    ("pf: NAT proxy port allocation "
2297				    "(%u-%u) failed\n",
2298				    r->rpool.proxy_port[0],
2299				    r->rpool.proxy_port[1]));
2300				return (NULL);
2301			}
2302			break;
2303		case PF_BINAT:
2304			switch (direction) {
2305			case PF_OUT:
2306				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2307					if (pd->af == AF_INET) {
2308						if (r->rpool.cur->addr.p.dyn->
2309						    pfid_acnt4 < 1)
2310							return (NULL);
2311						PF_POOLMASK(naddr,
2312						    &r->rpool.cur->addr.p.dyn->
2313						    pfid_addr4,
2314						    &r->rpool.cur->addr.p.dyn->
2315						    pfid_mask4,
2316						    saddr, AF_INET);
2317					} else {
2318						if (r->rpool.cur->addr.p.dyn->
2319						    pfid_acnt6 < 1)
2320							return (NULL);
2321						PF_POOLMASK(naddr,
2322						    &r->rpool.cur->addr.p.dyn->
2323						    pfid_addr6,
2324						    &r->rpool.cur->addr.p.dyn->
2325						    pfid_mask6,
2326						    saddr, AF_INET6);
2327					}
2328				} else
2329					PF_POOLMASK(naddr,
2330					    &r->rpool.cur->addr.v.a.addr,
2331					    &r->rpool.cur->addr.v.a.mask,
2332					    saddr, pd->af);
2333				break;
2334			case PF_IN:
2335				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
2336					if (pd->af == AF_INET) {
2337						if (r->src.addr.p.dyn->
2338						    pfid_acnt4 < 1)
2339							return (NULL);
2340						PF_POOLMASK(naddr,
2341						    &r->src.addr.p.dyn->
2342						    pfid_addr4,
2343						    &r->src.addr.p.dyn->
2344						    pfid_mask4,
2345						    daddr, AF_INET);
2346					} else {
2347						if (r->src.addr.p.dyn->
2348						    pfid_acnt6 < 1)
2349							return (NULL);
2350						PF_POOLMASK(naddr,
2351						    &r->src.addr.p.dyn->
2352						    pfid_addr6,
2353						    &r->src.addr.p.dyn->
2354						    pfid_mask6,
2355						    daddr, AF_INET6);
2356					}
2357				} else
2358					PF_POOLMASK(naddr,
2359					    &r->src.addr.v.a.addr,
2360					    &r->src.addr.v.a.mask, daddr,
2361					    pd->af);
2362				break;
2363			}
2364			break;
2365		case PF_RDR: {
2366			if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
2367				return (NULL);
2368
2369			if (r->rpool.proxy_port[1]) {
2370				u_int32_t	tmp_nport;
2371
2372				tmp_nport = ((ntohs(dport) -
2373				    ntohs(r->dst.port[0])) %
2374				    (r->rpool.proxy_port[1] -
2375				    r->rpool.proxy_port[0] + 1)) +
2376				    r->rpool.proxy_port[0];
2377
2378				/* wrap around if necessary */
2379				if (tmp_nport > 65535)
2380					tmp_nport -= 65535;
2381				*nport = htons((u_int16_t)tmp_nport);
2382			} else if (r->rpool.proxy_port[0])
2383				*nport = htons(r->rpool.proxy_port[0]);
2384			break;
2385		}
2386		default:
2387			return (NULL);
2388		}
2389	}
2390
2391	return (r);
2392}
2393
2394int
2395#ifdef __FreeBSD__
2396pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd,
2397    struct inpcb *inp_arg)
2398#else
2399pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd)
2400#endif
2401{
2402	struct pf_addr		*saddr, *daddr;
2403	u_int16_t		 sport, dport;
2404#ifdef __FreeBSD__
2405	struct inpcbinfo	*pi;
2406#else
2407	struct inpcbtable	*tb;
2408#endif
2409	struct inpcb		*inp;
2410
2411	*uid = UID_MAX;
2412	*gid = GID_MAX;
2413#ifdef __FreeBSD__
2414	if (inp_arg != NULL) {
2415		INP_LOCK_ASSERT(inp_arg);
2416		if (inp_arg->inp_socket) {
2417			*uid = inp_arg->inp_socket->so_cred->cr_uid;
2418			*gid = inp_arg->inp_socket->so_cred->cr_groups[0];
2419			return (1);
2420		} else
2421			return (0);
2422	}
2423#endif
2424	switch (pd->proto) {
2425	case IPPROTO_TCP:
2426		sport = pd->hdr.tcp->th_sport;
2427		dport = pd->hdr.tcp->th_dport;
2428#ifdef __FreeBSD__
2429		pi = &tcbinfo;
2430#else
2431		tb = &tcbtable;
2432#endif
2433		break;
2434	case IPPROTO_UDP:
2435		sport = pd->hdr.udp->uh_sport;
2436		dport = pd->hdr.udp->uh_dport;
2437#ifdef __FreeBSD__
2438		pi = &udbinfo;
2439#else
2440		tb = &udbtable;
2441#endif
2442		break;
2443	default:
2444		return (0);
2445	}
2446	if (direction == PF_IN) {
2447		saddr = pd->src;
2448		daddr = pd->dst;
2449	} else {
2450		u_int16_t	p;
2451
2452		p = sport;
2453		sport = dport;
2454		dport = p;
2455		saddr = pd->dst;
2456		daddr = pd->src;
2457	}
2458	switch (pd->af) {
2459	case AF_INET:
2460#ifdef __FreeBSD__
2461		INP_INFO_RLOCK(pi);	/* XXX LOR */
2462		inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2463			dport, 0, NULL);
2464		if (inp == NULL) {
2465			inp = in_pcblookup_hash(pi, saddr->v4, sport,
2466			   daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
2467			if(inp == NULL) {
2468				INP_INFO_RUNLOCK(pi);
2469				return (0);
2470			}
2471		}
2472#else
2473		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
2474		if (inp == NULL) {
2475			inp = in_pcblookup_listen(tb, daddr->v4, dport, 0);
2476			if (inp == NULL)
2477				return (0);
2478		}
2479#endif
2480		break;
2481#ifdef INET6
2482	case AF_INET6:
2483#ifdef __FreeBSD__
2484		INP_INFO_RLOCK(pi);
2485		inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2486			&daddr->v6, dport, 0, NULL);
2487		if (inp == NULL) {
2488			inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2489			&daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2490			if (inp == NULL) {
2491				INP_INFO_RUNLOCK(pi);
2492				return (0);
2493			}
2494		}
2495#else
2496		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
2497		    dport);
2498		if (inp == NULL) {
2499			inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0);
2500			if (inp == NULL)
2501				return (0);
2502		}
2503#endif
2504		break;
2505#endif /* INET6 */
2506
2507	default:
2508		return (0);
2509	}
2510#ifdef __FreeBSD__
2511	INP_LOCK(inp);
2512	if ((inp->inp_socket == NULL) || (inp->inp_socket->so_cred == NULL)) {
2513		INP_UNLOCK(inp);
2514		INP_INFO_RUNLOCK(pi);
2515		return (0);
2516	}
2517	*uid = inp->inp_socket->so_cred->cr_uid;
2518	*gid = inp->inp_socket->so_cred->cr_groups[0];
2519	INP_UNLOCK(inp);
2520	INP_INFO_RUNLOCK(pi);
2521#else
2522	*uid = inp->inp_socket->so_euid;
2523	*gid = inp->inp_socket->so_egid;
2524#endif
2525	return (1);
2526}
2527
2528u_int8_t
2529pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2530{
2531	int		 hlen;
2532	u_int8_t	 hdr[60];
2533	u_int8_t	*opt, optlen;
2534	u_int8_t	 wscale = 0;
2535
2536	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
2537	if (hlen <= sizeof(struct tcphdr))
2538		return (0);
2539	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2540		return (0);
2541	opt = hdr + sizeof(struct tcphdr);
2542	hlen -= sizeof(struct tcphdr);
2543	while (hlen >= 3) {
2544		switch (*opt) {
2545		case TCPOPT_EOL:
2546		case TCPOPT_NOP:
2547			++opt;
2548			--hlen;
2549			break;
2550		case TCPOPT_WINDOW:
2551			wscale = opt[2];
2552			if (wscale > TCP_MAX_WINSHIFT)
2553				wscale = TCP_MAX_WINSHIFT;
2554			wscale |= PF_WSCALE_FLAG;
2555			/* FALLTHROUGH */
2556		default:
2557			optlen = opt[1];
2558			if (optlen < 2)
2559				optlen = 2;
2560			hlen -= optlen;
2561			opt += optlen;
2562			break;
2563		}
2564	}
2565	return (wscale);
2566}
2567
2568u_int16_t
2569pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2570{
2571	int		 hlen;
2572	u_int8_t	 hdr[60];
2573	u_int8_t	*opt, optlen;
2574	u_int16_t	 mss = tcp_mssdflt;
2575
2576	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
2577	if (hlen <= sizeof(struct tcphdr))
2578		return (0);
2579	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2580		return (0);
2581	opt = hdr + sizeof(struct tcphdr);
2582	hlen -= sizeof(struct tcphdr);
2583	while (hlen >= TCPOLEN_MAXSEG) {
2584		switch (*opt) {
2585		case TCPOPT_EOL:
2586		case TCPOPT_NOP:
2587			++opt;
2588			--hlen;
2589			break;
2590		case TCPOPT_MAXSEG:
2591			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
2592			NTOHS(mss);
2593			/* FALLTHROUGH */
2594		default:
2595			optlen = opt[1];
2596			if (optlen < 2)
2597				optlen = 2;
2598			hlen -= optlen;
2599			opt += optlen;
2600			break;
2601		}
2602	}
2603	return (mss);
2604}
2605
2606u_int16_t
2607pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2608{
2609#ifdef INET
2610	struct sockaddr_in	*dst;
2611	struct route		 ro;
2612#endif /* INET */
2613#ifdef INET6
2614	struct sockaddr_in6	*dst6;
2615	struct route_in6	 ro6;
2616#endif /* INET6 */
2617	struct rtentry		*rt = NULL;
2618	int			 hlen = 0;	/* make the compiler happy */
2619	u_int16_t		 mss = tcp_mssdflt;
2620
2621	switch (af) {
2622#ifdef INET
2623	case AF_INET:
2624		hlen = sizeof(struct ip);
2625		bzero(&ro, sizeof(ro));
2626		dst = (struct sockaddr_in *)&ro.ro_dst;
2627		dst->sin_family = AF_INET;
2628		dst->sin_len = sizeof(*dst);
2629		dst->sin_addr = addr->v4;
2630#ifdef __FreeBSD__
2631#ifdef RTF_PRCLONING
2632		rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
2633#else /* !RTF_PRCLONING */
2634		rtalloc_ign(&ro, RTF_CLONING);
2635#endif
2636#else /* ! __FreeBSD__ */
2637		rtalloc_noclone(&ro, NO_CLONING);
2638#endif
2639		rt = ro.ro_rt;
2640		break;
2641#endif /* INET */
2642#ifdef INET6
2643	case AF_INET6:
2644		hlen = sizeof(struct ip6_hdr);
2645		bzero(&ro6, sizeof(ro6));
2646		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
2647		dst6->sin6_family = AF_INET6;
2648		dst6->sin6_len = sizeof(*dst6);
2649		dst6->sin6_addr = addr->v6;
2650#ifdef __FreeBSD__
2651#ifdef RTF_PRCLONING
2652		rtalloc_ign((struct route *)&ro6,
2653		    (RTF_CLONING | RTF_PRCLONING));
2654#else /* !RTF_PRCLONING */
2655		rtalloc_ign((struct route *)&ro6, RTF_CLONING);
2656#endif
2657#else /* ! __FreeBSD__ */
2658		rtalloc_noclone((struct route *)&ro6, NO_CLONING);
2659#endif
2660		rt = ro6.ro_rt;
2661		break;
2662#endif /* INET6 */
2663	}
2664
2665	if (rt && rt->rt_ifp) {
2666		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2667		mss = max(tcp_mssdflt, mss);
2668		RTFREE(rt);
2669	}
2670	mss = min(mss, offer);
2671	mss = max(mss, 64);		/* sanity - at least max opt space */
2672	return (mss);
2673}
2674
2675void
2676pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2677{
2678	struct pf_rule *r = s->rule.ptr;
2679
2680	s->rt_kif = NULL;
2681	if (!r->rt || r->rt == PF_FASTROUTE)
2682		return;
2683	switch (s->af) {
2684#ifdef INET
2685	case AF_INET:
2686		pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
2687		    &s->nat_src_node);
2688		s->rt_kif = r->rpool.cur->kif;
2689		break;
2690#endif /* INET */
2691#ifdef INET6
2692	case AF_INET6:
2693		pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
2694		    &s->nat_src_node);
2695		s->rt_kif = r->rpool.cur->kif;
2696		break;
2697#endif /* INET6 */
2698	}
2699}
2700
2701int
2702pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
2703    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
2704#ifdef __FreeBSD__
2705    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
2706    struct inpcb *inp)
2707#else
2708    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
2709#endif
2710{
2711	struct pf_rule		*nr = NULL;
2712	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
2713	struct tcphdr		*th = pd->hdr.tcp;
2714	u_int16_t		 bport, nport = 0;
2715	sa_family_t		 af = pd->af;
2716	int			 lookup = -1;
2717	uid_t			 uid;
2718	gid_t			 gid;
2719	struct pf_rule		*r, *a = NULL;
2720	struct pf_ruleset	*ruleset = NULL;
2721	struct pf_src_node	*nsn = NULL;
2722	u_short			 reason;
2723	int			 rewrite = 0;
2724	struct pf_tag		*pftag = NULL;
2725	int			 tag = -1;
2726	u_int16_t		 mss = tcp_mssdflt;
2727
2728	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
2729
2730	if (direction == PF_OUT) {
2731		bport = nport = th->th_sport;
2732		/* check outgoing packet for BINAT/NAT */
2733		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
2734		    saddr, th->th_sport, daddr, th->th_dport,
2735		    &pd->naddr, &nport)) != NULL) {
2736			PF_ACPY(&pd->baddr, saddr, af);
2737			pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2738			    &th->th_sum, &pd->naddr, nport, 0, af);
2739			rewrite++;
2740			if (nr->natpass)
2741				r = NULL;
2742			pd->nat_rule = nr;
2743		}
2744	} else {
2745		bport = nport = th->th_dport;
2746		/* check incoming packet for BINAT/RDR */
2747		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
2748		    saddr, th->th_sport, daddr, th->th_dport,
2749		    &pd->naddr, &nport)) != NULL) {
2750			PF_ACPY(&pd->baddr, daddr, af);
2751			pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2752			    &th->th_sum, &pd->naddr, nport, 0, af);
2753			rewrite++;
2754			if (nr->natpass)
2755				r = NULL;
2756			pd->nat_rule = nr;
2757		}
2758	}
2759
2760	while (r != NULL) {
2761		r->evaluations++;
2762		if (r->kif != NULL &&
2763		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
2764			r = r->skip[PF_SKIP_IFP].ptr;
2765		else if (r->direction && r->direction != direction)
2766			r = r->skip[PF_SKIP_DIR].ptr;
2767		else if (r->af && r->af != af)
2768			r = r->skip[PF_SKIP_AF].ptr;
2769		else if (r->proto && r->proto != IPPROTO_TCP)
2770			r = r->skip[PF_SKIP_PROTO].ptr;
2771		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
2772			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2773		else if (r->src.port_op && !pf_match_port(r->src.port_op,
2774		    r->src.port[0], r->src.port[1], th->th_sport))
2775			r = r->skip[PF_SKIP_SRC_PORT].ptr;
2776		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
2777			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2778		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
2779		    r->dst.port[0], r->dst.port[1], th->th_dport))
2780			r = r->skip[PF_SKIP_DST_PORT].ptr;
2781		else if (r->tos && !(r->tos & pd->tos))
2782			r = TAILQ_NEXT(r, entries);
2783		else if (r->rule_flag & PFRULE_FRAGMENT)
2784			r = TAILQ_NEXT(r, entries);
2785		else if ((r->flagset & th->th_flags) != r->flags)
2786			r = TAILQ_NEXT(r, entries);
2787		else if (r->uid.op && (lookup != -1 || (lookup =
2788#ifdef __FreeBSD__
2789		    pf_socket_lookup(&uid, &gid, direction, pd, inp), 1)) &&
2790#else
2791		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
2792#endif
2793		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
2794		    uid))
2795			r = TAILQ_NEXT(r, entries);
2796		else if (r->gid.op && (lookup != -1 || (lookup =
2797#ifdef __FreeBSD__
2798		    pf_socket_lookup(&uid, &gid, direction, pd, inp), 1)) &&
2799#else
2800		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
2801#endif
2802		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
2803		    gid))
2804			r = TAILQ_NEXT(r, entries);
2805		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
2806			r = TAILQ_NEXT(r, entries);
2807		else if (r->anchorname[0] && r->anchor == NULL)
2808			r = TAILQ_NEXT(r, entries);
2809		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
2810		    pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
2811			r = TAILQ_NEXT(r, entries);
2812		else {
2813			if (r->tag)
2814				tag = r->tag;
2815			if (r->anchor == NULL) {
2816				*rm = r;
2817				*am = a;
2818				*rsm = ruleset;
2819				if ((*rm)->quick)
2820					break;
2821				r = TAILQ_NEXT(r, entries);
2822			} else
2823				PF_STEP_INTO_ANCHOR(r, a, ruleset,
2824				    PF_RULESET_FILTER);
2825		}
2826		if (r == NULL && a != NULL)
2827			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
2828			    PF_RULESET_FILTER);
2829	}
2830	r = *rm;
2831	a = *am;
2832	ruleset = *rsm;
2833
2834	REASON_SET(&reason, PFRES_MATCH);
2835
2836	if (r->log) {
2837		if (rewrite)
2838			m_copyback(m, off, sizeof(*th), (caddr_t)th);
2839		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
2840	}
2841
2842	if ((r->action == PF_DROP) &&
2843	    ((r->rule_flag & PFRULE_RETURNRST) ||
2844	    (r->rule_flag & PFRULE_RETURNICMP) ||
2845	    (r->rule_flag & PFRULE_RETURN))) {
2846		/* undo NAT changes, if they have taken place */
2847		if (nr != NULL) {
2848			if (direction == PF_OUT) {
2849				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2850				    &th->th_sum, &pd->baddr, bport, 0, af);
2851				rewrite++;
2852			} else {
2853				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2854				    &th->th_sum, &pd->baddr, bport, 0, af);
2855				rewrite++;
2856			}
2857		}
2858		if (((r->rule_flag & PFRULE_RETURNRST) ||
2859		    (r->rule_flag & PFRULE_RETURN)) &&
2860		    !(th->th_flags & TH_RST)) {
2861			u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
2862
2863			if (th->th_flags & TH_SYN)
2864				ack++;
2865			if (th->th_flags & TH_FIN)
2866				ack++;
2867			pf_send_tcp(r, af, pd->dst,
2868			    pd->src, th->th_dport, th->th_sport,
2869			    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
2870			    r->return_ttl);
2871		} else if ((af == AF_INET) && r->return_icmp)
2872			pf_send_icmp(m, r->return_icmp >> 8,
2873			    r->return_icmp & 255, af, r);
2874		else if ((af == AF_INET6) && r->return_icmp6)
2875			pf_send_icmp(m, r->return_icmp6 >> 8,
2876			    r->return_icmp6 & 255, af, r);
2877	}
2878
2879	if (r->action == PF_DROP)
2880		return (PF_DROP);
2881
2882	if (pf_tag_packet(m, pftag, tag)) {
2883		REASON_SET(&reason, PFRES_MEMORY);
2884		return (PF_DROP);
2885	}
2886
2887	if (r->keep_state || nr != NULL ||
2888	    (pd->flags & PFDESC_TCP_NORM)) {
2889		/* create new state */
2890		u_int16_t	 len;
2891		struct pf_state	*s = NULL;
2892		struct pf_src_node *sn = NULL;
2893
2894		len = pd->tot_len - off - (th->th_off << 2);
2895
2896		/* check maximums */
2897		if (r->max_states && (r->states >= r->max_states))
2898			goto cleanup;
2899		/* src node for flter rule */
2900		if ((r->rule_flag & PFRULE_SRCTRACK ||
2901		    r->rpool.opts & PF_POOL_STICKYADDR) &&
2902		    pf_insert_src_node(&sn, r, saddr, af) != 0)
2903			goto cleanup;
2904		/* src node for translation rule */
2905		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
2906		    ((direction == PF_OUT &&
2907		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
2908		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
2909			goto cleanup;
2910		s = pool_get(&pf_state_pl, PR_NOWAIT);
2911		if (s == NULL) {
2912cleanup:
2913			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
2914				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
2915				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
2916				pf_status.src_nodes--;
2917				pool_put(&pf_src_tree_pl, sn);
2918			}
2919			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
2920			    nsn->expire == 0) {
2921				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
2922				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
2923				pf_status.src_nodes--;
2924				pool_put(&pf_src_tree_pl, nsn);
2925			}
2926			REASON_SET(&reason, PFRES_MEMORY);
2927			return (PF_DROP);
2928		}
2929		bzero(s, sizeof(*s));
2930		r->states++;
2931		if (a != NULL)
2932			a->states++;
2933		s->rule.ptr = r;
2934		s->nat_rule.ptr = nr;
2935		if (s->nat_rule.ptr != NULL)
2936			s->nat_rule.ptr->states++;
2937		s->anchor.ptr = a;
2938		s->allow_opts = r->allow_opts;
2939		s->log = r->log & 2;
2940		s->proto = IPPROTO_TCP;
2941		s->direction = direction;
2942		s->af = af;
2943		if (direction == PF_OUT) {
2944			PF_ACPY(&s->gwy.addr, saddr, af);
2945			s->gwy.port = th->th_sport;		/* sport */
2946			PF_ACPY(&s->ext.addr, daddr, af);
2947			s->ext.port = th->th_dport;
2948			if (nr != NULL) {
2949				PF_ACPY(&s->lan.addr, &pd->baddr, af);
2950				s->lan.port = bport;
2951			} else {
2952				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
2953				s->lan.port = s->gwy.port;
2954			}
2955		} else {
2956			PF_ACPY(&s->lan.addr, daddr, af);
2957			s->lan.port = th->th_dport;
2958			PF_ACPY(&s->ext.addr, saddr, af);
2959			s->ext.port = th->th_sport;
2960			if (nr != NULL) {
2961				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
2962				s->gwy.port = bport;
2963			} else {
2964				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
2965				s->gwy.port = s->lan.port;
2966			}
2967		}
2968
2969		s->src.seqlo = ntohl(th->th_seq);
2970		s->src.seqhi = s->src.seqlo + len + 1;
2971		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
2972		    r->keep_state == PF_STATE_MODULATE) {
2973			/* Generate sequence number modulator */
2974			while ((s->src.seqdiff = arc4random()) == 0)
2975				;
2976			pf_change_a(&th->th_seq, &th->th_sum,
2977			    htonl(s->src.seqlo + s->src.seqdiff), 0);
2978			rewrite = 1;
2979		} else
2980			s->src.seqdiff = 0;
2981		if (th->th_flags & TH_SYN) {
2982			s->src.seqhi++;
2983			s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
2984		}
2985		s->src.max_win = MAX(ntohs(th->th_win), 1);
2986		if (s->src.wscale & PF_WSCALE_MASK) {
2987			/* Remove scale factor from initial window */
2988			int win = s->src.max_win;
2989			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
2990			s->src.max_win = (win - 1) >>
2991			    (s->src.wscale & PF_WSCALE_MASK);
2992		}
2993		if (th->th_flags & TH_FIN)
2994			s->src.seqhi++;
2995		s->dst.seqhi = 1;
2996		s->dst.max_win = 1;
2997		s->src.state = TCPS_SYN_SENT;
2998		s->dst.state = TCPS_CLOSED;
2999#ifdef __FreeBSD__
3000		s->creation = time_second;
3001		s->expire = time_second;
3002#else
3003		s->creation = time.tv_sec;
3004		s->expire = time.tv_sec;
3005#endif
3006		s->timeout = PFTM_TCP_FIRST_PACKET;
3007		pf_set_rt_ifp(s, saddr);
3008		if (sn != NULL) {
3009			s->src_node = sn;
3010			s->src_node->states++;
3011		}
3012		if (nsn != NULL) {
3013			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3014			s->nat_src_node = nsn;
3015			s->nat_src_node->states++;
3016		}
3017		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
3018		    off, pd, th, &s->src, &s->dst)) {
3019			REASON_SET(&reason, PFRES_MEMORY);
3020			pf_src_tree_remove_state(s);
3021			pool_put(&pf_state_pl, s);
3022			return (PF_DROP);
3023		}
3024		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3025		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, &s->src,
3026		    &s->dst, &rewrite)) {
3027			pf_normalize_tcp_cleanup(s);
3028			pf_src_tree_remove_state(s);
3029			pool_put(&pf_state_pl, s);
3030			return (PF_DROP);
3031		}
3032		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3033			pf_normalize_tcp_cleanup(s);
3034			REASON_SET(&reason, PFRES_MEMORY);
3035			pf_src_tree_remove_state(s);
3036			pool_put(&pf_state_pl, s);
3037			return (PF_DROP);
3038		} else
3039			*sm = s;
3040		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3041		    r->keep_state == PF_STATE_SYNPROXY) {
3042			s->src.state = PF_TCPS_PROXY_SRC;
3043			if (nr != NULL) {
3044				if (direction == PF_OUT) {
3045					pf_change_ap(saddr, &th->th_sport,
3046					    pd->ip_sum, &th->th_sum, &pd->baddr,
3047					    bport, 0, af);
3048				} else {
3049					pf_change_ap(daddr, &th->th_dport,
3050					    pd->ip_sum, &th->th_sum, &pd->baddr,
3051					    bport, 0, af);
3052				}
3053			}
3054			s->src.seqhi = arc4random();
3055			/* Find mss option */
3056			mss = pf_get_mss(m, off, th->th_off, af);
3057			mss = pf_calc_mss(saddr, af, mss);
3058			mss = pf_calc_mss(daddr, af, mss);
3059			s->src.mss = mss;
3060			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
3061			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3062			    TH_SYN|TH_ACK, 0, s->src.mss, 0);
3063			return (PF_SYNPROXY_DROP);
3064		}
3065	}
3066
3067	/* copy back packet headers if we performed NAT operations */
3068	if (rewrite)
3069		m_copyback(m, off, sizeof(*th), (caddr_t)th);
3070
3071	return (PF_PASS);
3072}
3073
3074int
3075pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
3076    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3077#ifdef __FreeBSD__
3078    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3079    struct inpcb *inp)
3080#else
3081    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
3082#endif
3083{
3084	struct pf_rule		*nr = NULL;
3085	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3086	struct udphdr		*uh = pd->hdr.udp;
3087	u_int16_t		 bport, nport = 0;
3088	sa_family_t		 af = pd->af;
3089	int			 lookup = -1;
3090	uid_t			 uid;
3091	gid_t			 gid;
3092	struct pf_rule		*r, *a = NULL;
3093	struct pf_ruleset	*ruleset = NULL;
3094	struct pf_src_node	*nsn = NULL;
3095	u_short			 reason;
3096	int			 rewrite = 0;
3097	struct pf_tag		*pftag = NULL;
3098	int			 tag = -1;
3099
3100	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3101
3102	if (direction == PF_OUT) {
3103		bport = nport = uh->uh_sport;
3104		/* check outgoing packet for BINAT/NAT */
3105		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3106		    saddr, uh->uh_sport, daddr, uh->uh_dport,
3107		    &pd->naddr, &nport)) != NULL) {
3108			PF_ACPY(&pd->baddr, saddr, af);
3109			pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
3110			    &uh->uh_sum, &pd->naddr, nport, 1, af);
3111			rewrite++;
3112			if (nr->natpass)
3113				r = NULL;
3114			pd->nat_rule = nr;
3115		}
3116	} else {
3117		bport = nport = uh->uh_dport;
3118		/* check incoming packet for BINAT/RDR */
3119		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3120		    saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr,
3121		    &nport)) != NULL) {
3122			PF_ACPY(&pd->baddr, daddr, af);
3123			pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
3124			    &uh->uh_sum, &pd->naddr, nport, 1, af);
3125			rewrite++;
3126			if (nr->natpass)
3127				r = NULL;
3128			pd->nat_rule = nr;
3129		}
3130	}
3131
3132	while (r != NULL) {
3133		r->evaluations++;
3134		if (r->kif != NULL &&
3135		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3136			r = r->skip[PF_SKIP_IFP].ptr;
3137		else if (r->direction && r->direction != direction)
3138			r = r->skip[PF_SKIP_DIR].ptr;
3139		else if (r->af && r->af != af)
3140			r = r->skip[PF_SKIP_AF].ptr;
3141		else if (r->proto && r->proto != IPPROTO_UDP)
3142			r = r->skip[PF_SKIP_PROTO].ptr;
3143		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
3144			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3145		else if (r->src.port_op && !pf_match_port(r->src.port_op,
3146		    r->src.port[0], r->src.port[1], uh->uh_sport))
3147			r = r->skip[PF_SKIP_SRC_PORT].ptr;
3148		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
3149			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3150		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3151		    r->dst.port[0], r->dst.port[1], uh->uh_dport))
3152			r = r->skip[PF_SKIP_DST_PORT].ptr;
3153		else if (r->tos && !(r->tos & pd->tos))
3154			r = TAILQ_NEXT(r, entries);
3155		else if (r->rule_flag & PFRULE_FRAGMENT)
3156			r = TAILQ_NEXT(r, entries);
3157		else if (r->uid.op && (lookup != -1 || (lookup =
3158#ifdef __FreeBSD__
3159		    pf_socket_lookup(&uid, &gid, direction, pd, inp), 1)) &&
3160#else
3161		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
3162#endif
3163		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3164		    uid))
3165			r = TAILQ_NEXT(r, entries);
3166		else if (r->gid.op && (lookup != -1 || (lookup =
3167#ifdef __FreeBSD__
3168		    pf_socket_lookup(&uid, &gid, direction, pd, inp), 1)) &&
3169#else
3170		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
3171#endif
3172		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3173		    gid))
3174			r = TAILQ_NEXT(r, entries);
3175		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
3176			r = TAILQ_NEXT(r, entries);
3177		else if (r->anchorname[0] && r->anchor == NULL)
3178			r = TAILQ_NEXT(r, entries);
3179		else if (r->os_fingerprint != PF_OSFP_ANY)
3180			r = TAILQ_NEXT(r, entries);
3181		else {
3182			if (r->tag)
3183				tag = r->tag;
3184			if (r->anchor == NULL) {
3185				*rm = r;
3186				*am = a;
3187				*rsm = ruleset;
3188				if ((*rm)->quick)
3189					break;
3190				r = TAILQ_NEXT(r, entries);
3191			} else
3192				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3193				    PF_RULESET_FILTER);
3194		}
3195		if (r == NULL && a != NULL)
3196			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3197			    PF_RULESET_FILTER);
3198	}
3199	r = *rm;
3200	a = *am;
3201	ruleset = *rsm;
3202
3203	REASON_SET(&reason, PFRES_MATCH);
3204
3205	if (r->log) {
3206		if (rewrite)
3207			m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3208		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3209	}
3210
3211	if ((r->action == PF_DROP) &&
3212	    ((r->rule_flag & PFRULE_RETURNICMP) ||
3213	    (r->rule_flag & PFRULE_RETURN))) {
3214		/* undo NAT changes, if they have taken place */
3215		if (nr != NULL) {
3216			if (direction == PF_OUT) {
3217				pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
3218				    &uh->uh_sum, &pd->baddr, bport, 1, af);
3219				rewrite++;
3220			} else {
3221				pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
3222				    &uh->uh_sum, &pd->baddr, bport, 1, af);
3223				rewrite++;
3224			}
3225		}
3226		if ((af == AF_INET) && r->return_icmp)
3227			pf_send_icmp(m, r->return_icmp >> 8,
3228			    r->return_icmp & 255, af, r);
3229		else if ((af == AF_INET6) && r->return_icmp6)
3230			pf_send_icmp(m, r->return_icmp6 >> 8,
3231			    r->return_icmp6 & 255, af, r);
3232	}
3233
3234	if (r->action == PF_DROP)
3235		return (PF_DROP);
3236
3237	if (pf_tag_packet(m, pftag, tag)) {
3238		REASON_SET(&reason, PFRES_MEMORY);
3239		return (PF_DROP);
3240	}
3241
3242	if (r->keep_state || nr != NULL) {
3243		/* create new state */
3244		struct pf_state	*s = NULL;
3245		struct pf_src_node *sn = NULL;
3246
3247		/* check maximums */
3248		if (r->max_states && (r->states >= r->max_states))
3249			goto cleanup;
3250		/* src node for flter rule */
3251		if ((r->rule_flag & PFRULE_SRCTRACK ||
3252		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3253		    pf_insert_src_node(&sn, r, saddr, af) != 0)
3254			goto cleanup;
3255		/* src node for translation rule */
3256		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3257		    ((direction == PF_OUT &&
3258		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3259		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3260			goto cleanup;
3261		s = pool_get(&pf_state_pl, PR_NOWAIT);
3262		if (s == NULL) {
3263cleanup:
3264			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3265				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3266				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3267				pf_status.src_nodes--;
3268				pool_put(&pf_src_tree_pl, sn);
3269			}
3270			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3271			    nsn->expire == 0) {
3272				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3273				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3274				pf_status.src_nodes--;
3275				pool_put(&pf_src_tree_pl, nsn);
3276			}
3277			REASON_SET(&reason, PFRES_MEMORY);
3278			return (PF_DROP);
3279		}
3280		bzero(s, sizeof(*s));
3281		r->states++;
3282		if (a != NULL)
3283			a->states++;
3284		s->rule.ptr = r;
3285		s->nat_rule.ptr = nr;
3286		if (s->nat_rule.ptr != NULL)
3287			s->nat_rule.ptr->states++;
3288		s->anchor.ptr = a;
3289		s->allow_opts = r->allow_opts;
3290		s->log = r->log & 2;
3291		s->proto = IPPROTO_UDP;
3292		s->direction = direction;
3293		s->af = af;
3294		if (direction == PF_OUT) {
3295			PF_ACPY(&s->gwy.addr, saddr, af);
3296			s->gwy.port = uh->uh_sport;
3297			PF_ACPY(&s->ext.addr, daddr, af);
3298			s->ext.port = uh->uh_dport;
3299			if (nr != NULL) {
3300				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3301				s->lan.port = bport;
3302			} else {
3303				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3304				s->lan.port = s->gwy.port;
3305			}
3306		} else {
3307			PF_ACPY(&s->lan.addr, daddr, af);
3308			s->lan.port = uh->uh_dport;
3309			PF_ACPY(&s->ext.addr, saddr, af);
3310			s->ext.port = uh->uh_sport;
3311			if (nr != NULL) {
3312				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3313				s->gwy.port = bport;
3314			} else {
3315				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3316				s->gwy.port = s->lan.port;
3317			}
3318		}
3319		s->src.state = PFUDPS_SINGLE;
3320		s->dst.state = PFUDPS_NO_TRAFFIC;
3321#ifdef __FreeBSD__
3322		s->creation = time_second;
3323		s->expire = time_second;
3324#else
3325		s->creation = time.tv_sec;
3326		s->expire = time.tv_sec;
3327#endif
3328		s->timeout = PFTM_UDP_FIRST_PACKET;
3329		pf_set_rt_ifp(s, saddr);
3330		if (sn != NULL) {
3331			s->src_node = sn;
3332			s->src_node->states++;
3333		}
3334		if (nsn != NULL) {
3335			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3336			s->nat_src_node = nsn;
3337			s->nat_src_node->states++;
3338		}
3339		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3340			REASON_SET(&reason, PFRES_MEMORY);
3341			pf_src_tree_remove_state(s);
3342			pool_put(&pf_state_pl, s);
3343			return (PF_DROP);
3344		} else
3345			*sm = s;
3346	}
3347
3348	/* copy back packet headers if we performed NAT operations */
3349	if (rewrite)
3350		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3351
3352	return (PF_PASS);
3353}
3354
3355int
3356pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
3357    struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3358    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
3359{
3360	struct pf_rule		*nr = NULL;
3361	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3362	struct pf_rule		*r, *a = NULL;
3363	struct pf_ruleset	*ruleset = NULL;
3364	struct pf_src_node	*nsn = NULL;
3365	u_short			 reason;
3366	u_int16_t		 icmpid = 0;	/* make the compiler happy */
3367	sa_family_t		 af = pd->af;
3368	u_int8_t		 icmptype = 0;	/* make the compiler happy */
3369	u_int8_t		 icmpcode = 0;	/* make the compiler happy */
3370	int			 state_icmp = 0;
3371	struct pf_tag		*pftag = NULL;
3372	int			 tag = -1;
3373#ifdef INET6
3374	int			 rewrite = 0;
3375#endif /* INET6 */
3376
3377	switch (pd->proto) {
3378#ifdef INET
3379	case IPPROTO_ICMP:
3380		icmptype = pd->hdr.icmp->icmp_type;
3381		icmpcode = pd->hdr.icmp->icmp_code;
3382		icmpid = pd->hdr.icmp->icmp_id;
3383
3384		if (icmptype == ICMP_UNREACH ||
3385		    icmptype == ICMP_SOURCEQUENCH ||
3386		    icmptype == ICMP_REDIRECT ||
3387		    icmptype == ICMP_TIMXCEED ||
3388		    icmptype == ICMP_PARAMPROB)
3389			state_icmp++;
3390		break;
3391#endif /* INET */
3392#ifdef INET6
3393	case IPPROTO_ICMPV6:
3394		icmptype = pd->hdr.icmp6->icmp6_type;
3395		icmpcode = pd->hdr.icmp6->icmp6_code;
3396		icmpid = pd->hdr.icmp6->icmp6_id;
3397
3398		if (icmptype == ICMP6_DST_UNREACH ||
3399		    icmptype == ICMP6_PACKET_TOO_BIG ||
3400		    icmptype == ICMP6_TIME_EXCEEDED ||
3401		    icmptype == ICMP6_PARAM_PROB)
3402			state_icmp++;
3403		break;
3404#endif /* INET6 */
3405	}
3406
3407	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3408
3409	if (direction == PF_OUT) {
3410		/* check outgoing packet for BINAT/NAT */
3411		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3412		    saddr, icmpid, daddr, icmpid, &pd->naddr, NULL)) != NULL) {
3413			PF_ACPY(&pd->baddr, saddr, af);
3414			switch (af) {
3415#ifdef INET
3416			case AF_INET:
3417				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3418				    pd->naddr.v4.s_addr, 0);
3419				break;
3420#endif /* INET */
3421#ifdef INET6
3422			case AF_INET6:
3423				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3424				    &pd->naddr, 0);
3425				rewrite++;
3426				break;
3427#endif /* INET6 */
3428			}
3429			if (nr->natpass)
3430				r = NULL;
3431			pd->nat_rule = nr;
3432		}
3433	} else {
3434		/* check incoming packet for BINAT/RDR */
3435		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3436		    saddr, icmpid, daddr, icmpid, &pd->naddr, NULL)) != NULL) {
3437			PF_ACPY(&pd->baddr, daddr, af);
3438			switch (af) {
3439#ifdef INET
3440			case AF_INET:
3441				pf_change_a(&daddr->v4.s_addr,
3442				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3443				break;
3444#endif /* INET */
3445#ifdef INET6
3446			case AF_INET6:
3447				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3448				    &pd->naddr, 0);
3449				rewrite++;
3450				break;
3451#endif /* INET6 */
3452			}
3453			if (nr->natpass)
3454				r = NULL;
3455			pd->nat_rule = nr;
3456		}
3457	}
3458
3459	while (r != NULL) {
3460		r->evaluations++;
3461		if (r->kif != NULL &&
3462		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3463			r = r->skip[PF_SKIP_IFP].ptr;
3464		else if (r->direction && r->direction != direction)
3465			r = r->skip[PF_SKIP_DIR].ptr;
3466		else if (r->af && r->af != af)
3467			r = r->skip[PF_SKIP_AF].ptr;
3468		else if (r->proto && r->proto != pd->proto)
3469			r = r->skip[PF_SKIP_PROTO].ptr;
3470		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
3471			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3472		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
3473			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3474		else if (r->type && r->type != icmptype + 1)
3475			r = TAILQ_NEXT(r, entries);
3476		else if (r->code && r->code != icmpcode + 1)
3477			r = TAILQ_NEXT(r, entries);
3478		else if (r->tos && !(r->tos & pd->tos))
3479			r = TAILQ_NEXT(r, entries);
3480		else if (r->rule_flag & PFRULE_FRAGMENT)
3481			r = TAILQ_NEXT(r, entries);
3482		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
3483			r = TAILQ_NEXT(r, entries);
3484		else if (r->anchorname[0] && r->anchor == NULL)
3485			r = TAILQ_NEXT(r, entries);
3486		else if (r->os_fingerprint != PF_OSFP_ANY)
3487			r = TAILQ_NEXT(r, entries);
3488		else {
3489			if (r->tag)
3490				tag = r->tag;
3491			if (r->anchor == NULL) {
3492				*rm = r;
3493				*am = a;
3494				*rsm = ruleset;
3495				if ((*rm)->quick)
3496					break;
3497				r = TAILQ_NEXT(r, entries);
3498			} else
3499				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3500				    PF_RULESET_FILTER);
3501		}
3502		if (r == NULL && a != NULL)
3503			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3504			    PF_RULESET_FILTER);
3505	}
3506	r = *rm;
3507	a = *am;
3508	ruleset = *rsm;
3509
3510	REASON_SET(&reason, PFRES_MATCH);
3511
3512	if (r->log) {
3513#ifdef INET6
3514		if (rewrite)
3515			m_copyback(m, off, sizeof(struct icmp6_hdr),
3516			    (caddr_t)pd->hdr.icmp6);
3517#endif /* INET6 */
3518		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3519	}
3520
3521	if (r->action != PF_PASS)
3522		return (PF_DROP);
3523
3524	if (pf_tag_packet(m, pftag, tag)) {
3525		REASON_SET(&reason, PFRES_MEMORY);
3526		return (PF_DROP);
3527	}
3528
3529	if (!state_icmp && (r->keep_state || nr != NULL)) {
3530		/* create new state */
3531		struct pf_state	*s = NULL;
3532		struct pf_src_node *sn = NULL;
3533
3534		/* check maximums */
3535		if (r->max_states && (r->states >= r->max_states))
3536			goto cleanup;
3537		/* src node for flter rule */
3538		if ((r->rule_flag & PFRULE_SRCTRACK ||
3539		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3540		    pf_insert_src_node(&sn, r, saddr, af) != 0)
3541			goto cleanup;
3542		/* src node for translation rule */
3543		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3544		    ((direction == PF_OUT &&
3545		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3546		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3547			goto cleanup;
3548		s = pool_get(&pf_state_pl, PR_NOWAIT);
3549		if (s == NULL) {
3550cleanup:
3551			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3552				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3553				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3554				pf_status.src_nodes--;
3555				pool_put(&pf_src_tree_pl, sn);
3556			}
3557			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3558			    nsn->expire == 0) {
3559				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3560				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3561				pf_status.src_nodes--;
3562				pool_put(&pf_src_tree_pl, nsn);
3563			}
3564			REASON_SET(&reason, PFRES_MEMORY);
3565			return (PF_DROP);
3566		}
3567		bzero(s, sizeof(*s));
3568		r->states++;
3569		if (a != NULL)
3570			a->states++;
3571		s->rule.ptr = r;
3572		s->nat_rule.ptr = nr;
3573		if (s->nat_rule.ptr != NULL)
3574			s->nat_rule.ptr->states++;
3575		s->anchor.ptr = a;
3576		s->allow_opts = r->allow_opts;
3577		s->log = r->log & 2;
3578		s->proto = pd->proto;
3579		s->direction = direction;
3580		s->af = af;
3581		if (direction == PF_OUT) {
3582			PF_ACPY(&s->gwy.addr, saddr, af);
3583			s->gwy.port = icmpid;
3584			PF_ACPY(&s->ext.addr, daddr, af);
3585			s->ext.port = icmpid;
3586			if (nr != NULL)
3587				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3588			else
3589				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3590			s->lan.port = icmpid;
3591		} else {
3592			PF_ACPY(&s->lan.addr, daddr, af);
3593			s->lan.port = icmpid;
3594			PF_ACPY(&s->ext.addr, saddr, af);
3595			s->ext.port = icmpid;
3596			if (nr != NULL)
3597				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3598			else
3599				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3600			s->gwy.port = icmpid;
3601		}
3602#ifdef __FreeBSD__
3603		s->creation = time_second;
3604		s->expire = time_second;
3605#else
3606		s->creation = time.tv_sec;
3607		s->expire = time.tv_sec;
3608#endif
3609		s->timeout = PFTM_ICMP_FIRST_PACKET;
3610		pf_set_rt_ifp(s, saddr);
3611		if (sn != NULL) {
3612			s->src_node = sn;
3613			s->src_node->states++;
3614		}
3615		if (nsn != NULL) {
3616			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3617			s->nat_src_node = nsn;
3618			s->nat_src_node->states++;
3619		}
3620		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3621			REASON_SET(&reason, PFRES_MEMORY);
3622			pf_src_tree_remove_state(s);
3623			pool_put(&pf_state_pl, s);
3624			return (PF_DROP);
3625		} else
3626			*sm = s;
3627	}
3628
3629#ifdef INET6
3630	/* copy back packet headers if we performed IPv6 NAT operations */
3631	if (rewrite)
3632		m_copyback(m, off, sizeof(struct icmp6_hdr),
3633		    (caddr_t)pd->hdr.icmp6);
3634#endif /* INET6 */
3635
3636	return (PF_PASS);
3637}
3638
3639int
3640pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
3641    struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3642    struct pf_rule **am, struct pf_ruleset **rsm)
3643{
3644	struct pf_rule		*nr = NULL;
3645	struct pf_rule		*r, *a = NULL;
3646	struct pf_ruleset	*ruleset = NULL;
3647	struct pf_src_node	*nsn = NULL;
3648	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3649	sa_family_t		 af = pd->af;
3650	u_short			 reason;
3651	struct pf_tag		*pftag = NULL;
3652	int			 tag = -1;
3653
3654	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3655
3656	if (direction == PF_OUT) {
3657		/* check outgoing packet for BINAT/NAT */
3658		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3659		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3660			PF_ACPY(&pd->baddr, saddr, af);
3661			switch (af) {
3662#ifdef INET
3663			case AF_INET:
3664				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3665				    pd->naddr.v4.s_addr, 0);
3666				break;
3667#endif /* INET */
3668#ifdef INET6
3669			case AF_INET6:
3670				PF_ACPY(saddr, &pd->naddr, af);
3671				break;
3672#endif /* INET6 */
3673			}
3674			if (nr->natpass)
3675				r = NULL;
3676			pd->nat_rule = nr;
3677		}
3678	} else {
3679		/* check incoming packet for BINAT/RDR */
3680		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3681		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3682			PF_ACPY(&pd->baddr, daddr, af);
3683			switch (af) {
3684#ifdef INET
3685			case AF_INET:
3686				pf_change_a(&daddr->v4.s_addr,
3687				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
3688				break;
3689#endif /* INET */
3690#ifdef INET6
3691			case AF_INET6:
3692				PF_ACPY(daddr, &pd->naddr, af);
3693				break;
3694#endif /* INET6 */
3695			}
3696			if (nr->natpass)
3697				r = NULL;
3698			pd->nat_rule = nr;
3699		}
3700	}
3701
3702	while (r != NULL) {
3703		r->evaluations++;
3704		if (r->kif != NULL &&
3705		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3706			r = r->skip[PF_SKIP_IFP].ptr;
3707		else if (r->direction && r->direction != direction)
3708			r = r->skip[PF_SKIP_DIR].ptr;
3709		else if (r->af && r->af != af)
3710			r = r->skip[PF_SKIP_AF].ptr;
3711		else if (r->proto && r->proto != pd->proto)
3712			r = r->skip[PF_SKIP_PROTO].ptr;
3713		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3714			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3715		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3716			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3717		else if (r->tos && !(r->tos & pd->tos))
3718			r = TAILQ_NEXT(r, entries);
3719		else if (r->rule_flag & PFRULE_FRAGMENT)
3720			r = TAILQ_NEXT(r, entries);
3721		else if (r->match_tag && !pf_match_tag(m, r, nr, pftag, &tag))
3722			r = TAILQ_NEXT(r, entries);
3723		else if (r->anchorname[0] && r->anchor == NULL)
3724			r = TAILQ_NEXT(r, entries);
3725		else if (r->os_fingerprint != PF_OSFP_ANY)
3726			r = TAILQ_NEXT(r, entries);
3727		else {
3728			if (r->tag)
3729				tag = r->tag;
3730			if (r->anchor == NULL) {
3731				*rm = r;
3732				*am = a;
3733				*rsm = ruleset;
3734				if ((*rm)->quick)
3735					break;
3736				r = TAILQ_NEXT(r, entries);
3737			} else
3738				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3739				    PF_RULESET_FILTER);
3740		}
3741		if (r == NULL && a != NULL)
3742			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3743			    PF_RULESET_FILTER);
3744	}
3745	r = *rm;
3746	a = *am;
3747	ruleset = *rsm;
3748
3749	REASON_SET(&reason, PFRES_MATCH);
3750
3751	if (r->log)
3752		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3753
3754	if ((r->action == PF_DROP) &&
3755	    ((r->rule_flag & PFRULE_RETURNICMP) ||
3756	    (r->rule_flag & PFRULE_RETURN))) {
3757		struct pf_addr *a = NULL;
3758
3759		if (nr != NULL) {
3760			if (direction == PF_OUT)
3761				a = saddr;
3762			else
3763				a = daddr;
3764		}
3765		if (a != NULL) {
3766			switch (af) {
3767#ifdef INET
3768			case AF_INET:
3769				pf_change_a(&a->v4.s_addr, pd->ip_sum,
3770				    pd->baddr.v4.s_addr, 0);
3771				break;
3772#endif /* INET */
3773#ifdef INET6
3774			case AF_INET6:
3775				PF_ACPY(a, &pd->baddr, af);
3776				break;
3777#endif /* INET6 */
3778			}
3779		}
3780		if ((af == AF_INET) && r->return_icmp)
3781			pf_send_icmp(m, r->return_icmp >> 8,
3782			    r->return_icmp & 255, af, r);
3783		else if ((af == AF_INET6) && r->return_icmp6)
3784			pf_send_icmp(m, r->return_icmp6 >> 8,
3785			    r->return_icmp6 & 255, af, r);
3786	}
3787
3788	if (r->action != PF_PASS)
3789		return (PF_DROP);
3790
3791	if (pf_tag_packet(m, pftag, tag)) {
3792		REASON_SET(&reason, PFRES_MEMORY);
3793		return (PF_DROP);
3794	}
3795
3796	if (r->keep_state || nr != NULL) {
3797		/* create new state */
3798		struct pf_state	*s = NULL;
3799		struct pf_src_node *sn = NULL;
3800
3801		/* check maximums */
3802		if (r->max_states && (r->states >= r->max_states))
3803			goto cleanup;
3804		/* src node for flter rule */
3805		if ((r->rule_flag & PFRULE_SRCTRACK ||
3806		    r->rpool.opts & PF_POOL_STICKYADDR) &&
3807		    pf_insert_src_node(&sn, r, saddr, af) != 0)
3808			goto cleanup;
3809		/* src node for translation rule */
3810		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3811		    ((direction == PF_OUT &&
3812		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3813		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3814			goto cleanup;
3815		s = pool_get(&pf_state_pl, PR_NOWAIT);
3816		if (s == NULL) {
3817cleanup:
3818			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3819				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3820				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3821				pf_status.src_nodes--;
3822				pool_put(&pf_src_tree_pl, sn);
3823			}
3824			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3825			    nsn->expire == 0) {
3826				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3827				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3828				pf_status.src_nodes--;
3829				pool_put(&pf_src_tree_pl, nsn);
3830			}
3831			REASON_SET(&reason, PFRES_MEMORY);
3832			return (PF_DROP);
3833		}
3834		bzero(s, sizeof(*s));
3835		r->states++;
3836		if (a != NULL)
3837			a->states++;
3838		s->rule.ptr = r;
3839		s->nat_rule.ptr = nr;
3840		if (s->nat_rule.ptr != NULL)
3841			s->nat_rule.ptr->states++;
3842		s->anchor.ptr = a;
3843		s->allow_opts = r->allow_opts;
3844		s->log = r->log & 2;
3845		s->proto = pd->proto;
3846		s->direction = direction;
3847		s->af = af;
3848		if (direction == PF_OUT) {
3849			PF_ACPY(&s->gwy.addr, saddr, af);
3850			PF_ACPY(&s->ext.addr, daddr, af);
3851			if (nr != NULL)
3852				PF_ACPY(&s->lan.addr, &pd->baddr, af);
3853			else
3854				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3855		} else {
3856			PF_ACPY(&s->lan.addr, daddr, af);
3857			PF_ACPY(&s->ext.addr, saddr, af);
3858			if (nr != NULL)
3859				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3860			else
3861				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3862		}
3863		s->src.state = PFOTHERS_SINGLE;
3864		s->dst.state = PFOTHERS_NO_TRAFFIC;
3865#ifdef __FreeBSD__
3866		s->creation = time_second;
3867		s->expire = time_second;
3868#else
3869		s->creation = time.tv_sec;
3870		s->expire = time.tv_sec;
3871#endif
3872		s->timeout = PFTM_OTHER_FIRST_PACKET;
3873		pf_set_rt_ifp(s, saddr);
3874		if (sn != NULL) {
3875			s->src_node = sn;
3876			s->src_node->states++;
3877		}
3878		if (nsn != NULL) {
3879			PF_ACPY(&nsn->raddr, &pd->naddr, af);
3880			s->nat_src_node = nsn;
3881			s->nat_src_node->states++;
3882		}
3883		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3884			REASON_SET(&reason, PFRES_MEMORY);
3885			pf_src_tree_remove_state(s);
3886			pool_put(&pf_state_pl, s);
3887			return (PF_DROP);
3888		} else
3889			*sm = s;
3890	}
3891
3892	return (PF_PASS);
3893}
3894
3895int
3896pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3897    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3898    struct pf_ruleset **rsm)
3899{
3900	struct pf_rule		*r, *a = NULL;
3901	struct pf_ruleset	*ruleset = NULL;
3902	sa_family_t		 af = pd->af;
3903	u_short			 reason;
3904	struct pf_tag		*pftag = NULL;
3905	int			 tag = -1;
3906
3907	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3908	while (r != NULL) {
3909		r->evaluations++;
3910		if (r->kif != NULL &&
3911		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3912			r = r->skip[PF_SKIP_IFP].ptr;
3913		else if (r->direction && r->direction != direction)
3914			r = r->skip[PF_SKIP_DIR].ptr;
3915		else if (r->af && r->af != af)
3916			r = r->skip[PF_SKIP_AF].ptr;
3917		else if (r->proto && r->proto != pd->proto)
3918			r = r->skip[PF_SKIP_PROTO].ptr;
3919		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3920			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3921		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3922			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3923		else if (r->tos && !(r->tos & pd->tos))
3924			r = TAILQ_NEXT(r, entries);
3925		else if (r->src.port_op || r->dst.port_op ||
3926		    r->flagset || r->type || r->code ||
3927		    r->os_fingerprint != PF_OSFP_ANY)
3928			r = TAILQ_NEXT(r, entries);
3929		else if (r->match_tag && !pf_match_tag(m, r, NULL, pftag, &tag))
3930			r = TAILQ_NEXT(r, entries);
3931		else if (r->anchorname[0] && r->anchor == NULL)
3932			r = TAILQ_NEXT(r, entries);
3933		else {
3934			if (r->anchor == NULL) {
3935				*rm = r;
3936				*am = a;
3937				*rsm = ruleset;
3938				if ((*rm)->quick)
3939					break;
3940				r = TAILQ_NEXT(r, entries);
3941			} else
3942				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3943				    PF_RULESET_FILTER);
3944		}
3945		if (r == NULL && a != NULL)
3946			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3947			    PF_RULESET_FILTER);
3948	}
3949	r = *rm;
3950	a = *am;
3951	ruleset = *rsm;
3952
3953	REASON_SET(&reason, PFRES_MATCH);
3954
3955	if (r->log)
3956		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3957
3958	if (r->action != PF_PASS)
3959		return (PF_DROP);
3960
3961	if (pf_tag_packet(m, pftag, tag)) {
3962		REASON_SET(&reason, PFRES_MEMORY);
3963		return (PF_DROP);
3964	}
3965
3966	return (PF_PASS);
3967}
3968
3969int
3970pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
3971    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3972    u_short *reason)
3973{
3974	struct pf_state		 key;
3975	struct tcphdr		*th = pd->hdr.tcp;
3976	u_int16_t		 win = ntohs(th->th_win);
3977	u_int32_t		 ack, end, seq;
3978	u_int8_t		 sws, dws;
3979	int			 ackskew;
3980	int			 copyback = 0;
3981	struct pf_state_peer	*src, *dst;
3982
3983	key.af = pd->af;
3984	key.proto = IPPROTO_TCP;
3985	if (direction == PF_IN)	{
3986		PF_ACPY(&key.ext.addr, pd->src, key.af);
3987		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
3988		key.ext.port = th->th_sport;
3989		key.gwy.port = th->th_dport;
3990	} else {
3991		PF_ACPY(&key.lan.addr, pd->src, key.af);
3992		PF_ACPY(&key.ext.addr, pd->dst, key.af);
3993		key.lan.port = th->th_sport;
3994		key.ext.port = th->th_dport;
3995	}
3996
3997	STATE_LOOKUP();
3998
3999	if (direction == (*state)->direction) {
4000		src = &(*state)->src;
4001		dst = &(*state)->dst;
4002	} else {
4003		src = &(*state)->dst;
4004		dst = &(*state)->src;
4005	}
4006
4007	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
4008		if (direction != (*state)->direction)
4009			return (PF_SYNPROXY_DROP);
4010		if (th->th_flags & TH_SYN) {
4011			if (ntohl(th->th_seq) != (*state)->src.seqlo)
4012				return (PF_DROP);
4013			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4014			    pd->src, th->th_dport, th->th_sport,
4015			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
4016			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0);
4017			return (PF_SYNPROXY_DROP);
4018		} else if (!(th->th_flags & TH_ACK) ||
4019		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4020		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1))
4021			return (PF_DROP);
4022		else
4023			(*state)->src.state = PF_TCPS_PROXY_DST;
4024	}
4025	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
4026		struct pf_state_host *src, *dst;
4027
4028		if (direction == PF_OUT) {
4029			src = &(*state)->gwy;
4030			dst = &(*state)->ext;
4031		} else {
4032			src = &(*state)->ext;
4033			dst = &(*state)->lan;
4034		}
4035		if (direction == (*state)->direction) {
4036			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
4037			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4038			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1))
4039				return (PF_DROP);
4040			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
4041			if ((*state)->dst.seqhi == 1)
4042				(*state)->dst.seqhi = arc4random();
4043			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
4044			    &dst->addr, src->port, dst->port,
4045			    (*state)->dst.seqhi, 0, TH_SYN, 0,
4046			    (*state)->src.mss, 0);
4047			return (PF_SYNPROXY_DROP);
4048		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
4049		    (TH_SYN|TH_ACK)) ||
4050		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1))
4051			return (PF_DROP);
4052		else {
4053			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
4054			(*state)->dst.seqlo = ntohl(th->th_seq);
4055			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4056			    pd->src, th->th_dport, th->th_sport,
4057			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
4058			    TH_ACK, (*state)->src.max_win, 0, 0);
4059			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
4060			    &dst->addr, src->port, dst->port,
4061			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
4062			    TH_ACK, (*state)->dst.max_win, 0, 0);
4063			(*state)->src.seqdiff = (*state)->dst.seqhi -
4064			    (*state)->src.seqlo;
4065			(*state)->dst.seqdiff = (*state)->src.seqhi -
4066			    (*state)->dst.seqlo;
4067			(*state)->src.seqhi = (*state)->src.seqlo +
4068			    (*state)->src.max_win;
4069			(*state)->dst.seqhi = (*state)->dst.seqlo +
4070			    (*state)->dst.max_win;
4071			(*state)->src.wscale = (*state)->dst.wscale = 0;
4072			(*state)->src.state = (*state)->dst.state =
4073			    TCPS_ESTABLISHED;
4074			return (PF_SYNPROXY_DROP);
4075		}
4076	}
4077
4078	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
4079		sws = src->wscale & PF_WSCALE_MASK;
4080		dws = dst->wscale & PF_WSCALE_MASK;
4081	} else
4082		sws = dws = 0;
4083
4084	/*
4085	 * Sequence tracking algorithm from Guido van Rooij's paper:
4086	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
4087	 *	tcp_filtering.ps
4088	 */
4089
4090	seq = ntohl(th->th_seq);
4091	if (src->seqlo == 0) {
4092		/* First packet from this end. Set its state */
4093
4094		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
4095		    src->scrub == NULL) {
4096			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
4097				REASON_SET(reason, PFRES_MEMORY);
4098				return (PF_DROP);
4099			}
4100		}
4101
4102		/* Deferred generation of sequence number modulator */
4103		if (dst->seqdiff && !src->seqdiff) {
4104			while ((src->seqdiff = arc4random()) == 0)
4105				;
4106			ack = ntohl(th->th_ack) - dst->seqdiff;
4107			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4108			    src->seqdiff), 0);
4109			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4110			copyback = 1;
4111		} else {
4112			ack = ntohl(th->th_ack);
4113		}
4114
4115		end = seq + pd->p_len;
4116		if (th->th_flags & TH_SYN) {
4117			end++;
4118			if (dst->wscale & PF_WSCALE_FLAG) {
4119				src->wscale = pf_get_wscale(m, off, th->th_off,
4120				    pd->af);
4121				if (src->wscale & PF_WSCALE_FLAG) {
4122					/* Remove scale factor from initial
4123					 * window */
4124					sws = src->wscale & PF_WSCALE_MASK;
4125					win = ((u_int32_t)win + (1 << sws) - 1)
4126					    >> sws;
4127					dws = dst->wscale & PF_WSCALE_MASK;
4128				} else {
4129					/* fixup other window */
4130					dst->max_win <<= dst->wscale &
4131					    PF_WSCALE_MASK;
4132					/* in case of a retrans SYN|ACK */
4133					dst->wscale = 0;
4134				}
4135			}
4136		}
4137		if (th->th_flags & TH_FIN)
4138			end++;
4139
4140		src->seqlo = seq;
4141		if (src->state < TCPS_SYN_SENT)
4142			src->state = TCPS_SYN_SENT;
4143
4144		/*
4145		 * May need to slide the window (seqhi may have been set by
4146		 * the crappy stack check or if we picked up the connection
4147		 * after establishment)
4148		 */
4149		if (src->seqhi == 1 ||
4150		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4151			src->seqhi = end + MAX(1, dst->max_win << dws);
4152		if (win > src->max_win)
4153			src->max_win = win;
4154
4155	} else {
4156		ack = ntohl(th->th_ack) - dst->seqdiff;
4157		if (src->seqdiff) {
4158			/* Modulate sequence numbers */
4159			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4160			    src->seqdiff), 0);
4161			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4162			copyback = 1;
4163		}
4164		end = seq + pd->p_len;
4165		if (th->th_flags & TH_SYN)
4166			end++;
4167		if (th->th_flags & TH_FIN)
4168			end++;
4169	}
4170
4171	if ((th->th_flags & TH_ACK) == 0) {
4172		/* Let it pass through the ack skew check */
4173		ack = dst->seqlo;
4174	} else if ((ack == 0 &&
4175	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4176	    /* broken tcp stacks do not set ack */
4177	    (dst->state < TCPS_SYN_SENT)) {
4178		/*
4179		 * Many stacks (ours included) will set the ACK number in an
4180		 * FIN|ACK if the SYN times out -- no sequence to ACK.
4181		 */
4182		ack = dst->seqlo;
4183	}
4184
4185	if (seq == end) {
4186		/* Ease sequencing restrictions on no data packets */
4187		seq = src->seqlo;
4188		end = seq;
4189	}
4190
4191	ackskew = dst->seqlo - ack;
4192
4193#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
4194	if (SEQ_GEQ(src->seqhi, end) &&
4195	    /* Last octet inside other's window space */
4196	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4197	    /* Retrans: not more than one window back */
4198	    (ackskew >= -MAXACKWINDOW) &&
4199	    /* Acking not more than one reassembled fragment backwards */
4200	    (ackskew <= (MAXACKWINDOW << sws))) {
4201	    /* Acking not more than one window forward */
4202
4203		/* update max window */
4204		if (src->max_win < win)
4205			src->max_win = win;
4206		/* synchronize sequencing */
4207		if (SEQ_GT(end, src->seqlo))
4208			src->seqlo = end;
4209		/* slide the window of what the other end can send */
4210		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4211			dst->seqhi = ack + MAX((win << sws), 1);
4212
4213
4214		/* update states */
4215		if (th->th_flags & TH_SYN)
4216			if (src->state < TCPS_SYN_SENT)
4217				src->state = TCPS_SYN_SENT;
4218		if (th->th_flags & TH_FIN)
4219			if (src->state < TCPS_CLOSING)
4220				src->state = TCPS_CLOSING;
4221		if (th->th_flags & TH_ACK) {
4222			if (dst->state == TCPS_SYN_SENT)
4223				dst->state = TCPS_ESTABLISHED;
4224			else if (dst->state == TCPS_CLOSING)
4225				dst->state = TCPS_FIN_WAIT_2;
4226		}
4227		if (th->th_flags & TH_RST)
4228			src->state = dst->state = TCPS_TIME_WAIT;
4229
4230		/* update expire time */
4231#ifdef __FreeBSD__
4232		(*state)->expire = time_second;
4233#else
4234		(*state)->expire = time.tv_sec;
4235#endif
4236		if (src->state >= TCPS_FIN_WAIT_2 &&
4237		    dst->state >= TCPS_FIN_WAIT_2)
4238			(*state)->timeout = PFTM_TCP_CLOSED;
4239		else if (src->state >= TCPS_FIN_WAIT_2 ||
4240		    dst->state >= TCPS_FIN_WAIT_2)
4241			(*state)->timeout = PFTM_TCP_FIN_WAIT;
4242		else if (src->state < TCPS_ESTABLISHED ||
4243		    dst->state < TCPS_ESTABLISHED)
4244			(*state)->timeout = PFTM_TCP_OPENING;
4245		else if (src->state >= TCPS_CLOSING ||
4246		    dst->state >= TCPS_CLOSING)
4247			(*state)->timeout = PFTM_TCP_CLOSING;
4248		else
4249			(*state)->timeout = PFTM_TCP_ESTABLISHED;
4250
4251		/* Fall through to PASS packet */
4252
4253	} else if ((dst->state < TCPS_SYN_SENT ||
4254		dst->state >= TCPS_FIN_WAIT_2 ||
4255		src->state >= TCPS_FIN_WAIT_2) &&
4256	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4257	    /* Within a window forward of the originating packet */
4258	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4259	    /* Within a window backward of the originating packet */
4260
4261		/*
4262		 * This currently handles three situations:
4263		 *  1) Stupid stacks will shotgun SYNs before their peer
4264		 *     replies.
4265		 *  2) When PF catches an already established stream (the
4266		 *     firewall rebooted, the state table was flushed, routes
4267		 *     changed...)
4268		 *  3) Packets get funky immediately after the connection
4269		 *     closes (this should catch Solaris spurious ACK|FINs
4270		 *     that web servers like to spew after a close)
4271		 *
4272		 * This must be a little more careful than the above code
4273		 * since packet floods will also be caught here. We don't
4274		 * update the TTL here to mitigate the damage of a packet
4275		 * flood and so the same code can handle awkward establishment
4276		 * and a loosened connection close.
4277		 * In the establishment case, a correct peer response will
4278		 * validate the connection, go through the normal state code
4279		 * and keep updating the state TTL.
4280		 */
4281
4282		if (pf_status.debug >= PF_DEBUG_MISC) {
4283			printf("pf: loose state match: ");
4284			pf_print_state(*state);
4285			pf_print_flags(th->th_flags);
4286			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n",
4287			    seq, ack, pd->p_len, ackskew,
4288			    (*state)->packets[0], (*state)->packets[1]);
4289		}
4290
4291		/* update max window */
4292		if (src->max_win < win)
4293			src->max_win = win;
4294		/* synchronize sequencing */
4295		if (SEQ_GT(end, src->seqlo))
4296			src->seqlo = end;
4297		/* slide the window of what the other end can send */
4298		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4299			dst->seqhi = ack + MAX((win << sws), 1);
4300
4301		/*
4302		 * Cannot set dst->seqhi here since this could be a shotgunned
4303		 * SYN and not an already established connection.
4304		 */
4305
4306		if (th->th_flags & TH_FIN)
4307			if (src->state < TCPS_CLOSING)
4308				src->state = TCPS_CLOSING;
4309		if (th->th_flags & TH_RST)
4310			src->state = dst->state = TCPS_TIME_WAIT;
4311
4312		/* Fall through to PASS packet */
4313
4314	} else {
4315		if ((*state)->dst.state == TCPS_SYN_SENT &&
4316		    (*state)->src.state == TCPS_SYN_SENT) {
4317			/* Send RST for state mismatches during handshake */
4318			if (!(th->th_flags & TH_RST)) {
4319				u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
4320
4321				if (th->th_flags & TH_SYN)
4322					ack++;
4323				if (th->th_flags & TH_FIN)
4324					ack++;
4325				pf_send_tcp((*state)->rule.ptr, pd->af,
4326				    pd->dst, pd->src, th->th_dport,
4327				    th->th_sport, ntohl(th->th_ack), ack,
4328				    TH_RST|TH_ACK, 0, 0,
4329				    (*state)->rule.ptr->return_ttl);
4330			}
4331			src->seqlo = 0;
4332			src->seqhi = 1;
4333			src->max_win = 1;
4334		} else if (pf_status.debug >= PF_DEBUG_MISC) {
4335			printf("pf: BAD state: ");
4336			pf_print_state(*state);
4337			pf_print_flags(th->th_flags);
4338			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d "
4339			    "dir=%s,%s\n", seq, ack, pd->p_len, ackskew,
4340			    (*state)->packets[0], (*state)->packets[1],
4341			    direction == PF_IN ? "in" : "out",
4342			    direction == (*state)->direction ? "fwd" : "rev");
4343			printf("pf: State failure on: %c %c %c %c | %c %c\n",
4344			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4345			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4346			    ' ': '2',
4347			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4348			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4349			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4350			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4351		}
4352		return (PF_DROP);
4353	}
4354
4355	if (dst->scrub || src->scrub) {
4356		if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4357		    src, dst, &copyback))
4358			return (PF_DROP);
4359	}
4360
4361	/* Any packets which have gotten here are to be passed */
4362
4363	/* translate source/destination address, if necessary */
4364	if (STATE_TRANSLATE(*state)) {
4365		if (direction == PF_OUT)
4366			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
4367			    &th->th_sum, &(*state)->gwy.addr,
4368			    (*state)->gwy.port, 0, pd->af);
4369		else
4370			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
4371			    &th->th_sum, &(*state)->lan.addr,
4372			    (*state)->lan.port, 0, pd->af);
4373		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4374	} else if (copyback) {
4375		/* Copyback sequence modulation or stateful scrub changes */
4376		m_copyback(m, off, sizeof(*th), (caddr_t)th);
4377	}
4378
4379	return (PF_PASS);
4380}
4381
4382int
4383pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4384    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4385{
4386	struct pf_state_peer	*src, *dst;
4387	struct pf_state		 key;
4388	struct udphdr		*uh = pd->hdr.udp;
4389
4390	key.af = pd->af;
4391	key.proto = IPPROTO_UDP;
4392	if (direction == PF_IN)	{
4393		PF_ACPY(&key.ext.addr, pd->src, key.af);
4394		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4395		key.ext.port = uh->uh_sport;
4396		key.gwy.port = uh->uh_dport;
4397	} else {
4398		PF_ACPY(&key.lan.addr, pd->src, key.af);
4399		PF_ACPY(&key.ext.addr, pd->dst, key.af);
4400		key.lan.port = uh->uh_sport;
4401		key.ext.port = uh->uh_dport;
4402	}
4403
4404	STATE_LOOKUP();
4405
4406	if (direction == (*state)->direction) {
4407		src = &(*state)->src;
4408		dst = &(*state)->dst;
4409	} else {
4410		src = &(*state)->dst;
4411		dst = &(*state)->src;
4412	}
4413
4414	/* update states */
4415	if (src->state < PFUDPS_SINGLE)
4416		src->state = PFUDPS_SINGLE;
4417	if (dst->state == PFUDPS_SINGLE)
4418		dst->state = PFUDPS_MULTIPLE;
4419
4420	/* update expire time */
4421#ifdef __FreeBSD__
4422	(*state)->expire = time_second;
4423#else
4424	(*state)->expire = time.tv_sec;
4425#endif
4426	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4427		(*state)->timeout = PFTM_UDP_MULTIPLE;
4428	else
4429		(*state)->timeout = PFTM_UDP_SINGLE;
4430
4431	/* translate source/destination address, if necessary */
4432	if (STATE_TRANSLATE(*state)) {
4433		if (direction == PF_OUT)
4434			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4435			    &uh->uh_sum, &(*state)->gwy.addr,
4436			    (*state)->gwy.port, 1, pd->af);
4437		else
4438			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4439			    &uh->uh_sum, &(*state)->lan.addr,
4440			    (*state)->lan.port, 1, pd->af);
4441		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4442	}
4443
4444	return (PF_PASS);
4445}
4446
4447int
4448pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4449    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4450{
4451	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
4452	u_int16_t	 icmpid = 0;		/* make the compiler happy */
4453	u_int16_t	*icmpsum = NULL;	/* make the compiler happy */
4454	u_int8_t	 icmptype = 0;		/* make the compiler happy */
4455	int		 state_icmp = 0;
4456
4457	switch (pd->proto) {
4458#ifdef INET
4459	case IPPROTO_ICMP:
4460		icmptype = pd->hdr.icmp->icmp_type;
4461		icmpid = pd->hdr.icmp->icmp_id;
4462		icmpsum = &pd->hdr.icmp->icmp_cksum;
4463
4464		if (icmptype == ICMP_UNREACH ||
4465		    icmptype == ICMP_SOURCEQUENCH ||
4466		    icmptype == ICMP_REDIRECT ||
4467		    icmptype == ICMP_TIMXCEED ||
4468		    icmptype == ICMP_PARAMPROB)
4469			state_icmp++;
4470		break;
4471#endif /* INET */
4472#ifdef INET6
4473	case IPPROTO_ICMPV6:
4474		icmptype = pd->hdr.icmp6->icmp6_type;
4475		icmpid = pd->hdr.icmp6->icmp6_id;
4476		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4477
4478		if (icmptype == ICMP6_DST_UNREACH ||
4479		    icmptype == ICMP6_PACKET_TOO_BIG ||
4480		    icmptype == ICMP6_TIME_EXCEEDED ||
4481		    icmptype == ICMP6_PARAM_PROB)
4482			state_icmp++;
4483		break;
4484#endif /* INET6 */
4485	}
4486
4487	if (!state_icmp) {
4488
4489		/*
4490		 * ICMP query/reply message not related to a TCP/UDP packet.
4491		 * Search for an ICMP state.
4492		 */
4493		struct pf_state		key;
4494
4495		key.af = pd->af;
4496		key.proto = pd->proto;
4497		if (direction == PF_IN)	{
4498			PF_ACPY(&key.ext.addr, pd->src, key.af);
4499			PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4500			key.ext.port = icmpid;
4501			key.gwy.port = icmpid;
4502		} else {
4503			PF_ACPY(&key.lan.addr, pd->src, key.af);
4504			PF_ACPY(&key.ext.addr, pd->dst, key.af);
4505			key.lan.port = icmpid;
4506			key.ext.port = icmpid;
4507		}
4508
4509		STATE_LOOKUP();
4510
4511#ifdef __FreeBSD__
4512		(*state)->expire = time_second;
4513#else
4514		(*state)->expire = time.tv_sec;
4515#endif
4516		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4517
4518		/* translate source/destination address, if necessary */
4519		if (PF_ANEQ(&(*state)->lan.addr, &(*state)->gwy.addr, pd->af)) {
4520			if (direction == PF_OUT) {
4521				switch (pd->af) {
4522#ifdef INET
4523				case AF_INET:
4524					pf_change_a(&saddr->v4.s_addr,
4525					    pd->ip_sum,
4526					    (*state)->gwy.addr.v4.s_addr, 0);
4527					break;
4528#endif /* INET */
4529#ifdef INET6
4530				case AF_INET6:
4531					pf_change_a6(saddr,
4532					    &pd->hdr.icmp6->icmp6_cksum,
4533					    &(*state)->gwy.addr, 0);
4534					m_copyback(m, off,
4535					    sizeof(struct icmp6_hdr),
4536					    (caddr_t)pd->hdr.icmp6);
4537					break;
4538#endif /* INET6 */
4539				}
4540			} else {
4541				switch (pd->af) {
4542#ifdef INET
4543				case AF_INET:
4544					pf_change_a(&daddr->v4.s_addr,
4545					    pd->ip_sum,
4546					    (*state)->lan.addr.v4.s_addr, 0);
4547					break;
4548#endif /* INET */
4549#ifdef INET6
4550				case AF_INET6:
4551					pf_change_a6(daddr,
4552					    &pd->hdr.icmp6->icmp6_cksum,
4553					    &(*state)->lan.addr, 0);
4554					m_copyback(m, off,
4555					    sizeof(struct icmp6_hdr),
4556					    (caddr_t)pd->hdr.icmp6);
4557					break;
4558#endif /* INET6 */
4559				}
4560			}
4561		}
4562
4563		return (PF_PASS);
4564
4565	} else {
4566		/*
4567		 * ICMP error message in response to a TCP/UDP packet.
4568		 * Extract the inner TCP/UDP header and search for that state.
4569		 */
4570
4571		struct pf_pdesc	pd2;
4572#ifdef INET
4573		struct ip	h2;
4574#endif /* INET */
4575#ifdef INET6
4576		struct ip6_hdr	h2_6;
4577		int		terminal = 0;
4578#endif /* INET6 */
4579		int		ipoff2 = 0;	/* make the compiler happy */
4580		int		off2 = 0;	/* make the compiler happy */
4581
4582		pd2.af = pd->af;
4583		switch (pd->af) {
4584#ifdef INET
4585		case AF_INET:
4586			/* offset of h2 in mbuf chain */
4587			ipoff2 = off + ICMP_MINLEN;
4588
4589			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
4590			    NULL, NULL, pd2.af)) {
4591				DPFPRINTF(PF_DEBUG_MISC,
4592				    ("pf: ICMP error message too short "
4593				    "(ip)\n"));
4594				return (PF_DROP);
4595			}
4596			/*
4597			 * ICMP error messages don't refer to non-first
4598			 * fragments
4599			 */
4600			if (h2.ip_off & htons(IP_OFFMASK))
4601				return (PF_DROP);
4602
4603			/* offset of protocol header that follows h2 */
4604			off2 = ipoff2 + (h2.ip_hl << 2);
4605
4606			pd2.proto = h2.ip_p;
4607			pd2.src = (struct pf_addr *)&h2.ip_src;
4608			pd2.dst = (struct pf_addr *)&h2.ip_dst;
4609			pd2.ip_sum = &h2.ip_sum;
4610			break;
4611#endif /* INET */
4612#ifdef INET6
4613		case AF_INET6:
4614			ipoff2 = off + sizeof(struct icmp6_hdr);
4615
4616			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
4617			    NULL, NULL, pd2.af)) {
4618				DPFPRINTF(PF_DEBUG_MISC,
4619				    ("pf: ICMP error message too short "
4620				    "(ip6)\n"));
4621				return (PF_DROP);
4622			}
4623			pd2.proto = h2_6.ip6_nxt;
4624			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
4625			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
4626			pd2.ip_sum = NULL;
4627			off2 = ipoff2 + sizeof(h2_6);
4628			do {
4629				switch (pd2.proto) {
4630				case IPPROTO_FRAGMENT:
4631					/*
4632					 * ICMPv6 error messages for
4633					 * non-first fragments
4634					 */
4635					return (PF_DROP);
4636				case IPPROTO_AH:
4637				case IPPROTO_HOPOPTS:
4638				case IPPROTO_ROUTING:
4639				case IPPROTO_DSTOPTS: {
4640					/* get next header and header length */
4641					struct ip6_ext opt6;
4642
4643					if (!pf_pull_hdr(m, off2, &opt6,
4644					    sizeof(opt6), NULL, NULL, pd2.af)) {
4645						DPFPRINTF(PF_DEBUG_MISC,
4646						    ("pf: ICMPv6 short opt\n"));
4647						return (PF_DROP);
4648					}
4649					if (pd2.proto == IPPROTO_AH)
4650						off2 += (opt6.ip6e_len + 2) * 4;
4651					else
4652						off2 += (opt6.ip6e_len + 1) * 8;
4653					pd2.proto = opt6.ip6e_nxt;
4654					/* goto the next header */
4655					break;
4656				}
4657				default:
4658					terminal++;
4659					break;
4660				}
4661			} while (!terminal);
4662			break;
4663#endif /* INET6 */
4664		}
4665
4666		switch (pd2.proto) {
4667		case IPPROTO_TCP: {
4668			struct tcphdr		 th;
4669			u_int32_t		 seq;
4670			struct pf_state		 key;
4671			struct pf_state_peer	*src, *dst;
4672			u_int8_t		 dws;
4673			int			 copyback = 0;
4674
4675			/*
4676			 * Only the first 8 bytes of the TCP header can be
4677			 * expected. Don't access any TCP header fields after
4678			 * th_seq, an ackskew test is not possible.
4679			 */
4680			if (!pf_pull_hdr(m, off2, &th, 8, NULL, NULL, pd2.af)) {
4681				DPFPRINTF(PF_DEBUG_MISC,
4682				    ("pf: ICMP error message too short "
4683				    "(tcp)\n"));
4684				return (PF_DROP);
4685			}
4686
4687			key.af = pd2.af;
4688			key.proto = IPPROTO_TCP;
4689			if (direction == PF_IN)	{
4690				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4691				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4692				key.ext.port = th.th_dport;
4693				key.gwy.port = th.th_sport;
4694			} else {
4695				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4696				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4697				key.lan.port = th.th_dport;
4698				key.ext.port = th.th_sport;
4699			}
4700
4701			STATE_LOOKUP();
4702
4703			if (direction == (*state)->direction) {
4704				src = &(*state)->dst;
4705				dst = &(*state)->src;
4706			} else {
4707				src = &(*state)->src;
4708				dst = &(*state)->dst;
4709			}
4710
4711			if (src->wscale && dst->wscale &&
4712			    !(th.th_flags & TH_SYN))
4713				dws = dst->wscale & PF_WSCALE_MASK;
4714			else
4715				dws = 0;
4716
4717			/* Demodulate sequence number */
4718			seq = ntohl(th.th_seq) - src->seqdiff;
4719			if (src->seqdiff) {
4720				pf_change_a(&th.th_seq, icmpsum,
4721				    htonl(seq), 0);
4722				copyback = 1;
4723			}
4724
4725			if (!SEQ_GEQ(src->seqhi, seq) ||
4726			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
4727				if (pf_status.debug >= PF_DEBUG_MISC) {
4728					printf("pf: BAD ICMP %d:%d ",
4729					    icmptype, pd->hdr.icmp->icmp_code);
4730					pf_print_host(pd->src, 0, pd->af);
4731					printf(" -> ");
4732					pf_print_host(pd->dst, 0, pd->af);
4733					printf(" state: ");
4734					pf_print_state(*state);
4735					printf(" seq=%u\n", seq);
4736				}
4737				return (PF_DROP);
4738			}
4739
4740			if (STATE_TRANSLATE(*state)) {
4741				if (direction == PF_IN) {
4742					pf_change_icmp(pd2.src, &th.th_sport,
4743					    daddr, &(*state)->lan.addr,
4744					    (*state)->lan.port, NULL,
4745					    pd2.ip_sum, icmpsum,
4746					    pd->ip_sum, 0, pd2.af);
4747				} else {
4748					pf_change_icmp(pd2.dst, &th.th_dport,
4749					    saddr, &(*state)->gwy.addr,
4750					    (*state)->gwy.port, NULL,
4751					    pd2.ip_sum, icmpsum,
4752					    pd->ip_sum, 0, pd2.af);
4753				}
4754				copyback = 1;
4755			}
4756
4757			if (copyback) {
4758				switch (pd2.af) {
4759#ifdef INET
4760				case AF_INET:
4761					m_copyback(m, off, ICMP_MINLEN,
4762					    (caddr_t)pd->hdr.icmp);
4763					m_copyback(m, ipoff2, sizeof(h2),
4764					    (caddr_t)&h2);
4765					break;
4766#endif /* INET */
4767#ifdef INET6
4768				case AF_INET6:
4769					m_copyback(m, off,
4770					    sizeof(struct icmp6_hdr),
4771					    (caddr_t)pd->hdr.icmp6);
4772					m_copyback(m, ipoff2, sizeof(h2_6),
4773					    (caddr_t)&h2_6);
4774					break;
4775#endif /* INET6 */
4776				}
4777				m_copyback(m, off2, 8, (caddr_t)&th);
4778			}
4779
4780			return (PF_PASS);
4781			break;
4782		}
4783		case IPPROTO_UDP: {
4784			struct udphdr		uh;
4785			struct pf_state		key;
4786
4787			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
4788			    NULL, NULL, pd2.af)) {
4789				DPFPRINTF(PF_DEBUG_MISC,
4790				    ("pf: ICMP error message too short "
4791				    "(udp)\n"));
4792				return (PF_DROP);
4793			}
4794
4795			key.af = pd2.af;
4796			key.proto = IPPROTO_UDP;
4797			if (direction == PF_IN)	{
4798				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4799				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4800				key.ext.port = uh.uh_dport;
4801				key.gwy.port = uh.uh_sport;
4802			} else {
4803				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4804				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4805				key.lan.port = uh.uh_dport;
4806				key.ext.port = uh.uh_sport;
4807			}
4808
4809			STATE_LOOKUP();
4810
4811			if (STATE_TRANSLATE(*state)) {
4812				if (direction == PF_IN) {
4813					pf_change_icmp(pd2.src, &uh.uh_sport,
4814					    daddr, &(*state)->lan.addr,
4815					    (*state)->lan.port, &uh.uh_sum,
4816					    pd2.ip_sum, icmpsum,
4817					    pd->ip_sum, 1, pd2.af);
4818				} else {
4819					pf_change_icmp(pd2.dst, &uh.uh_dport,
4820					    saddr, &(*state)->gwy.addr,
4821					    (*state)->gwy.port, &uh.uh_sum,
4822					    pd2.ip_sum, icmpsum,
4823					    pd->ip_sum, 1, pd2.af);
4824				}
4825				switch (pd2.af) {
4826#ifdef INET
4827				case AF_INET:
4828					m_copyback(m, off, ICMP_MINLEN,
4829					    (caddr_t)pd->hdr.icmp);
4830					m_copyback(m, ipoff2, sizeof(h2),
4831					    (caddr_t)&h2);
4832					break;
4833#endif /* INET */
4834#ifdef INET6
4835				case AF_INET6:
4836					m_copyback(m, off,
4837					    sizeof(struct icmp6_hdr),
4838					    (caddr_t)pd->hdr.icmp6);
4839					m_copyback(m, ipoff2, sizeof(h2_6),
4840					    (caddr_t)&h2_6);
4841					break;
4842#endif /* INET6 */
4843				}
4844				m_copyback(m, off2, sizeof(uh),
4845				    (caddr_t)&uh);
4846			}
4847
4848			return (PF_PASS);
4849			break;
4850		}
4851#ifdef INET
4852		case IPPROTO_ICMP: {
4853			struct icmp		iih;
4854			struct pf_state		key;
4855
4856			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
4857			    NULL, NULL, pd2.af)) {
4858				DPFPRINTF(PF_DEBUG_MISC,
4859				    ("pf: ICMP error message too short i"
4860				    "(icmp)\n"));
4861				return (PF_DROP);
4862			}
4863
4864			key.af = pd2.af;
4865			key.proto = IPPROTO_ICMP;
4866			if (direction == PF_IN)	{
4867				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4868				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4869				key.ext.port = iih.icmp_id;
4870				key.gwy.port = iih.icmp_id;
4871			} else {
4872				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4873				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4874				key.lan.port = iih.icmp_id;
4875				key.ext.port = iih.icmp_id;
4876			}
4877
4878			STATE_LOOKUP();
4879
4880			if (STATE_TRANSLATE(*state)) {
4881				if (direction == PF_IN) {
4882					pf_change_icmp(pd2.src, &iih.icmp_id,
4883					    daddr, &(*state)->lan.addr,
4884					    (*state)->lan.port, NULL,
4885					    pd2.ip_sum, icmpsum,
4886					    pd->ip_sum, 0, AF_INET);
4887				} else {
4888					pf_change_icmp(pd2.dst, &iih.icmp_id,
4889					    saddr, &(*state)->gwy.addr,
4890					    (*state)->gwy.port, NULL,
4891					    pd2.ip_sum, icmpsum,
4892					    pd->ip_sum, 0, AF_INET);
4893				}
4894				m_copyback(m, off, ICMP_MINLEN,
4895				    (caddr_t)pd->hdr.icmp);
4896				m_copyback(m, ipoff2, sizeof(h2),
4897				    (caddr_t)&h2);
4898				m_copyback(m, off2, ICMP_MINLEN,
4899				    (caddr_t)&iih);
4900			}
4901
4902			return (PF_PASS);
4903			break;
4904		}
4905#endif /* INET */
4906#ifdef INET6
4907		case IPPROTO_ICMPV6: {
4908			struct icmp6_hdr	iih;
4909			struct pf_state		key;
4910
4911			if (!pf_pull_hdr(m, off2, &iih,
4912			    sizeof(struct icmp6_hdr), NULL, NULL, pd2.af)) {
4913				DPFPRINTF(PF_DEBUG_MISC,
4914				    ("pf: ICMP error message too short "
4915				    "(icmp6)\n"));
4916				return (PF_DROP);
4917			}
4918
4919			key.af = pd2.af;
4920			key.proto = IPPROTO_ICMPV6;
4921			if (direction == PF_IN)	{
4922				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4923				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4924				key.ext.port = iih.icmp6_id;
4925				key.gwy.port = iih.icmp6_id;
4926			} else {
4927				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4928				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4929				key.lan.port = iih.icmp6_id;
4930				key.ext.port = iih.icmp6_id;
4931			}
4932
4933			STATE_LOOKUP();
4934
4935			if (STATE_TRANSLATE(*state)) {
4936				if (direction == PF_IN) {
4937					pf_change_icmp(pd2.src, &iih.icmp6_id,
4938					    daddr, &(*state)->lan.addr,
4939					    (*state)->lan.port, NULL,
4940					    pd2.ip_sum, icmpsum,
4941					    pd->ip_sum, 0, AF_INET6);
4942				} else {
4943					pf_change_icmp(pd2.dst, &iih.icmp6_id,
4944					    saddr, &(*state)->gwy.addr,
4945					    (*state)->gwy.port, NULL,
4946					    pd2.ip_sum, icmpsum,
4947					    pd->ip_sum, 0, AF_INET6);
4948				}
4949				m_copyback(m, off, sizeof(struct icmp6_hdr),
4950				    (caddr_t)pd->hdr.icmp6);
4951				m_copyback(m, ipoff2, sizeof(h2_6),
4952				    (caddr_t)&h2_6);
4953				m_copyback(m, off2, sizeof(struct icmp6_hdr),
4954				    (caddr_t)&iih);
4955			}
4956
4957			return (PF_PASS);
4958			break;
4959		}
4960#endif /* INET6 */
4961		default: {
4962			struct pf_state		key;
4963
4964			key.af = pd2.af;
4965			key.proto = pd2.proto;
4966			if (direction == PF_IN)	{
4967				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4968				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4969				key.ext.port = 0;
4970				key.gwy.port = 0;
4971			} else {
4972				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4973				PF_ACPY(&key.ext.addr, pd2.src, key.af);
4974				key.lan.port = 0;
4975				key.ext.port = 0;
4976			}
4977
4978			STATE_LOOKUP();
4979
4980			if (STATE_TRANSLATE(*state)) {
4981				if (direction == PF_IN) {
4982					pf_change_icmp(pd2.src, NULL,
4983					    daddr, &(*state)->lan.addr,
4984					    0, NULL,
4985					    pd2.ip_sum, icmpsum,
4986					    pd->ip_sum, 0, pd2.af);
4987				} else {
4988					pf_change_icmp(pd2.dst, NULL,
4989					    saddr, &(*state)->gwy.addr,
4990					    0, NULL,
4991					    pd2.ip_sum, icmpsum,
4992					    pd->ip_sum, 0, pd2.af);
4993				}
4994				switch (pd2.af) {
4995#ifdef INET
4996				case AF_INET:
4997					m_copyback(m, off, ICMP_MINLEN,
4998					    (caddr_t)pd->hdr.icmp);
4999					m_copyback(m, ipoff2, sizeof(h2),
5000					    (caddr_t)&h2);
5001					break;
5002#endif /* INET */
5003#ifdef INET6
5004				case AF_INET6:
5005					m_copyback(m, off,
5006					    sizeof(struct icmp6_hdr),
5007					    (caddr_t)pd->hdr.icmp6);
5008					m_copyback(m, ipoff2, sizeof(h2_6),
5009					    (caddr_t)&h2_6);
5010					break;
5011#endif /* INET6 */
5012				}
5013			}
5014
5015			return (PF_PASS);
5016			break;
5017		}
5018		}
5019	}
5020}
5021
5022int
5023pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
5024    struct pf_pdesc *pd)
5025{
5026	struct pf_state_peer	*src, *dst;
5027	struct pf_state		 key;
5028
5029	key.af = pd->af;
5030	key.proto = pd->proto;
5031	if (direction == PF_IN)	{
5032		PF_ACPY(&key.ext.addr, pd->src, key.af);
5033		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
5034		key.ext.port = 0;
5035		key.gwy.port = 0;
5036	} else {
5037		PF_ACPY(&key.lan.addr, pd->src, key.af);
5038		PF_ACPY(&key.ext.addr, pd->dst, key.af);
5039		key.lan.port = 0;
5040		key.ext.port = 0;
5041	}
5042
5043	STATE_LOOKUP();
5044
5045	if (direction == (*state)->direction) {
5046		src = &(*state)->src;
5047		dst = &(*state)->dst;
5048	} else {
5049		src = &(*state)->dst;
5050		dst = &(*state)->src;
5051	}
5052
5053	/* update states */
5054	if (src->state < PFOTHERS_SINGLE)
5055		src->state = PFOTHERS_SINGLE;
5056	if (dst->state == PFOTHERS_SINGLE)
5057		dst->state = PFOTHERS_MULTIPLE;
5058
5059	/* update expire time */
5060#ifdef __FreeBSD__
5061	(*state)->expire = time_second;
5062#else
5063	(*state)->expire = time.tv_sec;
5064#endif
5065	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
5066		(*state)->timeout = PFTM_OTHER_MULTIPLE;
5067	else
5068		(*state)->timeout = PFTM_OTHER_SINGLE;
5069
5070	/* translate source/destination address, if necessary */
5071	if (STATE_TRANSLATE(*state)) {
5072		if (direction == PF_OUT)
5073			switch (pd->af) {
5074#ifdef INET
5075			case AF_INET:
5076				pf_change_a(&pd->src->v4.s_addr,
5077				    pd->ip_sum, (*state)->gwy.addr.v4.s_addr,
5078				    0);
5079				break;
5080#endif /* INET */
5081#ifdef INET6
5082			case AF_INET6:
5083				PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af);
5084				break;
5085#endif /* INET6 */
5086			}
5087		else
5088			switch (pd->af) {
5089#ifdef INET
5090			case AF_INET:
5091				pf_change_a(&pd->dst->v4.s_addr,
5092				    pd->ip_sum, (*state)->lan.addr.v4.s_addr,
5093				    0);
5094				break;
5095#endif /* INET */
5096#ifdef INET6
5097			case AF_INET6:
5098				PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af);
5099				break;
5100#endif /* INET6 */
5101			}
5102	}
5103
5104	return (PF_PASS);
5105}
5106
5107/*
5108 * ipoff and off are measured from the start of the mbuf chain.
5109 * h must be at "ipoff" on the mbuf chain.
5110 */
5111void *
5112pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5113    u_short *actionp, u_short *reasonp, sa_family_t af)
5114{
5115	switch (af) {
5116#ifdef INET
5117	case AF_INET: {
5118		struct ip	*h = mtod(m, struct ip *);
5119		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5120
5121		if (fragoff) {
5122			if (fragoff >= len)
5123				ACTION_SET(actionp, PF_PASS);
5124			else {
5125				ACTION_SET(actionp, PF_DROP);
5126				REASON_SET(reasonp, PFRES_FRAG);
5127			}
5128			return (NULL);
5129		}
5130		if (m->m_pkthdr.len < off + len ||
5131		    ntohs(h->ip_len) < off + len) {
5132			ACTION_SET(actionp, PF_DROP);
5133			REASON_SET(reasonp, PFRES_SHORT);
5134			return (NULL);
5135		}
5136		break;
5137	}
5138#endif /* INET */
5139#ifdef INET6
5140	case AF_INET6: {
5141		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
5142
5143		if (m->m_pkthdr.len < off + len ||
5144		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5145		    (unsigned)(off + len)) {
5146			ACTION_SET(actionp, PF_DROP);
5147			REASON_SET(reasonp, PFRES_SHORT);
5148			return (NULL);
5149		}
5150		break;
5151	}
5152#endif /* INET6 */
5153	}
5154	m_copydata(m, off, len, p);
5155	return (p);
5156}
5157
5158int
5159pf_routable(struct pf_addr *addr, sa_family_t af)
5160{
5161	struct sockaddr_in	*dst;
5162	struct route		 ro;
5163	int			 ret = 0;
5164
5165	bzero(&ro, sizeof(ro));
5166	dst = satosin(&ro.ro_dst);
5167	dst->sin_family = af;
5168	dst->sin_len = sizeof(*dst);
5169	dst->sin_addr = addr->v4;
5170#ifdef __FreeBSD__
5171#ifdef RTF_PRCLONING
5172	rtalloc_ign(&ro, (RTF_CLONING|RTF_PRCLONING));
5173#else /* !RTF_PRCLONING */
5174	rtalloc_ign(&ro, RTF_CLONING);
5175#endif
5176#else /* ! __FreeBSD__ */
5177	rtalloc_noclone(&ro, NO_CLONING);
5178#endif
5179
5180	if (ro.ro_rt != NULL) {
5181		ret = 1;
5182		RTFREE(ro.ro_rt);
5183	}
5184
5185	return (ret);
5186}
5187
5188#ifdef INET
5189
5190void
5191pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5192    struct pf_state *s)
5193{
5194	struct mbuf		*m0, *m1;
5195	struct m_tag		*mtag;
5196	struct route		 iproute;
5197	struct route		*ro = NULL;	/* XXX: was uninitialized */
5198	struct sockaddr_in	*dst;
5199	struct ip		*ip;
5200	struct ifnet		*ifp = NULL;
5201	struct pf_addr		 naddr;
5202	struct pf_src_node	*sn = NULL;
5203	int			 error = 0;
5204#ifdef __FreeBSD__
5205	int sw_csum;
5206#endif
5207
5208	if (m == NULL || *m == NULL || r == NULL ||
5209	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5210		panic("pf_route: invalid parameters");
5211
5212	if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) {
5213		if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) ==
5214		    NULL) {
5215			m0 = *m;
5216			*m = NULL;
5217			goto bad;
5218		}
5219		*(char *)(mtag + 1) = 1;
5220		m_tag_prepend(*m, mtag);
5221	} else {
5222		if (*(char *)(mtag + 1) > 3) {
5223			m0 = *m;
5224			*m = NULL;
5225			goto bad;
5226		}
5227		(*(char *)(mtag + 1))++;
5228	}
5229
5230	if (r->rt == PF_DUPTO) {
5231#ifdef __FreeBSD__
5232		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
5233#else
5234		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5235#endif
5236			return;
5237#ifdef __FreeBSD__
5238		if ((mtag = m_tag_copy(mtag, M_DONTWAIT)) == NULL)
5239#else
5240		if ((mtag = m_tag_copy(mtag)) == NULL)
5241#endif
5242			goto bad;
5243		m_tag_prepend(m0, mtag);
5244	} else {
5245		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5246			return;
5247		m0 = *m;
5248	}
5249
5250	if (m0->m_len < sizeof(struct ip))
5251		panic("pf_route: m0->m_len < sizeof(struct ip)");
5252	ip = mtod(m0, struct ip *);
5253
5254	ro = &iproute;
5255	bzero((caddr_t)ro, sizeof(*ro));
5256	dst = satosin(&ro->ro_dst);
5257	dst->sin_family = AF_INET;
5258	dst->sin_len = sizeof(*dst);
5259	dst->sin_addr = ip->ip_dst;
5260
5261	if (r->rt == PF_FASTROUTE) {
5262		rtalloc(ro);
5263		if (ro->ro_rt == 0) {
5264			ipstat.ips_noroute++;
5265			goto bad;
5266		}
5267
5268		ifp = ro->ro_rt->rt_ifp;
5269		ro->ro_rt->rt_use++;
5270
5271		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
5272			dst = satosin(ro->ro_rt->rt_gateway);
5273	} else {
5274		if (TAILQ_EMPTY(&r->rpool.list))
5275			panic("pf_route: TAILQ_EMPTY(&r->rpool.list)");
5276		if (s == NULL) {
5277			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
5278			    &naddr, NULL, &sn);
5279			if (!PF_AZERO(&naddr, AF_INET))
5280				dst->sin_addr.s_addr = naddr.v4.s_addr;
5281			ifp = r->rpool.cur->kif ?
5282			    r->rpool.cur->kif->pfik_ifp : NULL;
5283		} else {
5284			if (!PF_AZERO(&s->rt_addr, AF_INET))
5285				dst->sin_addr.s_addr =
5286				    s->rt_addr.v4.s_addr;
5287			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5288		}
5289	}
5290	if (ifp == NULL)
5291		goto bad;
5292
5293	if (oifp != ifp) {
5294#ifdef __FreeBSD__
5295		PF_UNLOCK();
5296		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) {
5297			PF_LOCK();
5298			goto bad;
5299		} else if (m0 == NULL) {
5300			PF_LOCK();
5301			goto done;
5302		}
5303		PF_LOCK();
5304#else
5305		if (pf_test(PF_OUT, ifp, &m0) != PF_PASS)
5306			goto bad;
5307		else if (m0 == NULL)
5308			goto done;
5309#endif
5310		if (m0->m_len < sizeof(struct ip))
5311			panic("pf_route: m0->m_len < sizeof(struct ip)");
5312		ip = mtod(m0, struct ip *);
5313	}
5314
5315#ifdef __FreeBSD__
5316	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
5317	m0->m_pkthdr.csum_flags |= CSUM_IP;
5318	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
5319	if (sw_csum & CSUM_DELAY_DATA) {
5320		/*
5321		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
5322		 */
5323		NTOHS(ip->ip_len);
5324		NTOHS(ip->ip_off);	 /* XXX: needed? */
5325		in_delayed_cksum(m0);
5326		HTONS(ip->ip_len);
5327		HTONS(ip->ip_off);
5328		sw_csum &= ~CSUM_DELAY_DATA;
5329	}
5330	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
5331
5332	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
5333	    (ifp->if_hwassist & CSUM_FRAGMENT &&
5334		((ip->ip_off & htons(IP_DF)) == 0))) {
5335		/*
5336		 * ip->ip_len = htons(ip->ip_len);
5337		 * ip->ip_off = htons(ip->ip_off);
5338		 */
5339		ip->ip_sum = 0;
5340		if (sw_csum & CSUM_DELAY_IP) {
5341			/* From KAME */
5342			if (ip->ip_v == IPVERSION &&
5343			    (ip->ip_hl << 2) == sizeof(*ip)) {
5344				ip->ip_sum = in_cksum_hdr(ip);
5345			} else {
5346				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5347			}
5348		}
5349		PF_UNLOCK();
5350		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro->ro_rt);
5351		PF_LOCK();
5352		goto done;
5353	}
5354
5355#else
5356	/* Copied from ip_output. */
5357#ifdef IPSEC
5358	/*
5359	 * If deferred crypto processing is needed, check that the
5360	 * interface supports it.
5361	 */
5362	if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
5363	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
5364		/* Notify IPsec to do its own crypto. */
5365		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
5366		goto bad;
5367	}
5368#endif /* IPSEC */
5369
5370	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
5371	if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT) {
5372		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
5373		    ifp->if_bridge != NULL) {
5374			in_delayed_cksum(m0);
5375			m0->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */
5376		}
5377	} else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT) {
5378		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
5379		    ifp->if_bridge != NULL) {
5380			in_delayed_cksum(m0);
5381			m0->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */
5382		}
5383	}
5384
5385	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
5386		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
5387		    ifp->if_bridge == NULL) {
5388			m0->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
5389			ipstat.ips_outhwcsum++;
5390		} else {
5391			ip->ip_sum = 0;
5392			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5393		}
5394		/* Update relevant hardware checksum stats for TCP/UDP */
5395		if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT)
5396			tcpstat.tcps_outhwcsum++;
5397		else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT)
5398			udpstat.udps_outhwcsum++;
5399		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
5400		goto done;
5401	}
5402#endif
5403	/*
5404	 * Too large for interface; fragment if possible.
5405	 * Must be able to put at least 8 bytes per fragment.
5406	 */
5407	if (ip->ip_off & htons(IP_DF)) {
5408		ipstat.ips_cantfrag++;
5409		if (r->rt != PF_DUPTO) {
5410#ifdef __FreeBSD__
5411			/* icmp_error() expects host byte ordering */
5412			NTOHS(ip->ip_len);
5413			NTOHS(ip->ip_off);
5414			PF_UNLOCK();
5415#endif
5416			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
5417			    ifp);
5418#ifdef __FreeBSD__
5419			PF_LOCK();
5420#endif
5421			goto done;
5422		} else
5423			goto bad;
5424	}
5425
5426	m1 = m0;
5427#ifdef __FreeBSD__
5428	/*
5429	 * XXX: is cheaper + less error prone than own function
5430	 */
5431	NTOHS(ip->ip_len);
5432	NTOHS(ip->ip_off);
5433	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
5434#else
5435	error = ip_fragment(m0, ifp, ifp->if_mtu);
5436#endif
5437	if (error) {
5438#ifndef __FreeBSD__	/* ip_fragment does not do m_freem() on FreeBSD */
5439		m0 = NULL;
5440#endif
5441		goto bad;
5442	}
5443
5444	for (m0 = m1; m0; m0 = m1) {
5445		m1 = m0->m_nextpkt;
5446		m0->m_nextpkt = 0;
5447#ifdef __FreeBSD__
5448		if (error == 0) {
5449			PF_UNLOCK();
5450			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5451			    NULL);
5452			PF_LOCK();
5453		} else
5454#else
5455		if (error == 0)
5456			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5457			    NULL);
5458		else
5459#endif
5460			m_freem(m0);
5461	}
5462
5463	if (error == 0)
5464		ipstat.ips_fragmented++;
5465
5466done:
5467	if (r->rt != PF_DUPTO)
5468		*m = NULL;
5469	if (ro == &iproute && ro->ro_rt)
5470		RTFREE(ro->ro_rt);
5471	return;
5472
5473bad:
5474	m_freem(m0);
5475	goto done;
5476}
5477#endif /* INET */
5478
5479#ifdef INET6
5480void
5481pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5482    struct pf_state *s)
5483{
5484	struct mbuf		*m0;
5485	struct m_tag		*mtag;
5486	struct route_in6	 ip6route;
5487	struct route_in6	*ro;
5488	struct sockaddr_in6	*dst;
5489	struct ip6_hdr		*ip6;
5490	struct ifnet		*ifp = NULL;
5491	struct pf_addr		 naddr;
5492	struct pf_src_node	*sn = NULL;
5493	int			 error = 0;
5494
5495	if (m == NULL || *m == NULL || r == NULL ||
5496	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5497		panic("pf_route6: invalid parameters");
5498
5499	if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) {
5500		if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) ==
5501		    NULL) {
5502			m0 = *m;
5503			*m = NULL;
5504			goto bad;
5505		}
5506		*(char *)(mtag + 1) = 1;
5507		m_tag_prepend(*m, mtag);
5508	} else {
5509		if (*(char *)(mtag + 1) > 3) {
5510			m0 = *m;
5511			*m = NULL;
5512			goto bad;
5513		}
5514		(*(char *)(mtag + 1))++;
5515	}
5516
5517	if (r->rt == PF_DUPTO) {
5518#ifdef __FreeBSD__
5519		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
5520#else
5521		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
5522#endif
5523			return;
5524#ifdef __FreeBSD__
5525		if ((mtag = m_tag_copy(mtag, M_DONTWAIT)) == NULL)
5526#else
5527		if ((mtag = m_tag_copy(mtag)) == NULL)
5528#endif
5529			goto bad;
5530		m_tag_prepend(m0, mtag);
5531	} else {
5532		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5533			return;
5534		m0 = *m;
5535	}
5536
5537	if (m0->m_len < sizeof(struct ip6_hdr))
5538		panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
5539	ip6 = mtod(m0, struct ip6_hdr *);
5540
5541	ro = &ip6route;
5542	bzero((caddr_t)ro, sizeof(*ro));
5543	dst = (struct sockaddr_in6 *)&ro->ro_dst;
5544	dst->sin6_family = AF_INET6;
5545	dst->sin6_len = sizeof(*dst);
5546	dst->sin6_addr = ip6->ip6_dst;
5547
5548	/* Cheat. */
5549	if (r->rt == PF_FASTROUTE) {
5550#ifdef __FreeBSD__
5551		m0->m_flags |= M_SKIP_FIREWALL;
5552		PF_UNLOCK();
5553		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
5554		PF_LOCK();
5555#else
5556		mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
5557		if (mtag == NULL)
5558			goto bad;
5559		m_tag_prepend(m0, mtag);
5560		ip6_output(m0, NULL, NULL, 0, NULL, NULL);
5561#endif
5562		return;
5563	}
5564
5565	if (TAILQ_EMPTY(&r->rpool.list))
5566		panic("pf_route6: TAILQ_EMPTY(&r->rpool.list)");
5567	if (s == NULL) {
5568		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
5569		    &naddr, NULL, &sn);
5570		if (!PF_AZERO(&naddr, AF_INET6))
5571			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5572			    &naddr, AF_INET6);
5573		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
5574	} else {
5575		if (!PF_AZERO(&s->rt_addr, AF_INET6))
5576			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5577			    &s->rt_addr, AF_INET6);
5578		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5579	}
5580	if (ifp == NULL)
5581		goto bad;
5582
5583	if (oifp != ifp) {
5584#ifdef __FreeBSD__
5585		PF_UNLOCK();
5586		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS) {
5587			PF_LOCK();
5588			goto bad;
5589		} else if (m0 == NULL) {
5590			PF_LOCK();
5591			goto done;
5592		}
5593		PF_LOCK();
5594#else
5595		if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS)
5596			goto bad;
5597		else if (m0 == NULL)
5598			goto done;
5599#endif
5600		if (m0->m_len < sizeof(struct ip6_hdr))
5601			panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
5602		ip6 = mtod(m0, struct ip6_hdr *);
5603	}
5604
5605	/*
5606	 * If the packet is too large for the outgoing interface,
5607	 * send back an icmp6 error.
5608	 */
5609	if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr))
5610		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5611	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
5612#ifdef __FreeBSD__
5613		PF_UNLOCK();
5614#endif
5615		error = nd6_output(ifp, ifp, m0, dst, NULL);
5616#ifdef __FreeBSD__
5617		PF_LOCK();
5618#endif
5619	} else {
5620		in6_ifstat_inc(ifp, ifs6_in_toobig);
5621#ifdef __FreeBSD__
5622		if (r->rt != PF_DUPTO) {
5623			PF_UNLOCK();
5624			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5625			PF_LOCK();
5626		 } else
5627#else
5628		if (r->rt != PF_DUPTO)
5629			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5630		else
5631#endif
5632			goto bad;
5633	}
5634
5635done:
5636	if (r->rt != PF_DUPTO)
5637		*m = NULL;
5638	return;
5639
5640bad:
5641	m_freem(m0);
5642	goto done;
5643}
5644#endif /* INET6 */
5645
5646
5647#ifdef __FreeBSD__
5648/*
5649 * FreeBSD supports cksum offloads for the following drivers.
5650 *  em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
5651 *   ti(4), txp(4), xl(4)
5652 *
5653 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
5654 *  network driver performed cksum including pseudo header, need to verify
5655 *   csum_data
5656 * CSUM_DATA_VALID :
5657 *  network driver performed cksum, needs to additional pseudo header
5658 *  cksum computation with partial csum_data(i.e. lack of H/W support for
5659 *  pseudo header, for instance hme(4), sk(4) and possibly gem(4))
5660 *
5661 * After validating the cksum of packet, set both flag CSUM_DATA_VALID and
5662 * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
5663 * TCP/UDP layer.
5664 * Also, set csum_data to 0xffff to force cksum validation.
5665 */
5666int
5667pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
5668{
5669	u_int16_t sum = 0;
5670	int hw_assist = 0;
5671	struct ip *ip;
5672
5673	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5674		return (1);
5675	if (m->m_pkthdr.len < off + len)
5676		return (1);
5677
5678	switch (p) {
5679	case IPPROTO_TCP:
5680		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5681			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5682				sum = m->m_pkthdr.csum_data;
5683			} else {
5684				ip = mtod(m, struct ip *);
5685				sum = in_pseudo(ip->ip_src.s_addr,
5686					ip->ip_dst.s_addr, htonl((u_short)len +
5687					m->m_pkthdr.csum_data + IPPROTO_TCP));
5688			}
5689			sum ^= 0xffff;
5690			++hw_assist;
5691		}
5692		break;
5693	case IPPROTO_UDP:
5694		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5695			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5696				sum = m->m_pkthdr.csum_data;
5697			} else {
5698				ip = mtod(m, struct ip *);
5699				sum = in_pseudo(ip->ip_src.s_addr,
5700					ip->ip_dst.s_addr, htonl((u_short)len +
5701					m->m_pkthdr.csum_data + IPPROTO_UDP));
5702			}
5703			sum ^= 0xffff;
5704			++hw_assist;
5705                }
5706		break;
5707	case IPPROTO_ICMP:
5708#ifdef INET6
5709	case IPPROTO_ICMPV6:
5710#endif /* INET6 */
5711		break;
5712	default:
5713		return (1);
5714	}
5715
5716	if (!hw_assist) {
5717		switch (af) {
5718		case AF_INET:
5719			if (p == IPPROTO_ICMP) {
5720				if (m->m_len < off)
5721					return (1);
5722				m->m_data += off;
5723				m->m_len -= off;
5724				sum = in_cksum(m, len);
5725				m->m_data -= off;
5726				m->m_len += off;
5727			} else {
5728				if (m->m_len < sizeof(struct ip))
5729					return (1);
5730				sum = in4_cksum(m, p, off, len);
5731			}
5732			break;
5733#ifdef INET6
5734		case AF_INET6:
5735			if (m->m_len < sizeof(struct ip6_hdr))
5736				return (1);
5737			sum = in6_cksum(m, p, off, len);
5738			break;
5739#endif /* INET6 */
5740		default:
5741			return (1);
5742		}
5743	}
5744	if (sum) {
5745		switch (p) {
5746		case IPPROTO_TCP:
5747			tcpstat.tcps_rcvbadsum++;
5748			break;
5749		case IPPROTO_UDP:
5750			udpstat.udps_badsum++;
5751			break;
5752		case IPPROTO_ICMP:
5753			icmpstat.icps_checksum++;
5754			break;
5755#ifdef INET6
5756		case IPPROTO_ICMPV6:
5757			icmp6stat.icp6s_checksum++;
5758			break;
5759#endif /* INET6 */
5760		}
5761		return (1);
5762	} else {
5763		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
5764			m->m_pkthdr.csum_flags |=
5765			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5766			m->m_pkthdr.csum_data = 0xffff;
5767		}
5768	}
5769	return (0);
5770}
5771#else
5772/*
5773 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
5774 *   off is the offset where the protocol header starts
5775 *   len is the total length of protocol header plus payload
5776 * returns 0 when the checksum is valid, otherwise returns 1.
5777 */
5778int
5779pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
5780    sa_family_t af)
5781{
5782	u_int16_t flag_ok, flag_bad;
5783	u_int16_t sum;
5784
5785	switch (p) {
5786	case IPPROTO_TCP:
5787		flag_ok = M_TCP_CSUM_IN_OK;
5788		flag_bad = M_TCP_CSUM_IN_BAD;
5789		break;
5790	case IPPROTO_UDP:
5791		flag_ok = M_UDP_CSUM_IN_OK;
5792		flag_bad = M_UDP_CSUM_IN_BAD;
5793		break;
5794	case IPPROTO_ICMP:
5795#ifdef INET6
5796	case IPPROTO_ICMPV6:
5797#endif /* INET6 */
5798		flag_ok = flag_bad = 0;
5799		break;
5800	default:
5801		return (1);
5802	}
5803	if (m->m_pkthdr.csum & flag_ok)
5804		return (0);
5805	if (m->m_pkthdr.csum & flag_bad)
5806		return (1);
5807	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5808		return (1);
5809	if (m->m_pkthdr.len < off + len)
5810		return (1);
5811		switch (af) {
5812	case AF_INET:
5813		if (p == IPPROTO_ICMP) {
5814			if (m->m_len < off)
5815				return (1);
5816			m->m_data += off;
5817			m->m_len -= off;
5818			sum = in_cksum(m, len);
5819			m->m_data -= off;
5820			m->m_len += off;
5821		} else {
5822			if (m->m_len < sizeof(struct ip))
5823				return (1);
5824			sum = in4_cksum(m, p, off, len);
5825		}
5826		break;
5827#ifdef INET6
5828	case AF_INET6:
5829		if (m->m_len < sizeof(struct ip6_hdr))
5830			return (1);
5831		sum = in6_cksum(m, p, off, len);
5832		break;
5833#endif /* INET6 */
5834	default:
5835		return (1);
5836	}
5837	if (sum) {
5838		m->m_pkthdr.csum |= flag_bad;
5839		switch (p) {
5840		case IPPROTO_TCP:
5841			tcpstat.tcps_rcvbadsum++;
5842			break;
5843		case IPPROTO_UDP:
5844			udpstat.udps_badsum++;
5845			break;
5846		case IPPROTO_ICMP:
5847			icmpstat.icps_checksum++;
5848			break;
5849#ifdef INET6
5850		case IPPROTO_ICMPV6:
5851			icmp6stat.icp6s_checksum++;
5852			break;
5853#endif /* INET6 */
5854		}
5855		return (1);
5856	}
5857	m->m_pkthdr.csum |= flag_ok;
5858	return (0);
5859}
5860#endif
5861
5862static int
5863pf_add_mbuf_tag(struct mbuf *m, u_int tag)
5864{
5865	struct m_tag *mtag;
5866
5867	if (m_tag_find(m, tag, NULL) != NULL)
5868		return (0);
5869	mtag = m_tag_get(tag, 0, M_NOWAIT);
5870	if (mtag == NULL)
5871		return (1);
5872	m_tag_prepend(m, mtag);
5873	return (0);
5874}
5875
5876#ifdef INET
/*
 * pf_test: filter one IPv4 packet.
 *
 *   dir  PF_IN or PF_OUT
 *   ifp  interface the packet is seen on
 *   m0   in/out packet pointer; normalization may replace the mbuf, and
 *        *m0 is set to NULL when the packet is freed here
 *        (PF_SYNPROXY_DROP) or possibly by pf_route()
 *   inp  (FreeBSD only) passed through to pf_test_tcp()/pf_test_udp()
 *
 * Returns the resulting action.  PF_PASS is returned right away when pf
 * is not running or the packet is marked to skip the firewall, PF_DROP
 * when no pfi_kif is known for ifp.  A PF_SYNPROXY_DROP verdict is
 * reported to the caller as PF_PASS with *m0 == NULL.
 *
 * On FreeBSD the function takes PF_LOCK() on entry and releases it on
 * every return path.
 */
int
#ifdef __FreeBSD__
pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
#else
pf_test(int dir, struct ifnet *ifp, struct mbuf **m0)
#endif
{
	struct pfi_kif		*kif;
	u_short			 action, reason = 0, log = 0;
	struct mbuf		*m = *m0;
	struct ip		*h = NULL;	/* make the compiler happy */
	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
	struct pf_state		*s = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	int			 off, dirndx, pqid = 0;

#ifdef __FreeBSD__
	PF_LOCK();
#endif
	/*
	 * Pass the packet untouched if pf is disabled or the packet is
	 * flagged as one that must not be filtered (M_SKIP_FIREWALL on
	 * FreeBSD, a PACKET_TAG_PF_GENERATED tag otherwise).  Note that the
	 * head of the if statement is split across the #ifdef.
	 */
	if (!pf_status.running ||
#ifdef __FreeBSD__
	    (m->m_flags & M_SKIP_FIREWALL)) {
		PF_UNLOCK();
#else
	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
#endif
	    	return (PF_PASS);
	}

	/* Map the interface to pf's own interface structure. */
	kif = pfi_index2kif[ifp->if_index];
	if (kif == NULL) {
#ifdef __FreeBSD__
		PF_UNLOCK();
#endif
		return (PF_DROP);
	}

#ifdef __FreeBSD__
	M_ASSERTPKTHDR(m);
#else
#ifdef DIAGNOSTIC
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test");
#endif
#endif

	memset(&pd, 0, sizeof(pd));
	/* Packet too short to hold an IP header. */
	if (m->m_pkthdr.len < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* We do IP header normalization and packet reassembly here */
	if (pf_normalize_ip(m0, dir, kif, &reason) != PF_PASS) {
		action = PF_DROP;
		goto done;
	}
	/* Pick up the mbuf again; m0 was handed to the normalizer. */
	m = *m0;
	h = mtod(m, struct ip *);

	/* off is the IP header length, i.e. where the payload starts. */
	off = h->ip_hl << 2;
	if (off < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* Fill in the packet descriptor handed to the test functions. */
	pd.src = (struct pf_addr *)&h->ip_src;
	pd.dst = (struct pf_addr *)&h->ip_dst;
	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
	pd.ip_sum = &h->ip_sum;
	pd.proto = h->ip_p;
	pd.af = AF_INET;
	pd.tos = h->ip_tos;
	pd.tot_len = ntohs(h->ip_len);

	/* handle fragments that didn't get reassembled by normalization */
	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
		action = pf_test_fragment(&r, dir, kif, m, h,
		    &pd, &a, &ruleset);
		goto done;
	}

	/*
	 * Per protocol handling.  Each case first tries to match an
	 * existing state; if that passes, rule, anchor and log setting are
	 * taken from the state.  If no state was found (s == NULL) the
	 * rule set is evaluated instead.
	 */
	switch (h->ip_p) {

	case IPPROTO_TCP: {
		struct tcphdr	th;

		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(m, off, &th, sizeof(th),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		/* Checksums are only verified for inbound packets. */
		if (dir == PF_IN && pf_check_proto_cksum(m, off,
		    ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) {
			action = PF_DROP;
			goto done;
		}
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
		/* ACK without payload: remembered in pqid for ALTQ below. */
		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
			pqid = 1;
		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
		if (action == PF_DROP)
			goto done;
		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_tcp(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, inp);
#else
			action = pf_test_tcp(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset);
#endif
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr	uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		/* A zero uh_sum is not verified. */
		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
		    off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) {
			action = PF_DROP;
			goto done;
		}
		/* Reject destination port 0 and implausible UDP lengths. */
		if (uh.uh_dport == 0 ||
		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			goto done;
		}
		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
#ifdef __FreeBSD__
			action = pf_test_udp(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset, inp);
#else
			action = pf_test_udp(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset);
#endif
		break;
	}

	case IPPROTO_ICMP: {
		struct icmp	ih;

		pd.hdr.icmp = &ih;
		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		if (dir == PF_IN && pf_check_proto_cksum(m, off,
		    ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) {
			action = PF_DROP;
			goto done;
		}
		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_icmp(&r, &s, dir, kif,
			    m, off, h, &pd, &a, &ruleset);
		break;
	}

	default:
		action = pf_test_state_other(&s, dir, kif, &pd);
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL)
			action = pf_test_other(&r, &s, dir, kif, m, off, h,
			    &pd, &a, &ruleset);
		break;
	}

done:
	/*
	 * Packets with IP options (header longer than 5 words) are only
	 * passed if the state or the rule allows options.  h is only
	 * dereferenced when action is PF_PASS.
	 */
	if (action == PF_PASS && h->ip_hl > 5 &&
	    !((s && s->allow_opts) || r->allow_opts)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: dropping packet with ip options\n"));
	}

#ifdef ALTQ
	/*
	 * Attach the queue id chosen by the rule.  r->pqid is used for
	 * empty TCP ACKs (pqid) and packets with TOS lowdelay, r->qid
	 * otherwise.  A failed tag allocation is silently ignored.
	 */
	if (action == PF_PASS && r->qid) {
		struct m_tag	*mtag;
		struct altq_tag	*atag;

		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
		if (mtag != NULL) {
			atag = (struct altq_tag *)(mtag + 1);
			if (pqid || pd.tos == IPTOS_LOWDELAY)
				atag->qid = r->pqid;
			else
				atag->qid = r->qid;
			/* add hints for ecn */
			atag->af = AF_INET;
			atag->hdr = h;
			m_tag_prepend(m, mtag);
		}
	}
#endif

	/*
	 * connections redirected to loopback should not match sockets
	 * bound specifically to loopback due to security implications,
	 * see tcp_input() and in_pcblookup_listen().
	 */
	/* The packet is dropped if the marker tag cannot be added. */
	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
	    (s->nat_rule.ptr->action == PF_RDR ||
	    s->nat_rule.ptr->action == PF_BINAT) &&
	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
	    pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_MEMORY);
	}

	if (log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, a, ruleset);

	/*
	 * Per interface counters, indexed by address family (0 here),
	 * direction (1 for PF_OUT) and verdict (1 for anything but pass).
	 */
	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;

	/* Rule, anchor, state, source node and table accounting. */
	if (action == PF_PASS || r->action == PF_DROP) {
		r->packets++;
		r->bytes += pd.tot_len;
		if (a != NULL) {
			a->packets++;
			a->bytes += pd.tot_len;
		}
		if (s != NULL) {
			/* Index 0: same direction as the state, 1: reverse. */
			dirndx = (dir == s->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets++;
				s->nat_rule.ptr->bytes += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets++;
				s->src_node->bytes += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets++;
				s->nat_src_node->bytes += pd.tot_len;
			}
		}
		/* tr is the rule whose tables get their statistics updated. */
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		if (nr != NULL) {
			struct pf_addr *x;
			/*
			 * XXX: we need to make sure that the addresses
			 * passed to pfr_update_stats() are the same than
			 * the addresses used during matching (pfr_match)
			 */
			if (r == &pf_default_rule) {
				tr = nr;
				x = (s == NULL || s->direction == dir) ?
				    &pd.baddr : &pd.naddr;
			} else
				x = (s == NULL || s->direction == dir) ?
				    &pd.naddr : &pd.baddr;
			if (x == &pd.baddr || s == NULL) {
				/* we need to change the address */
				if (dir == PF_OUT)
					pd.src = x;
				else
					pd.dst = x;
			}
		}
		if (tr->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
			    s->direction == dir) ? pd.src : pd.dst, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->src.not);
		if (tr->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
			    s->direction == dir) ? pd.dst : pd.src, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->dst.not);
	}


	/*
	 * PF_SYNPROXY_DROP: the packet is freed here and the caller is told
	 * PF_PASS with *m0 == NULL.  Otherwise apply the rule's routing
	 * option, if it has one.
	 */
	if (action == PF_SYNPROXY_DROP) {
		m_freem(*m0);
		*m0 = NULL;
		action = PF_PASS;
	} else if (r->rt)
		/* pf_route can free the mbuf causing *m0 to become NULL */
		pf_route(m0, r, dir, ifp, s);

#ifdef __FreeBSD__
	PF_UNLOCK();
#endif

	return (action);
}
6214#endif /* INET */
6215
6216#ifdef INET6
6217int
6218#ifdef __FreeBSD__
6219pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
6220#else
6221pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0)
6222#endif
6223{
6224	struct pfi_kif		*kif;
6225	u_short			 action, reason = 0, log = 0;
6226	struct mbuf		*m = *m0;
6227	struct ip6_hdr		*h = NULL;	/* make the compiler happy */
6228	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
6229	struct pf_state		*s = NULL;
6230	struct pf_ruleset	*ruleset = NULL;
6231	struct pf_pdesc		 pd;
6232	int			 off, terminal = 0, dirndx;
6233
6234#ifdef __FreeBSD__
6235	PF_LOCK();
6236#endif
6237
6238	if (!pf_status.running ||
6239#ifdef __FreeBSD__
6240	    (m->m_flags & M_SKIP_FIREWALL)) {
6241		PF_UNLOCK();
6242#else
6243	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
6244#endif
6245		return (PF_PASS);
6246	}
6247
6248	kif = pfi_index2kif[ifp->if_index];
6249	if (kif == NULL) {
6250#ifdef __FreeBSD__
6251		PF_UNLOCK();
6252#endif
6253		return (PF_DROP);
6254	}
6255
6256#ifdef __FreeBSD__
6257	M_ASSERTPKTHDR(m);
6258#else
6259#ifdef DIAGNOSTIC
6260	if ((m->m_flags & M_PKTHDR) == 0)
6261		panic("non-M_PKTHDR is passed to pf_test");
6262#endif
6263#endif
6264
6265	memset(&pd, 0, sizeof(pd));
6266	if (m->m_pkthdr.len < (int)sizeof(*h)) {
6267		action = PF_DROP;
6268		REASON_SET(&reason, PFRES_SHORT);
6269		log = 1;
6270		goto done;
6271	}
6272
6273	/* We do IP header normalization and packet reassembly here */
6274	if (pf_normalize_ip6(m0, dir, kif, &reason) != PF_PASS) {
6275		action = PF_DROP;
6276		goto done;
6277	}
6278	m = *m0;
6279	h = mtod(m, struct ip6_hdr *);
6280
6281	pd.src = (struct pf_addr *)&h->ip6_src;
6282	pd.dst = (struct pf_addr *)&h->ip6_dst;
6283	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
6284	pd.ip_sum = NULL;
6285	pd.af = AF_INET6;
6286	pd.tos = 0;
6287	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
6288
6289	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
6290	pd.proto = h->ip6_nxt;
6291	do {
6292		switch (pd.proto) {
6293		case IPPROTO_FRAGMENT:
6294			action = pf_test_fragment(&r, dir, kif, m, h,
6295			    &pd, &a, &ruleset);
6296			if (action == PF_DROP)
6297				REASON_SET(&reason, PFRES_FRAG);
6298			goto done;
6299		case IPPROTO_AH:
6300		case IPPROTO_HOPOPTS:
6301		case IPPROTO_ROUTING:
6302		case IPPROTO_DSTOPTS: {
6303			/* get next header and header length */
6304			struct ip6_ext	opt6;
6305
6306			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
6307			    NULL, NULL, pd.af)) {
6308				DPFPRINTF(PF_DEBUG_MISC,
6309				    ("pf: IPv6 short opt\n"));
6310				action = PF_DROP;
6311				REASON_SET(&reason, PFRES_SHORT);
6312				log = 1;
6313				goto done;
6314			}
6315			if (pd.proto == IPPROTO_AH)
6316				off += (opt6.ip6e_len + 2) * 4;
6317			else
6318				off += (opt6.ip6e_len + 1) * 8;
6319			pd.proto = opt6.ip6e_nxt;
6320			/* goto the next header */
6321			break;
6322		}
6323		default:
6324			terminal++;
6325			break;
6326		}
6327	} while (!terminal);
6328
6329	switch (pd.proto) {
6330
6331	case IPPROTO_TCP: {
6332		struct tcphdr	th;
6333
6334		pd.hdr.tcp = &th;
6335		if (!pf_pull_hdr(m, off, &th, sizeof(th),
6336		    &action, &reason, AF_INET6)) {
6337			log = action != PF_PASS;
6338			goto done;
6339		}
6340		if (dir == PF_IN && pf_check_proto_cksum(m, off,
6341		    ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
6342		    IPPROTO_TCP, AF_INET6)) {
6343			action = PF_DROP;
6344			goto done;
6345		}
6346		pd.p_len = pd.tot_len - off - (th.th_off << 2);
6347		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6348		if (action == PF_DROP)
6349			goto done;
6350		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6351		    &reason);
6352		if (action == PF_PASS) {
6353#if NPFSYNC
6354			pfsync_update_state(s);
6355#endif
6356			r = s->rule.ptr;
6357			a = s->anchor.ptr;
6358			log = s->log;
6359		} else if (s == NULL)
6360#ifdef __FreeBSD__
6361			action = pf_test_tcp(&r, &s, dir, kif,
6362			    m, off, h, &pd, &a, &ruleset, inp);
6363#else
6364			action = pf_test_tcp(&r, &s, dir, kif,
6365			    m, off, h, &pd, &a, &ruleset);
6366#endif
6367		break;
6368	}
6369
6370	case IPPROTO_UDP: {
6371		struct udphdr	uh;
6372
6373		pd.hdr.udp = &uh;
6374		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6375		    &action, &reason, AF_INET6)) {
6376			log = action != PF_PASS;
6377			goto done;
6378		}
6379		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
6380		    off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
6381		    IPPROTO_UDP, AF_INET6)) {
6382			action = PF_DROP;
6383			goto done;
6384		}
6385		if (uh.uh_dport == 0 ||
6386		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6387		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6388			action = PF_DROP;
6389			goto done;
6390		}
6391		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6392		if (action == PF_PASS) {
6393#if NPFSYNC
6394			pfsync_update_state(s);
6395#endif
6396			r = s->rule.ptr;
6397			a = s->anchor.ptr;
6398			log = s->log;
6399		} else if (s == NULL)
6400#ifdef __FreeBSD__
6401			action = pf_test_udp(&r, &s, dir, kif,
6402			    m, off, h, &pd, &a, &ruleset, inp);
6403#else
6404			action = pf_test_udp(&r, &s, dir, kif,
6405			    m, off, h, &pd, &a, &ruleset);
6406#endif
6407		break;
6408	}
6409
6410	case IPPROTO_ICMPV6: {
6411		struct icmp6_hdr	ih;
6412
6413		pd.hdr.icmp6 = &ih;
6414		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
6415		    &action, &reason, AF_INET6)) {
6416			log = action != PF_PASS;
6417			goto done;
6418		}
6419		if (dir == PF_IN && pf_check_proto_cksum(m, off,
6420		    ntohs(h->ip6_plen)  - (off - sizeof(struct ip6_hdr)),
6421		    IPPROTO_ICMPV6, AF_INET6)) {
6422			action = PF_DROP;
6423			goto done;
6424		}
6425		action = pf_test_state_icmp(&s, dir, kif,
6426		    m, off, h, &pd);
6427		if (action == PF_PASS) {
6428#if NPFSYNC
6429			pfsync_update_state(s);
6430#endif
6431			r = s->rule.ptr;
6432			a = s->anchor.ptr;
6433			log = s->log;
6434		} else if (s == NULL)
6435			action = pf_test_icmp(&r, &s, dir, kif,
6436			    m, off, h, &pd, &a, &ruleset);
6437		break;
6438	}
6439
6440	default:
6441		action = pf_test_state_other(&s, dir, kif, &pd);
6442		if (action == PF_PASS) {
6443			r = s->rule.ptr;
6444			a = s->anchor.ptr;
6445			log = s->log;
6446		} else if (s == NULL)
6447			action = pf_test_other(&r, &s, dir, kif, m, off, h,
6448			    &pd, &a, &ruleset);
6449		break;
6450	}
6451
6452done:
6453	/* XXX handle IPv6 options, if not allowed. not implemented. */
6454
6455#ifdef ALTQ
6456	if (action == PF_PASS && r->qid) {
6457		struct m_tag	*mtag;
6458		struct altq_tag	*atag;
6459
6460		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
6461		if (mtag != NULL) {
6462			atag = (struct altq_tag *)(mtag + 1);
6463			if (pd.tos == IPTOS_LOWDELAY)
6464				atag->qid = r->pqid;
6465			else
6466				atag->qid = r->qid;
6467			/* add hints for ecn */
6468			atag->af = AF_INET6;
6469			atag->hdr = h;
6470			m_tag_prepend(m, mtag);
6471		}
6472	}
6473#endif
6474
6475	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6476	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6477	    (s->nat_rule.ptr->action == PF_RDR ||
6478	    s->nat_rule.ptr->action == PF_BINAT) &&
6479	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6) &&
6480	    pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) {
6481		action = PF_DROP;
6482		REASON_SET(&reason, PFRES_MEMORY);
6483	}
6484
6485	if (log)
6486		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, r, a, ruleset);
6487
6488	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6489	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
6490
6491	if (action == PF_PASS || r->action == PF_DROP) {
6492		r->packets++;
6493		r->bytes += pd.tot_len;
6494		if (a != NULL) {
6495			a->packets++;
6496			a->bytes += pd.tot_len;
6497		}
6498		if (s != NULL) {
6499			dirndx = (dir == s->direction) ? 0 : 1;
6500			s->packets[dirndx]++;
6501			s->bytes[dirndx] += pd.tot_len;
6502			if (s->nat_rule.ptr != NULL) {
6503				s->nat_rule.ptr->packets++;
6504				s->nat_rule.ptr->bytes += pd.tot_len;
6505			}
6506			if (s->src_node != NULL) {
6507				s->src_node->packets++;
6508				s->src_node->bytes += pd.tot_len;
6509			}
6510			if (s->nat_src_node != NULL) {
6511				s->nat_src_node->packets++;
6512				s->nat_src_node->bytes += pd.tot_len;
6513			}
6514		}
6515		tr = r;
6516		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6517		if (nr != NULL) {
6518			struct pf_addr *x;
6519			/*
6520			 * XXX: we need to make sure that the addresses
6521			 * passed to pfr_update_stats() are the same than
6522			 * the addresses used during matching (pfr_match)
6523			 */
6524			if (r == &pf_default_rule) {
6525				tr = nr;
6526				x = (s == NULL || s->direction == dir) ?
6527				    &pd.baddr : &pd.naddr;
6528			} else {
6529				x = (s == NULL || s->direction == dir) ?
6530				    &pd.naddr : &pd.baddr;
6531			}
6532			if (x == &pd.baddr || s == NULL) {
6533				if (dir == PF_OUT)
6534					pd.src = x;
6535				else
6536					pd.dst = x;
6537			}
6538		}
6539		if (tr->src.addr.type == PF_ADDR_TABLE)
6540			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
6541			    s->direction == dir) ? pd.src : pd.dst, pd.af,
6542			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6543			    tr->src.not);
6544		if (tr->dst.addr.type == PF_ADDR_TABLE)
6545			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
6546			    s->direction == dir) ? pd.dst : pd.src, pd.af,
6547			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6548			    tr->dst.not);
6549	}
6550
6551
6552	if (action == PF_SYNPROXY_DROP) {
6553		m_freem(*m0);
6554		*m0 = NULL;
6555		action = PF_PASS;
6556	} else if (r->rt)
6557		/* pf_route6 can free the mbuf causing *m0 to become NULL */
6558		pf_route6(m0, r, dir, ifp, s);
6559
6560#ifdef __FreeBSD__
6561	PF_UNLOCK();
6562#endif
6563	return (action);
6564}
6565#endif /* INET6 */
6566