pf.c revision 126261
1/*	$FreeBSD: head/sys/contrib/pf/net/pf.c 126261 2004-02-26 02:34:12Z mlaier $	*/
2/*	$OpenBSD: pf.c,v 1.390 2003/09/24 17:18:03 mcbride Exp $ */
3
4/*
5 * Copyright (c) 2001 Daniel Hartmeier
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *    - Redistributions of source code must retain the above copyright
13 *      notice, this list of conditions and the following disclaimer.
14 *    - Redistributions in binary form must reproduce the above
15 *      copyright notice, this list of conditions and the following
16 *      disclaimer in the documentation and/or other materials provided
17 *      with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38#if defined(__FreeBSD__)
39#include "opt_inet.h"
40#include "opt_inet6.h"
41#endif
42
43#if defined(__FreeBSD__) && __FreeBSD__ >= 5
44#include "opt_bpf.h"
45#define NBPFILTER DEV_BPF
46#include "opt_pf.h"
47#define NPFLOG DEV_PFLOG
48#define NPFSYNC DEV_PFSYNC
49#else
50#include "bpfilter.h"
51#include "pflog.h"
52#include "pfsync.h"
53#endif
54
55#include <sys/param.h>
56#include <sys/systm.h>
57#include <sys/mbuf.h>
58#include <sys/filio.h>
59#include <sys/socket.h>
60#include <sys/socketvar.h>
61#include <sys/kernel.h>
62#include <sys/time.h>
63#if defined(__FreeBSD__)
64#include <sys/sysctl.h>
65#else
66#include <sys/pool.h>
67#endif
68
69#include <net/if.h>
70#include <net/if_types.h>
71#include <net/bpf.h>
72#include <net/route.h>
73
74#include <netinet/in.h>
75#include <netinet/in_var.h>
76#include <netinet/in_systm.h>
77#include <netinet/ip.h>
78#include <netinet/ip_var.h>
79#include <netinet/tcp.h>
80#include <netinet/tcp_seq.h>
81#include <netinet/udp.h>
82#include <netinet/ip_icmp.h>
83#include <netinet/in_pcb.h>
84#include <netinet/tcp_timer.h>
85#include <netinet/tcp_var.h>
86#include <netinet/udp_var.h>
87#include <netinet/icmp_var.h>
88
89#if !defined(__FreeBSD__)
90#include <dev/rndvar.h>
91#endif
92#include <net/pfvar.h>
93#include <net/if_pflog.h>
94#include <net/if_pfsync.h>
95
96#ifdef INET6
97#include <netinet/ip6.h>
98#include <netinet/in_pcb.h>
99#include <netinet/icmp6.h>
100#include <netinet6/nd6.h>
101#if defined(__FreeBSD__)
102#include <netinet6/ip6_var.h>
103#include <netinet6/in6_pcb.h>
104#endif
105#endif /* INET6 */
106
107#ifdef ALTQ
108#include <altq/if_altq.h>
109#endif
110
111#if defined(__FreeBSD__)
112#include <machine/in_cksum.h>
113#if (__FreeBSD_version >= 500112)
114#include <sys/limits.h>
115#else
116#include <machine/limits.h>
117#endif
118#include <sys/ucred.h>
119#endif
120
121#if defined(__FreeBSD__)
122extern int ip_optcopy(struct ip *, struct ip *);
123#if (__FreeBSD_version < 501105)
124int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
125	u_long if_hwassist_flags, int sw_csum);
126#endif
127#endif
128
/*
 * Debug printf: hands the parenthesised argument list `x' to printf when
 * pf_status.debug is at least `n'.  Use as DPFPRINTF(level, ("fmt", ...));
 * The do/while(0) wrapper makes the expansion one statement, so an `else'
 * following the macro cannot bind to the macro's internal `if'.
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
130struct pf_state_tree;
131
132/*
133 * Global variables
134 */
135
136struct pf_anchorqueue	 pf_anchors;
137struct pf_ruleset	 pf_main_ruleset;
138struct pf_altqqueue	 pf_altqs[2];
139struct pf_palist	 pf_pabuf;
140struct pf_altqqueue	*pf_altqs_active;
141struct pf_altqqueue	*pf_altqs_inactive;
142struct pf_status	 pf_status;
143struct ifnet		*status_ifp;
144
145u_int32_t		 ticket_altqs_active;
146u_int32_t		 ticket_altqs_inactive;
147u_int32_t		 ticket_pabuf;
148
149#if defined(__FreeBSD__)
150struct callout	 	 pf_expire_to;			/* expire timeout */
151#else
152struct timeout		 pf_expire_to;			/* expire timeout */
153#endif
154
155
156#if defined(__FreeBSD__)
157uma_zone_t		 pf_tree_pl, pf_rule_pl, pf_addr_pl;
158uma_zone_t		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
159#else
160struct pool		 pf_tree_pl, pf_rule_pl, pf_addr_pl;
161struct pool		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
162#endif
163
164void			 pf_dynaddr_update(void *);
165#if defined(__FreeBSD__) && defined(HOOK_HACK)
166void			pf_dynaddr_update_event(void *arg, struct ifnet *ifp);
167#endif
168void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
169void			 pf_print_state(struct pf_state *);
170void			 pf_print_flags(u_int8_t);
171
172u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
173			    u_int8_t);
174void			 pf_change_ap(struct pf_addr *, u_int16_t *,
175			    u_int16_t *, u_int16_t *, struct pf_addr *,
176			    u_int16_t, u_int8_t, sa_family_t);
177#ifdef INET6
178void			 pf_change_a6(struct pf_addr *, u_int16_t *,
179			    struct pf_addr *, u_int8_t);
180#endif /* INET6 */
181void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
182			    struct pf_addr *, struct pf_addr *, u_int16_t,
183			    u_int16_t *, u_int16_t *, u_int16_t *,
184			    u_int16_t *, u_int8_t, sa_family_t);
185void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
186			    const struct pf_addr *, const struct pf_addr *,
187			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
188			    u_int8_t, u_int16_t, u_int16_t, u_int8_t);
189void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
190			    sa_family_t, struct pf_rule *);
191struct pf_rule		*pf_match_translation(struct pf_pdesc *, struct mbuf *,
192			    int, int, struct ifnet *,
193			    struct pf_addr *, u_int16_t, struct pf_addr *,
194			    u_int16_t, int);
195struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
196			    int, int, struct ifnet *,
197			    struct pf_addr *, u_int16_t,
198			    struct pf_addr *, u_int16_t,
199			    struct pf_addr *, u_int16_t *);
200int			 pf_test_tcp(struct pf_rule **, struct pf_state **,
201			    int, struct ifnet *, struct mbuf *, int, int,
202			    void *, struct pf_pdesc *, struct pf_rule **,
203			    struct pf_ruleset **);
204int			 pf_test_udp(struct pf_rule **, struct pf_state **,
205			    int, struct ifnet *, struct mbuf *, int, int,
206			    void *, struct pf_pdesc *, struct pf_rule **,
207			    struct pf_ruleset **);
208int			 pf_test_icmp(struct pf_rule **, struct pf_state **,
209			    int, struct ifnet *, struct mbuf *, int, int,
210			    void *, struct pf_pdesc *, struct pf_rule **,
211			    struct pf_ruleset **);
212int			 pf_test_other(struct pf_rule **, struct pf_state **,
213			    int, struct ifnet *, struct mbuf *, int, void *,
214			    struct pf_pdesc *, struct pf_rule **,
215			    struct pf_ruleset **);
216int			 pf_test_fragment(struct pf_rule **, int,
217			    struct ifnet *, struct mbuf *, void *,
218			    struct pf_pdesc *, struct pf_rule **,
219			    struct pf_ruleset **);
220int			 pf_test_state_tcp(struct pf_state **, int,
221			    struct ifnet *, struct mbuf *, int, int,
222			    void *, struct pf_pdesc *, u_short *);
223int			 pf_test_state_udp(struct pf_state **, int,
224			    struct ifnet *, struct mbuf *, int, int,
225			    void *, struct pf_pdesc *);
226int			 pf_test_state_icmp(struct pf_state **, int,
227			    struct ifnet *, struct mbuf *, int, int,
228			    void *, struct pf_pdesc *);
229int			 pf_test_state_other(struct pf_state **, int,
230			    struct ifnet *, struct pf_pdesc *);
231struct pf_tag		*pf_get_tag(struct mbuf *);
232int			 pf_match_tag(struct mbuf *, struct pf_rule *,
233			     struct pf_rule *, struct pf_rule *,
234			     struct pf_tag *, int *);
235void			 pf_hash(struct pf_addr *, struct pf_addr *,
236			    struct pf_poolhashkey *, sa_family_t);
237int			 pf_map_addr(u_int8_t, struct pf_pool *,
238			    struct pf_addr *, struct pf_addr *,
239			    struct pf_addr *);
240int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_pool *,
241			    struct pf_addr *, struct pf_addr *, u_int16_t,
242			    struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t);
243void			 pf_route(struct mbuf **, struct pf_rule *, int,
244			    struct ifnet *, struct pf_state *);
245void			 pf_route6(struct mbuf **, struct pf_rule *, int,
246			    struct ifnet *, struct pf_state *);
247int			 pf_socket_lookup(uid_t *, gid_t *, int, sa_family_t,
248			    int, struct pf_pdesc *);
249u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
250			    sa_family_t);
251u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
252			    sa_family_t);
253u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
254				u_int16_t);
255void			 pf_set_rt_ifp(struct pf_state *,
256			    struct pf_addr *);
257int			 pf_check_proto_cksum(struct mbuf *, int, int,
258			    u_int8_t, sa_family_t);
259int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
260			    struct pf_addr_wrap *);
261
262#if defined(__FreeBSD__)
263int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
264#endif
265
266#if defined(__FreeBSD__)
267struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
268#else
269struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] =
270    { { &pf_state_pl, PFSTATE_HIWAT }, { &pf_frent_pl, PFFRAG_FRENT_HIWAT } };
271#endif
272
/*
 * Find the state for `key' and store it in *state.  Relies on `direction',
 * `state', `key' and `ifp' being in scope where the macro is expanded, and
 * returns from the enclosing function in two cases:
 *  - PF_DROP if no state matches (inbound packets are searched in
 *    tree_ext_gwy, everything else in tree_lan_ext);
 *  - PF_PASS for an outbound packet whose state was created by an outbound
 *    route-to or an inbound reply-to rule and whose recorded rt_ifp is set
 *    but differs from `ifp'.
 */
#define STATE_LOOKUP()							\
	do {								\
		*state = pf_find_state(direction == PF_IN ?		\
		    &tree_ext_gwy : &tree_lan_ext, &key);		\
		if (*state == NULL)					\
			return (PF_DROP);				\
		if (direction == PF_OUT &&				\
		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
		    (*state)->rule.ptr->direction == PF_OUT) ||		\
		    ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
		    (*state)->rule.ptr->direction == PF_IN)) &&		\
		    (*state)->rt_ifp != NULL &&				\
		    (*state)->rt_ifp != ifp)				\
			return (PF_PASS);				\
	} while (0)
290
/*
 * True when the state's lan and gwy endpoints differ, i.e. address or port
 * is being rewritten.  Only the first address word is compared unless the
 * state is AF_INET6, in which case all four words are.  The expansion is
 * fully parenthesised so it can be negated or combined with other operators.
 */
#define	STATE_TRANSLATE(s) \
	((s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \
	((s)->af == AF_INET6 && \
	((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \
	(s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \
	(s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
	(s)->lan.port != (s)->gwy.port)
298
299static __inline int pf_state_compare(struct pf_tree_node *,
300			struct pf_tree_node *);
301
302struct pf_state_tree tree_lan_ext, tree_ext_gwy;
303RB_GENERATE(pf_state_tree, pf_tree_node, entry, pf_state_compare);
304
305static __inline int
306pf_state_compare(struct pf_tree_node *a, struct pf_tree_node *b)
307{
308	int	diff;
309
310	if ((diff = a->proto - b->proto) != 0)
311		return (diff);
312	if ((diff = a->af - b->af) != 0)
313		return (diff);
314	switch (a->af) {
315#ifdef INET
316	case AF_INET:
317		if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
318			return (1);
319		if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
320			return (-1);
321		if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
322			return (1);
323		if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
324			return (-1);
325		break;
326#endif /* INET */
327#ifdef INET6
328	case AF_INET6:
329		if (a->addr[0].addr32[3] > b->addr[0].addr32[3])
330			return (1);
331		if (a->addr[0].addr32[3] < b->addr[0].addr32[3])
332			return (-1);
333		if (a->addr[1].addr32[3] > b->addr[1].addr32[3])
334			return (1);
335		if (a->addr[1].addr32[3] < b->addr[1].addr32[3])
336			return (-1);
337		if (a->addr[0].addr32[2] > b->addr[0].addr32[2])
338			return (1);
339		if (a->addr[0].addr32[2] < b->addr[0].addr32[2])
340			return (-1);
341		if (a->addr[1].addr32[2] > b->addr[1].addr32[2])
342			return (1);
343		if (a->addr[1].addr32[2] < b->addr[1].addr32[2])
344			return (-1);
345		if (a->addr[0].addr32[1] > b->addr[0].addr32[1])
346			return (1);
347		if (a->addr[0].addr32[1] < b->addr[0].addr32[1])
348			return (-1);
349		if (a->addr[1].addr32[1] > b->addr[1].addr32[1])
350			return (1);
351		if (a->addr[1].addr32[1] < b->addr[1].addr32[1])
352			return (-1);
353		if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
354			return (1);
355		if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
356			return (-1);
357		if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
358			return (1);
359		if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
360			return (-1);
361		break;
362#endif /* INET6 */
363	}
364
365	if ((diff = a->port[0] - b->port[0]) != 0)
366		return (diff);
367	if ((diff = a->port[1] - b->port[1]) != 0)
368		return (diff);
369
370	return (0);
371}
372
373#ifdef INET6
374void
375pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
376{
377	switch (af) {
378#ifdef INET
379	case AF_INET:
380		dst->addr32[0] = src->addr32[0];
381		break;
382#endif /* INET */
383	case AF_INET6:
384		dst->addr32[0] = src->addr32[0];
385		dst->addr32[1] = src->addr32[1];
386		dst->addr32[2] = src->addr32[2];
387		dst->addr32[3] = src->addr32[3];
388		break;
389	}
390}
391#endif
392
393struct pf_state *
394pf_find_state(struct pf_state_tree *tree, struct pf_tree_node *key)
395{
396	struct pf_tree_node	*k;
397
398	pf_status.fcounters[FCNT_STATE_SEARCH]++;
399	k = RB_FIND(pf_state_tree, tree, key);
400	if (k)
401		return (k->state);
402	else
403		return (NULL);
404}
405
406int
407pf_insert_state(struct pf_state *state)
408{
409	struct pf_tree_node	*keya, *keyb;
410
411	keya = pool_get(&pf_tree_pl, PR_NOWAIT);
412	if (keya == NULL)
413		return (-1);
414	keya->state = state;
415	keya->proto = state->proto;
416	keya->af = state->af;
417	PF_ACPY(&keya->addr[0], &state->lan.addr, state->af);
418	keya->port[0] = state->lan.port;
419	PF_ACPY(&keya->addr[1], &state->ext.addr, state->af);
420	keya->port[1] = state->ext.port;
421
422	/* Thou MUST NOT insert multiple duplicate keys */
423	if (RB_INSERT(pf_state_tree, &tree_lan_ext, keya) != NULL) {
424		if (pf_status.debug >= PF_DEBUG_MISC) {
425			printf("pf: state insert failed: tree_lan_ext");
426			printf(" lan: ");
427			pf_print_host(&state->lan.addr, state->lan.port,
428			    state->af);
429			printf(" gwy: ");
430			pf_print_host(&state->gwy.addr, state->gwy.port,
431			    state->af);
432			printf(" ext: ");
433			pf_print_host(&state->ext.addr, state->ext.port,
434			    state->af);
435			printf("\n");
436		}
437		pool_put(&pf_tree_pl, keya);
438		return (-1);
439	}
440
441	keyb = pool_get(&pf_tree_pl, PR_NOWAIT);
442	if (keyb == NULL) {
443		/* Need to pull out the other state */
444		RB_REMOVE(pf_state_tree, &tree_lan_ext, keya);
445		pool_put(&pf_tree_pl, keya);
446		return (-1);
447	}
448	keyb->state = state;
449	keyb->proto = state->proto;
450	keyb->af = state->af;
451	PF_ACPY(&keyb->addr[0], &state->ext.addr, state->af);
452	keyb->port[0] = state->ext.port;
453	PF_ACPY(&keyb->addr[1], &state->gwy.addr, state->af);
454	keyb->port[1] = state->gwy.port;
455
456	if (RB_INSERT(pf_state_tree, &tree_ext_gwy, keyb) != NULL) {
457		if (pf_status.debug >= PF_DEBUG_MISC) {
458			printf("pf: state insert failed: tree_ext_gwy");
459			printf(" lan: ");
460			pf_print_host(&state->lan.addr, state->lan.port,
461			    state->af);
462			printf(" gwy: ");
463			pf_print_host(&state->gwy.addr, state->gwy.port,
464			    state->af);
465			printf(" ext: ");
466			pf_print_host(&state->ext.addr, state->ext.port,
467			    state->af);
468			printf("\n");
469		}
470		RB_REMOVE(pf_state_tree, &tree_lan_ext, keya);
471		pool_put(&pf_tree_pl, keya);
472		pool_put(&pf_tree_pl, keyb);
473		return (-1);
474	}
475
476	pf_status.fcounters[FCNT_STATE_INSERT]++;
477	pf_status.states++;
478#if NPFSYNC
479	pfsync_insert_state(state);
480#endif
481	return (0);
482}
483
484void
485pf_purge_timeout(void *arg)
486{
487#if defined(__FreeBSD__)
488	struct callout  *to = arg;
489#else
490	struct timeout	*to = arg;
491#endif
492	int		 s;
493
494#if defined(__FreeBSD__)
495	PF_LOCK();
496#endif
497	s = splsoftnet();
498	pf_purge_expired_states();
499	pf_purge_expired_fragments();
500	splx(s);
501#if defined(__FreeBSD__)
502	PF_UNLOCK();
503#endif
504
505#if defined(__FreeBSD__)
506	callout_reset(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz,
507	    pf_purge_timeout, to);
508#else
509	timeout_add(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz);
510#endif
511}
512
513u_int32_t
514pf_state_expires(const struct pf_state *state)
515{
516	u_int32_t	timeout;
517	u_int32_t	start;
518	u_int32_t	end;
519	u_int32_t	states;
520
521	/* handle all PFTM_* > PFTM_MAX here */
522	if (state->timeout == PFTM_PURGE)
523#if defined(__FreeBSD__)
524		return (time_second);
525#else
526		return (time.tv_sec);
527#endif
528	if (state->timeout == PFTM_UNTIL_PACKET)
529		return (0);
530#if defined(__FreeBSD__)
531	KASSERT((state->timeout < PFTM_MAX),
532	    ("pf_state_expires: timeout > PFTM_MAX"));
533#else
534	KASSERT(state->timeout < PFTM_MAX);
535#endif
536	timeout = state->rule.ptr->timeout[state->timeout];
537	if (!timeout)
538		timeout = pf_default_rule.timeout[state->timeout];
539	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
540	if (start) {
541		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
542		states = state->rule.ptr->states;
543	} else {
544		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
545		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
546		states = pf_status.states;
547	}
548	if (end && states > start && start < end) {
549		if (states < end)
550			return (state->expire + timeout * (end - states) /
551			    (end - start));
552		else
553#if defined(__FreeBSD__)
554			return (time_second);
555#else
556			return (time.tv_sec);
557#endif
558	}
559	return (state->expire + timeout);
560}
561
562void
563pf_purge_expired_states(void)
564{
565	struct pf_tree_node	*cur, *peer, *next;
566	struct pf_tree_node	 key;
567
568	for (cur = RB_MIN(pf_state_tree, &tree_ext_gwy); cur; cur = next) {
569		next = RB_NEXT(pf_state_tree, &tree_ext_gwy, cur);
570
571#if defined(__FreeBSD__)
572		if (pf_state_expires(cur->state) <= (u_int32_t)time_second) {
573#else
574		if (pf_state_expires(cur->state) <= time.tv_sec) {
575#endif
576			if (cur->state->src.state == PF_TCPS_PROXY_DST)
577				pf_send_tcp(cur->state->rule.ptr,
578				    cur->state->af,
579				    &cur->state->ext.addr,
580				    &cur->state->lan.addr,
581				    cur->state->ext.port,
582				    cur->state->lan.port,
583				    cur->state->src.seqhi,
584				    cur->state->src.seqlo + 1,
585					0,
586				    TH_RST|TH_ACK, 0, 0);
587			RB_REMOVE(pf_state_tree, &tree_ext_gwy, cur);
588
589			/* Need this key's peer (in the other tree) */
590			key.state = cur->state;
591			key.proto = cur->state->proto;
592			key.af = cur->state->af;
593			PF_ACPY(&key.addr[0], &cur->state->lan.addr,
594			    cur->state->af);
595			key.port[0] = cur->state->lan.port;
596			PF_ACPY(&key.addr[1], &cur->state->ext.addr,
597			    cur->state->af);
598			key.port[1] = cur->state->ext.port;
599
600			peer = RB_FIND(pf_state_tree, &tree_lan_ext, &key);
601#if defined(__FreeBSD__)
602			KASSERT((peer), ("peer null :%s", __FUNCTION__));
603			KASSERT((peer->state == cur->state),
604			   ("peer->state != cur->state: %s", __FUNCTION__));
605#else
606			KASSERT(peer);
607			KASSERT(peer->state == cur->state);
608#endif
609			RB_REMOVE(pf_state_tree, &tree_lan_ext, peer);
610
611#if NPFSYNC
612			pfsync_delete_state(cur->state);
613#endif
614			if (--cur->state->rule.ptr->states <= 0)
615				pf_rm_rule(NULL, cur->state->rule.ptr);
616			if (cur->state->nat_rule.ptr != NULL)
617				if (--cur->state->nat_rule.ptr->states <= 0)
618					pf_rm_rule(NULL,
619					    cur->state->nat_rule.ptr);
620			if (cur->state->anchor.ptr != NULL)
621				if (--cur->state->anchor.ptr->states <= 0)
622					pf_rm_rule(NULL,
623					    cur->state->anchor.ptr);
624			pf_normalize_tcp_cleanup(cur->state);
625			pool_put(&pf_state_pl, cur->state);
626			pool_put(&pf_tree_pl, cur);
627			pool_put(&pf_tree_pl, peer);
628			pf_status.fcounters[FCNT_STATE_REMOVALS]++;
629			pf_status.states--;
630		}
631	}
632}
633
634int
635pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
636{
637	if (aw->type != PF_ADDR_TABLE)
638		return (0);
639	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
640		return (1);
641	return (0);
642}
643
644void
645pf_tbladdr_remove(struct pf_addr_wrap *aw)
646{
647	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
648		return;
649	pfr_detach_table(aw->p.tbl);
650	aw->p.tbl = NULL;
651}
652
653void
654pf_tbladdr_copyout(struct pf_addr_wrap *aw)
655{
656	struct pfr_ktable *kt = aw->p.tbl;
657
658	if (aw->type != PF_ADDR_TABLE || kt == NULL)
659		return;
660	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
661		kt = kt->pfrkt_root;
662	aw->p.tbl = NULL;
663	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
664		kt->pfrkt_cnt : -1;
665}
666
667int
668pf_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af)
669{
670	if (aw->type != PF_ADDR_DYNIFTL)
671		return (0);
672	aw->p.dyn = pool_get(&pf_addr_pl, PR_NOWAIT);
673	if (aw->p.dyn == NULL)
674		return (1);
675	bcopy(aw->v.ifname, aw->p.dyn->ifname, sizeof(aw->p.dyn->ifname));
676	aw->p.dyn->ifp = ifunit(aw->p.dyn->ifname);
677	if (aw->p.dyn->ifp == NULL) {
678		pool_put(&pf_addr_pl, aw->p.dyn);
679		aw->p.dyn = NULL;
680		return (1);
681	}
682	aw->p.dyn->addr = &aw->v.a.addr;
683	aw->p.dyn->af = af;
684	aw->p.dyn->undefined = 1;
685#if !defined(__FreeBSD__)
686	aw->p.dyn->hook_cookie = hook_establish(
687	    aw->p.dyn->ifp->if_addrhooks, 1,
688	    pf_dynaddr_update, aw->p.dyn);
689	if (aw->p.dyn->hook_cookie == NULL) {
690		pool_put(&pf_addr_pl, aw->p.dyn);
691		aw->p.dyn = NULL;
692		return (1);
693	}
694#elif defined(__FreeBSD__) && defined(HOOK_HACK)
695	PF_UNLOCK();
696	aw->p.dyn->hook_cookie = EVENTHANDLER_REGISTER(ifaddr_event,
697	    pf_dynaddr_update_event, aw->p.dyn, EVENTHANDLER_PRI_ANY);
698	PF_LOCK();
699	if (aw->p.dyn->hook_cookie == NULL) {
700		pool_put(&pf_addr_pl, aw->p.dyn);
701		aw->p.dyn = NULL;
702		return (1);
703	}
704#else
705	/*
706	 * XXX
707	 * We have no hook_establish(9)/dohooks(9) kernel interfaces.
708	 * This means that we do not aware of interface address changes(add,
709	 * remove, etc). User should update pf rule manually after interface
710	 * address changed. This may not be possible solution if you use xDSL.
711	 * ipfw/ipfw2's approach with this situation(with me keyword) is not
712	 * very efficient due to analyzing interface address during runtime.
713	 * Another solution is to use a user-land daemon watching address
714	 * changes with socket interface. Neither one is good.
715	 * Supporting hook_establish(9) requries modification of in_control()
716	 * located in netinet/in.c.
717	 */
718#endif
719	pf_dynaddr_update(aw->p.dyn);
720	return (0);
721}
722
723#if defined(__FreeBSD__) && defined(HOOK_HACK)
/*
 * ifaddr_event handler: refreshes the dynamic address record passed as
 * `arg' while holding the pf lock.  `ifp' is not used.
 */
void
pf_dynaddr_update_event(void *arg, struct ifnet *ifp)
{
	void	*dyn = arg;

	PF_LOCK();
	pf_dynaddr_update(dyn);
	PF_UNLOCK();
}
731#endif
732
733void
734pf_dynaddr_update(void *p)
735{
736	struct pf_addr_dyn	*ad = (struct pf_addr_dyn *)p;
737	struct ifaddr		*ia;
738	int			 s, changed = 0;
739
740	if (ad == NULL || ad->ifp == NULL)
741		panic("pf_dynaddr_update");
742	s = splsoftnet();
743	TAILQ_FOREACH(ia, &ad->ifp->if_addrlist, ifa_list)
744		if (ia->ifa_addr != NULL &&
745		    ia->ifa_addr->sa_family == ad->af) {
746			if (ad->af == AF_INET) {
747				struct in_addr *a, *b;
748
749				a = &ad->addr->v4;
750				b = &((struct sockaddr_in *)ia->ifa_addr)
751				    ->sin_addr;
752				if (ad->undefined ||
753				    memcmp(a, b, sizeof(*a))) {
754					bcopy(b, a, sizeof(*a));
755					changed = 1;
756				}
757			} else if (ad->af == AF_INET6) {
758				struct in6_addr *a, *b;
759
760				a = &ad->addr->v6;
761				b = &((struct sockaddr_in6 *)ia->ifa_addr)
762				    ->sin6_addr;
763				if (ad->undefined ||
764				    memcmp(a, b, sizeof(*a))) {
765					bcopy(b, a, sizeof(*a));
766					changed = 1;
767				}
768			}
769			if (changed)
770				ad->undefined = 0;
771			break;
772		}
773	if (ia == NULL)
774		ad->undefined = 1;
775	splx(s);
776}
777
778void
779pf_dynaddr_remove(struct pf_addr_wrap *aw)
780{
781	if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL)
782		return;
783#if !defined(__FreeBSD__)
784	hook_disestablish(aw->p.dyn->ifp->if_addrhooks,
785	    aw->p.dyn->hook_cookie);
786#elif defined(__FreeBSD__) && defined(HOOK_HACK)
787	PF_UNLOCK();
788	EVENTHANDLER_DEREGISTER(ifaddr_event, aw->p.dyn->hook_cookie);
789	PF_LOCK();
790#else
791	/*
792	 * XXX
793	 * We have no hook_establish(9)/dohooks(9) kernel interfaces.
794	 * See comments above function, pf_dynaddr_setup().
795	 */
796#endif
797	pool_put(&pf_addr_pl, aw->p.dyn);
798	aw->p.dyn = NULL;
799}
800
801void
802pf_dynaddr_copyout(struct pf_addr_wrap *aw)
803{
804	if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL)
805		return;
806	bcopy(aw->p.dyn->ifname, aw->v.ifname, sizeof(aw->v.ifname));
807	aw->p.dyn = (struct pf_addr_dyn *)1;
808}
809
/*
 * Print an address and, if non-zero, a port (both in network byte order).
 *
 * AF_INET:  dotted quad, port appended as ":port".
 * AF_INET6: eight hexadecimal groups separated by ':' with the longest run
 *           of two or more zero groups (the first one on a tie) collapsed
 *           to "::"; the port is appended as "[port]".
 * Other address families print nothing.
 */
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			printf(":%u", p);
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;

		/*
		 * 255 means "no run".  With both ends at 255 the length
		 * difference is 0, so a single zero group (difference 0)
		 * never replaces it and is printed as a plain "0".
		 */
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				/* A run just ended: keep it if it is longer. */
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		/* Account for a run that extends to the last group. */
		if ((curend - curstart) > (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				/*
				 * Inside the collapsed run.  A preceding
				 * group already printed one ':'; a run that
				 * starts the address has to print it itself.
				 */
				if (i == 0)
					printf(":");
				if (i == maxend)
					printf(":");
			} else {
				b = ntohs(addr->addr16[i]);
				printf("%x", b);
				if (i < 7)
					printf(":");
			}
		}
		if (p) {
			p = ntohs(p);
			printf("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
873
874void
875pf_print_state(struct pf_state *s)
876{
877	switch (s->proto) {
878	case IPPROTO_TCP:
879		printf("TCP ");
880		break;
881	case IPPROTO_UDP:
882		printf("UDP ");
883		break;
884	case IPPROTO_ICMP:
885		printf("ICMP ");
886		break;
887	case IPPROTO_ICMPV6:
888		printf("ICMPV6 ");
889		break;
890	default:
891		printf("%u ", s->proto);
892		break;
893	}
894	pf_print_host(&s->lan.addr, s->lan.port, s->af);
895	printf(" ");
896	pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
897	printf(" ");
898	pf_print_host(&s->ext.addr, s->ext.port, s->af);
899	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
900	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
901	if (s->src.wscale && s->dst.wscale)
902		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
903	printf("]");
904	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
905	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
906	if (s->src.wscale && s->dst.wscale)
907		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
908	printf("]");
909	printf(" %u:%u", s->src.state, s->dst.state);
910}
911
912void
913pf_print_flags(u_int8_t f)
914{
915	if (f)
916		printf(" ");
917	if (f & TH_FIN)
918		printf("F");
919	if (f & TH_SYN)
920		printf("S");
921	if (f & TH_RST)
922		printf("R");
923	if (f & TH_PUSH)
924		printf("P");
925	if (f & TH_ACK)
926		printf("A");
927	if (f & TH_URG)
928		printf("U");
929	if (f & TH_ECE)
930		printf("E");
931	if (f & TH_CWR)
932		printf("W");
933}
934
/*
 * Helper for pf_calc_skip_steps(); expects `head' and `cur' in scope.
 * Every rule from head[i] up to, but not including, cur gets its skip[i]
 * pointer set to cur; head[i] ends up equal to cur.
 */
#define	PF_SET_SKIP_STEPS(i)					\
	do {							\
		for (; head[i] != cur;				\
		    head[i] = TAILQ_NEXT(head[i], entries))	\
			head[i]->skip[i].ptr = cur;		\
	} while (0)
942
943void
944pf_calc_skip_steps(struct pf_rulequeue *rules)
945{
946	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
947	int i;
948
949	cur = TAILQ_FIRST(rules);
950	prev = cur;
951	for (i = 0; i < PF_SKIP_COUNT; ++i)
952		head[i] = cur;
953	while (cur != NULL) {
954
955		if (cur->ifp != prev->ifp || cur->ifnot != prev->ifnot)
956			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
957		if (cur->direction != prev->direction)
958			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
959		if (cur->af != prev->af)
960			PF_SET_SKIP_STEPS(PF_SKIP_AF);
961		if (cur->proto != prev->proto)
962			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
963		if (cur->src.not != prev->src.not ||
964		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
965			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
966		if (cur->src.port[0] != prev->src.port[0] ||
967		    cur->src.port[1] != prev->src.port[1] ||
968		    cur->src.port_op != prev->src.port_op)
969			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
970		if (cur->dst.not != prev->dst.not ||
971		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
972			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
973		if (cur->dst.port[0] != prev->dst.port[0] ||
974		    cur->dst.port[1] != prev->dst.port[1] ||
975		    cur->dst.port_op != prev->dst.port_op)
976			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
977
978		prev = cur;
979		cur = TAILQ_NEXT(cur, entries);
980	}
981	for (i = 0; i < PF_SKIP_COUNT; ++i)
982		PF_SET_SKIP_STEPS(i);
983}
984
985int
986pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
987{
988	if (aw1->type != aw2->type)
989		return (1);
990	switch (aw1->type) {
991	case PF_ADDR_ADDRMASK:
992		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
993			return (1);
994		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
995			return (1);
996		return (0);
997	case PF_ADDR_DYNIFTL:
998		if (aw1->p.dyn->ifp != aw2->p.dyn->ifp)
999			return (1);
1000		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1001			return (1);
1002		return (0);
1003	case PF_ADDR_NOROUTE:
1004		return (0);
1005	case PF_ADDR_TABLE:
1006		return (aw1->p.tbl != aw2->p.tbl);
1007	default:
1008		printf("invalid address type: %d\n", aw1->type);
1009		return (1);
1010	}
1011}
1012
1013void
1014pf_rule_set_qid(struct pf_rulequeue *rules)
1015{
1016	struct pf_rule *rule;
1017
1018	TAILQ_FOREACH(rule, rules, entries)
1019		if (rule->qname[0] != 0) {
1020			rule->qid = pf_qname_to_qid(rule->qname);
1021			if (rule->pqname[0] != 0)
1022				rule->pqid = pf_qname_to_qid(rule->pqname);
1023			else
1024				rule->pqid = rule->qid;
1025		}
1026}
1027
1028u_int32_t
1029pf_qname_to_qid(char *qname)
1030{
1031	struct pf_altq		*altq;
1032
1033	TAILQ_FOREACH(altq, pf_altqs_active, entries)
1034		if (!strcmp(altq->qname, qname))
1035			return (altq->qid);
1036
1037	return (0);
1038}
1039
1040void
1041pf_update_anchor_rules()
1042{
1043	struct pf_rule	*rule;
1044	int		 i;
1045
1046	for (i = 0; i < PF_RULESET_MAX; ++i)
1047		TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr,
1048		    entries)
1049			if (rule->anchorname[0])
1050				rule->anchor = pf_find_anchor(rule->anchorname);
1051			else
1052				rule->anchor = NULL;
1053}
1054
/*
 * Incrementally adjust a 16-bit checksum for one 16-bit word of the
 * covered data changing from `old' to `new': computes cksum + old - new
 * and folds the upper 16 bits of the result back into the lower 16.
 *
 * When `udp' is non-zero two special cases apply: an incoming checksum of
 * zero is returned unchanged, and a result of zero is returned as 0xFFFF.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	sum = cksum + old - new;
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp && sum == 0)
		return (0xFFFF);
	return (sum);
}
1069
1070void
1071pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1072    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1073{
1074	struct pf_addr	ao;
1075	u_int16_t	po = *p;
1076
1077	PF_ACPY(&ao, a, af);
1078	PF_ACPY(a, an, af);
1079
1080	*p = pn;
1081
1082	switch (af) {
1083#ifdef INET
1084	case AF_INET:
1085		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1086		    ao.addr16[0], an->addr16[0], 0),
1087		    ao.addr16[1], an->addr16[1], 0);
1088		*p = pn;
1089		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1090		    ao.addr16[0], an->addr16[0], u),
1091		    ao.addr16[1], an->addr16[1], u),
1092		    po, pn, u);
1093		break;
1094#endif /* INET */
1095#ifdef INET6
1096	case AF_INET6:
1097		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1098		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1099		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1100		    ao.addr16[0], an->addr16[0], u),
1101		    ao.addr16[1], an->addr16[1], u),
1102		    ao.addr16[2], an->addr16[2], u),
1103		    ao.addr16[3], an->addr16[3], u),
1104		    ao.addr16[4], an->addr16[4], u),
1105		    ao.addr16[5], an->addr16[5], u),
1106		    ao.addr16[6], an->addr16[6], u),
1107		    ao.addr16[7], an->addr16[7], u),
1108		    po, pn, u);
1109		break;
1110#endif /* INET6 */
1111	}
1112}
1113
1114
/*
 * Changes a u_int32_t.  Uses a void * so there are no align restrictions.
 * The old value is read and the new value 'an' stored with memcpy(); the
 * checksum *c is then fixed up for the upper and the lower 16 bits in turn,
 * with 'u' handed to pf_cksum_fixup() as its 'udp' flag.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	old;
	u_int16_t	sum;

	memcpy(&old, a, sizeof(old));
	memcpy(a, &an, sizeof(an));

	sum = pf_cksum_fixup(*c, old >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, old & 0xffff, an & 0xffff, u);
}
1126
1127#ifdef INET6
1128void
1129pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
1130{
1131	struct pf_addr	ao;
1132
1133	PF_ACPY(&ao, a, AF_INET6);
1134	PF_ACPY(a, an, AF_INET6);
1135
1136	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1137	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1138	    pf_cksum_fixup(pf_cksum_fixup(*c,
1139	    ao.addr16[0], an->addr16[0], u),
1140	    ao.addr16[1], an->addr16[1], u),
1141	    ao.addr16[2], an->addr16[2], u),
1142	    ao.addr16[3], an->addr16[3], u),
1143	    ao.addr16[4], an->addr16[4], u),
1144	    ao.addr16[5], an->addr16[5], u),
1145	    ao.addr16[6], an->addr16[6], u),
1146	    ao.addr16[7], an->addr16[7], u);
1147}
1148#endif /* INET6 */
1149
1150void
1151pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1152    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1153    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1154{
1155	struct pf_addr	oia, ooa;
1156
1157	PF_ACPY(&oia, ia, af);
1158	PF_ACPY(&ooa, oa, af);
1159
1160	/* Change inner protocol port, fix inner protocol checksum. */
1161	if (ip != NULL) {
1162		u_int16_t	oip = *ip;
1163		u_int32_t	opc;
1164
1165		if (pc != NULL)
1166			opc = *pc;
1167		*ip = np;
1168		if (pc != NULL)
1169			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
1170		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1171		if (pc != NULL)
1172			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1173	}
1174	/* Change inner ip address, fix inner ip and icmp checksums. */
1175	PF_ACPY(ia, na, af);
1176	switch (af) {
1177#ifdef INET
1178	case AF_INET: {
1179		u_int32_t	 oh2c = *h2c;
1180
1181		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1182		    oia.addr16[0], ia->addr16[0], 0),
1183		    oia.addr16[1], ia->addr16[1], 0);
1184		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1185		    oia.addr16[0], ia->addr16[0], 0),
1186		    oia.addr16[1], ia->addr16[1], 0);
1187		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1188		break;
1189	}
1190#endif /* INET */
1191#ifdef INET6
1192	case AF_INET6:
1193		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1194		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1195		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1196		    oia.addr16[0], ia->addr16[0], u),
1197		    oia.addr16[1], ia->addr16[1], u),
1198		    oia.addr16[2], ia->addr16[2], u),
1199		    oia.addr16[3], ia->addr16[3], u),
1200		    oia.addr16[4], ia->addr16[4], u),
1201		    oia.addr16[5], ia->addr16[5], u),
1202		    oia.addr16[6], ia->addr16[6], u),
1203		    oia.addr16[7], ia->addr16[7], u);
1204		break;
1205#endif /* INET6 */
1206	}
1207	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
1208	PF_ACPY(oa, na, af);
1209	switch (af) {
1210#ifdef INET
1211	case AF_INET:
1212		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1213		    ooa.addr16[0], oa->addr16[0], 0),
1214		    ooa.addr16[1], oa->addr16[1], 0);
1215		break;
1216#endif /* INET */
1217#ifdef INET6
1218	case AF_INET6:
1219		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1220		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1221		    pf_cksum_fixup(pf_cksum_fixup(*ic,
1222		    ooa.addr16[0], oa->addr16[0], u),
1223		    ooa.addr16[1], oa->addr16[1], u),
1224		    ooa.addr16[2], oa->addr16[2], u),
1225		    ooa.addr16[3], oa->addr16[3], u),
1226		    ooa.addr16[4], oa->addr16[4], u),
1227		    ooa.addr16[5], oa->addr16[5], u),
1228		    ooa.addr16[6], oa->addr16[6], u),
1229		    ooa.addr16[7], oa->addr16[7], u);
1230		break;
1231#endif /* INET6 */
1232	}
1233}
1234
1235void
1236pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1237    const struct pf_addr *saddr, const struct pf_addr *daddr,
1238    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
1239    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl)
1240{
1241	struct mbuf	*m;
1242	struct m_tag	*mtag;
1243	int		 len, tlen;
1244#ifdef INET
1245	struct ip	*h;
1246#endif /* INET */
1247#ifdef INET6
1248	struct ip6_hdr	*h6;
1249#endif /* INET6 */
1250	struct tcphdr	*th;
1251#if defined(__FreeBSD__)
1252	struct ip 	*ip;
1253#if (__FreeBSD_version < 501114)
1254	struct route 	 ro;
1255#endif
1256#endif
1257	char *opt;
1258
1259	/* maximum segment size tcp option */
1260	tlen = sizeof(struct tcphdr);
1261	if (mss)
1262		tlen += 4;
1263
1264	switch (af) {
1265#ifdef INET
1266	case AF_INET:
1267		len = sizeof(struct ip) + tlen;
1268		break;
1269#endif /* INET */
1270#ifdef INET6
1271	case AF_INET6:
1272		len = sizeof(struct ip6_hdr) + tlen;
1273		break;
1274#endif /* INET6 */
1275	}
1276
1277	/* create outgoing mbuf */
1278	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
1279	if (mtag == NULL)
1280		return;
1281	m = m_gethdr(M_DONTWAIT, MT_HEADER);
1282	if (m == NULL) {
1283		m_tag_free(mtag);
1284		return;
1285	}
1286	m_tag_prepend(m, mtag);
1287#ifdef ALTQ
1288	if (r != NULL && r->qid) {
1289		struct altq_tag *atag;
1290
1291		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
1292		if (mtag != NULL) {
1293			atag = (struct altq_tag *)(mtag + 1);
1294			atag->qid = r->qid;
1295			/* add hints for ecn */
1296			atag->af = af;
1297			atag->hdr = mtod(m, struct ip *);
1298			m_tag_prepend(m, mtag);
1299		}
1300	}
1301#endif
1302	m->m_data += max_linkhdr;
1303	m->m_pkthdr.len = m->m_len = len;
1304	m->m_pkthdr.rcvif = NULL;
1305	bzero(m->m_data, len);
1306	switch (af) {
1307#ifdef INET
1308	case AF_INET:
1309		h = mtod(m, struct ip *);
1310
1311		/* IP header fields included in the TCP checksum */
1312		h->ip_p = IPPROTO_TCP;
1313		h->ip_len = htons(tlen);
1314		h->ip_src.s_addr = saddr->v4.s_addr;
1315		h->ip_dst.s_addr = daddr->v4.s_addr;
1316
1317		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
1318		break;
1319#endif /* INET */
1320#ifdef INET6
1321	case AF_INET6:
1322		h6 = mtod(m, struct ip6_hdr *);
1323
1324		/* IP header fields included in the TCP checksum */
1325		h6->ip6_nxt = IPPROTO_TCP;
1326		h6->ip6_plen = htons(tlen);
1327		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1328		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1329
1330		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
1331		break;
1332#endif /* INET6 */
1333	}
1334
1335	/* TCP header */
1336	th->th_sport = sport;
1337	th->th_dport = dport;
1338	th->th_seq = htonl(seq);
1339	th->th_ack = htonl(ack);
1340	th->th_off = tlen >> 2;
1341	th->th_flags = flags;
1342	th->th_win = htons(win);
1343
1344	if (mss) {
1345		opt = (char *)(th + 1);
1346		opt[0] = TCPOPT_MAXSEG;
1347		opt[1] = 4;
1348		HTONS(mss);
1349		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
1350	}
1351
1352	switch (af) {
1353#ifdef INET
1354	case AF_INET:
1355		/* TCP checksum */
1356		th->th_sum = in_cksum(m, len);
1357
1358		/* Finish the IP header */
1359		h->ip_v = 4;
1360		h->ip_hl = sizeof(*h) >> 2;
1361		h->ip_tos = IPTOS_LOWDELAY;
1362#if defined(__FreeBSD__)
1363                h->ip_off = htons(path_mtu_discovery ? IP_DF : 0);
1364#else
1365		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
1366#endif
1367		h->ip_len = htons(len);
1368		h->ip_ttl = ttl ? ttl : ip_defttl;
1369		h->ip_sum = 0;
1370#if defined(__FreeBSD__)
1371		ip = mtod(m, struct ip *);
1372		/*
1373		 * XXX
1374		 * OpenBSD changed ip_len/ip_off byte ordering!
1375		 * Because FreeBSD assumes host byte ordering we need to
1376		 * change here.
1377		 */
1378		NTOHS(ip->ip_len);
1379		NTOHS(ip->ip_off);
1380#if (__FreeBSD_version < 501114)
1381		bzero(&ro, sizeof(ro));
1382		ip_rtaddr(ip->ip_dst, &ro);
1383		PF_UNLOCK();
1384		ip_output(m, (void *)NULL, &ro, 0, (void *)NULL,
1385			(void *)NULL);
1386		PF_LOCK();
1387		if(ro.ro_rt) {
1388			RTFREE(ro.ro_rt);
1389		}
1390#else /* __FreeBSD_version >= 501114 */
1391		PF_UNLOCK();
1392		ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
1393			(void *)NULL);
1394		PF_LOCK();
1395#endif
1396#else /* ! __FreeBSD__ */
1397		ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
1398		    (void *)NULL);
1399#endif
1400		break;
1401#endif /* INET */
1402#ifdef INET6
1403	case AF_INET6:
1404		/* TCP checksum */
1405		th->th_sum = in6_cksum(m, IPPROTO_TCP,
1406		    sizeof(struct ip6_hdr), tlen);
1407
1408		h6->ip6_vfc |= IPV6_VERSION;
1409		h6->ip6_hlim = IPV6_DEFHLIM;
1410
1411#if defined(__FreeBSD__)
1412		PF_UNLOCK();
1413		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
1414		PF_LOCK();
1415#else
1416		ip6_output(m, NULL, NULL, 0, NULL, NULL);
1417#endif
1418		break;
1419#endif /* INET6 */
1420	}
1421}
1422
1423void
1424pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1425    struct pf_rule *r)
1426{
1427	struct m_tag	*mtag;
1428	struct mbuf	*m0;
1429#if defined(__FreeBSD__)
1430	struct ip *ip;
1431#endif
1432
1433	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
1434	if (mtag == NULL)
1435		return;
1436#if defined(__FreeBSD__)
1437	m0 = m_copypacket(m, M_DONTWAIT);
1438#else
1439	m0 = m_copy(m, 0, M_COPYALL);
1440#endif
1441	if (m0 == NULL) {
1442		m_tag_free(mtag);
1443		return;
1444	}
1445	m_tag_prepend(m0, mtag);
1446
1447#ifdef ALTQ
1448	if (r->qid) {
1449		struct altq_tag *atag;
1450
1451		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
1452		if (mtag != NULL) {
1453			atag = (struct altq_tag *)(mtag + 1);
1454			atag->qid = r->qid;
1455			/* add hints for ecn */
1456			atag->af = af;
1457			atag->hdr = mtod(m0, struct ip *);
1458			m_tag_prepend(m0, mtag);
1459		}
1460	}
1461#endif
1462
1463	switch (af) {
1464#ifdef INET
1465	case AF_INET:
1466#if defined(__FreeBSD__)
1467		/* icmp_error() expects host byte ordering */
1468		ip = mtod(m0, struct ip *);
1469		NTOHS(ip->ip_len);
1470		NTOHS(ip->ip_off);
1471		PF_UNLOCK();
1472#endif
1473		icmp_error(m0, type, code, 0, NULL);
1474#if defined(__FreeBSD__)
1475		PF_LOCK();
1476#endif
1477		break;
1478#endif /* INET */
1479#ifdef INET6
1480	case AF_INET6:
1481#if defined(__FreeBSD__)
1482		PF_UNLOCK();
1483#endif
1484		icmp6_error(m0, type, code, 0);
1485#if defined(__FreeBSD__)
1486		PF_LOCK();
1487#endif
1488		break;
1489#endif /* INET6 */
1490	}
1491}
1492
/*
 * Compare addresses a and b under mask m for address family af.
 * With n == 0 the result is 1 when the masked addresses are equal and 0
 * otherwise; with n != 0 the result is inverted.  For an address family
 * that is not handled the addresses count as "not equal".
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	equal = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		equal = ((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0]));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/* all four 32-bit words have to agree under the mask */
		equal = 1;
		for (i = 0; i < 4; i++) {
			if ((a->addr32[i] & m->addr32[i]) !=
			    (b->addr32[i] & m->addr32[i])) {
				equal = 0;
				break;
			}
		}
		break;
	}
#endif /* INET6 */
	}

	return (n ? !equal : equal);
}
1538
1539int
1540pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1541{
1542	switch (op) {
1543	case PF_OP_IRG:
1544		return ((p > a1) && (p < a2));
1545	case PF_OP_XRG:
1546		return ((p < a1) || (p > a2));
1547	case PF_OP_RRG:
1548		return ((p >= a1) && (p <= a2));
1549	case PF_OP_EQ:
1550		return (p == a1);
1551	case PF_OP_NE:
1552		return (p != a1);
1553	case PF_OP_LT:
1554		return (p < a1);
1555	case PF_OP_LE:
1556		return (p <= a1);
1557	case PF_OP_GT:
1558		return (p > a1);
1559	case PF_OP_GE:
1560		return (p >= a1);
1561	}
1562	return (0); /* never reached */
1563}
1564
/*
 * Port flavour of pf_match(): all three port values are converted with
 * ntohs() before the comparison is made.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p)));
}
1573
1574int
1575pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1576{
1577	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1578		return (0);
1579	return (pf_match(op, a1, a2, u));
1580}
1581
1582int
1583pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
1584{
1585	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1586		return (0);
1587	return (pf_match(op, a1, a2, g));
1588}
1589
1590struct pf_tag *
1591pf_get_tag(struct mbuf *m)
1592{
1593	struct m_tag	*mtag;
1594
1595	if ((mtag = m_tag_find(m, PACKET_TAG_PF_TAG, NULL)) != NULL)
1596		return ((struct pf_tag *)(mtag + 1));
1597	else
1598		return (NULL);
1599}
1600
1601int
1602pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_rule *nat,
1603    struct pf_rule *rdr, struct pf_tag *pftag, int *tag)
1604{
1605	if (*tag == -1) {	/* find mbuf tag */
1606		pftag = pf_get_tag(m);
1607		if (pftag != NULL)
1608			*tag = pftag->tag;
1609		else
1610			*tag = 0;
1611		if (nat != NULL && nat->tag)
1612			*tag = nat->tag;
1613		if (rdr != NULL && rdr->tag)
1614			*tag = rdr->tag;
1615	}
1616
1617	return ((!r->match_tag_not && r->match_tag == *tag) ||
1618	    (r->match_tag_not && r->match_tag != *tag));
1619}
1620
1621int
1622pf_tag_packet(struct mbuf *m, struct pf_tag *pftag, int tag)
1623{
1624	struct m_tag	*mtag;
1625
1626	if (tag <= 0)
1627		return (0);
1628
1629	if (pftag == NULL) {
1630		mtag = m_tag_get(PACKET_TAG_PF_TAG, sizeof(*pftag), M_NOWAIT);
1631		if (mtag == NULL)
1632			return (1);
1633		((struct pf_tag *)(mtag + 1))->tag = tag;
1634		m_tag_prepend(m, mtag);
1635	} else
1636		pftag->tag = tag;
1637
1638	return (0);
1639}
1640
/*
 * Descend from anchor rule 'r' into the rulesets of r->anchor.
 *
 * On entry 'r' must be a rule with an anchor while 'a' and 's' must be
 * NULL; anything else panics.  'a' remembers the anchor rule, 's' is set to
 * the first ruleset of the anchor whose active queue 'n' is not empty and
 * 'r' to the first rule of that queue.  When no such ruleset exists, 'r'
 * continues with the rule following the anchor rule and 'a' is reset to
 * NULL.  All arguments are evaluated more than once.
 */
#define PF_STEP_INTO_ANCHOR(r, a, s, n)					\
	do {								\
		if ((r) == NULL || (r)->anchor == NULL ||		\
		    (s) != NULL || (a) != NULL)				\
			panic("PF_STEP_INTO_ANCHOR");			\
		(a) = (r);						\
		(r) = NULL;						\
		for ((s) = TAILQ_FIRST(&(a)->anchor->rulesets);		\
		    (s) != NULL; (s) = TAILQ_NEXT((s), entries)) {	\
			(r) = TAILQ_FIRST((s)->rules[n].active.ptr);	\
			if ((r) != NULL)				\
				break;					\
		}							\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1657
/*
 * Continue after the rules of ruleset 's' inside an anchor are exhausted.
 *
 * On entry 'r' must be NULL while 'a' (the anchor rule) and 's' (the
 * current ruleset) must be set; anything else panics.  's' advances to the
 * next ruleset whose active queue 'n' is not empty and 'r' becomes its
 * first rule.  When no ruleset is left, 'r' continues with the rule
 * following the anchor rule and 'a' is reset to NULL.  All arguments are
 * evaluated more than once.
 */
#define PF_STEP_OUT_OF_ANCHOR(r, a, s, n)				\
	do {								\
		if ((r) != NULL || (a) == NULL || (s) == NULL)		\
			panic("PF_STEP_OUT_OF_ANCHOR");			\
		for ((s) = TAILQ_NEXT((s), entries); (s) != NULL;	\
		    (s) = TAILQ_NEXT((s), entries)) {			\
			(r) = TAILQ_FIRST((s)->rules[n].active.ptr);	\
			if ((r) != NULL)				\
				break;					\
		}							\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1671
1672#ifdef INET6
1673void
1674pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
1675    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
1676{
1677	switch (af) {
1678#ifdef INET
1679	case AF_INET:
1680		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1681		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1682		break;
1683#endif /* INET */
1684	case AF_INET6:
1685		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1686		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1687		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
1688		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
1689		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
1690		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
1691		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
1692		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
1693		break;
1694	}
1695}
1696
1697void
1698pf_addr_inc(struct pf_addr *addr, u_int8_t af)
1699{
1700	switch (af) {
1701#ifdef INET
1702	case AF_INET:
1703		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
1704		break;
1705#endif /* INET */
1706	case AF_INET6:
1707		if (addr->addr32[3] == 0xffffffff) {
1708			addr->addr32[3] = 0;
1709			if (addr->addr32[2] == 0xffffffff) {
1710				addr->addr32[2] = 0;
1711				if (addr->addr32[1] == 0xffffffff) {
1712					addr->addr32[1] = 0;
1713					addr->addr32[0] =
1714					    htonl(ntohl(addr->addr32[0]) + 1);
1715				} else
1716					addr->addr32[1] =
1717					    htonl(ntohl(addr->addr32[1]) + 1);
1718			} else
1719				addr->addr32[2] =
1720				    htonl(ntohl(addr->addr32[2]) + 1);
1721		} else
1722			addr->addr32[3] =
1723			    htonl(ntohl(addr->addr32[3]) + 1);
1724		break;
1725	}
1726}
1727#endif /* INET6 */
1728
/*
 * Mixing step used by pf_hash(): scrambles the three words a, b and c in
 * place through nine rounds of subtract / xor-with-shift.  The arguments
 * must be modifiable lvalues and are evaluated several times.
 */
#define mix(a,b,c) \
	do {							\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);	\
	} while (0)
1741
1742/*
1743 * hash function based on bridge_hash in if_bridge.c
1744 */
1745void
1746pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
1747    struct pf_poolhashkey *key, sa_family_t af)
1748{
1749	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
1750
1751	switch (af) {
1752#ifdef INET
1753	case AF_INET:
1754		a += inaddr->addr32[0];
1755		b += key->key32[1];
1756		mix(a, b, c);
1757		hash->addr32[0] = c + key->key32[2];
1758		break;
1759#endif /* INET */
1760#ifdef INET6
1761	case AF_INET6:
1762		a += inaddr->addr32[0];
1763		b += inaddr->addr32[2];
1764		mix(a, b, c);
1765		hash->addr32[0] = c;
1766		a += inaddr->addr32[1];
1767		b += inaddr->addr32[3];
1768		c += key->key32[1];
1769		mix(a, b, c);
1770		hash->addr32[1] = c;
1771		a += inaddr->addr32[2];
1772		b += inaddr->addr32[1];
1773		c += key->key32[2];
1774		mix(a, b, c);
1775		hash->addr32[2] = c;
1776		a += inaddr->addr32[3];
1777		b += inaddr->addr32[0];
1778		c += key->key32[3];
1779		mix(a, b, c);
1780		hash->addr32[3] = c;
1781		break;
1782#endif /* INET6 */
1783	}
1784}
1785
1786int
1787pf_map_addr(u_int8_t af, struct pf_pool *rpool, struct pf_addr *saddr,
1788    struct pf_addr *naddr, struct pf_addr *init_addr)
1789{
1790	unsigned char		 hash[16];
1791	struct pf_addr		*raddr;
1792	struct pf_addr		*rmask;
1793	struct pf_pooladdr	*acur = rpool->cur;
1794
1795	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
1796		return (1);
1797	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL &&
1798	    rpool->cur->addr.p.dyn->undefined)
1799		return (1);
1800	if (rpool->cur->addr.type == PF_ADDR_TABLE) {
1801		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
1802			return (1); /* unsupported */
1803	} else {
1804		raddr = &rpool->cur->addr.v.a.addr;
1805		rmask = &rpool->cur->addr.v.a.mask;
1806	}
1807
1808	switch (rpool->opts & PF_POOL_TYPEMASK) {
1809	case PF_POOL_NONE:
1810		PF_ACPY(naddr, raddr, af);
1811		break;
1812	case PF_POOL_BITMASK:
1813		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
1814		break;
1815	case PF_POOL_RANDOM:
1816		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
1817			switch (af) {
1818#ifdef INET
1819			case AF_INET:
1820				rpool->counter.addr32[0] = arc4random();
1821				break;
1822#endif /* INET */
1823#ifdef INET6
1824			case AF_INET6:
1825				if (rmask->addr32[3] != 0xffffffff)
1826					rpool->counter.addr32[3] = arc4random();
1827				else
1828					break;
1829				if (rmask->addr32[2] != 0xffffffff)
1830					rpool->counter.addr32[2] = arc4random();
1831				else
1832					break;
1833				if (rmask->addr32[1] != 0xffffffff)
1834					rpool->counter.addr32[1] = arc4random();
1835				else
1836					break;
1837				if (rmask->addr32[0] != 0xffffffff)
1838					rpool->counter.addr32[0] = arc4random();
1839				break;
1840#endif /* INET6 */
1841			}
1842			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
1843			PF_ACPY(init_addr, naddr, af);
1844
1845		} else {
1846			PF_AINC(&rpool->counter, af);
1847			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
1848		}
1849		break;
1850	case PF_POOL_SRCHASH:
1851		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
1852		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
1853		break;
1854	case PF_POOL_ROUNDROBIN:
1855		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
1856			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
1857			    &rpool->tblidx, &rpool->counter,
1858			    &raddr, &rmask, af))
1859				goto get_addr;
1860		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
1861			goto get_addr;
1862
1863	try_next:
1864		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
1865			rpool->cur = TAILQ_FIRST(&rpool->list);
1866		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
1867			rpool->tblidx = -1;
1868			if (pfr_pool_get(rpool->cur->addr.p.tbl,
1869			    &rpool->tblidx, &rpool->counter,
1870			    &raddr, &rmask, af)) {
1871				/* table contain no address of type 'af' */
1872				if (rpool->cur != acur)
1873					goto try_next;
1874				return (1);
1875			}
1876		} else {
1877			raddr = &rpool->cur->addr.v.a.addr;
1878			rmask = &rpool->cur->addr.v.a.mask;
1879			PF_ACPY(&rpool->counter, raddr, af);
1880		}
1881
1882	get_addr:
1883		PF_ACPY(naddr, &rpool->counter, af);
1884		PF_AINC(&rpool->counter, af);
1885		break;
1886	}
1887
1888	if (pf_status.debug >= PF_DEBUG_MISC &&
1889	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
1890		printf("pf_map_addr: selected address: ");
1891		pf_print_host(naddr, 0, af);
1892		printf("\n");
1893	}
1894
1895	return (0);
1896}
1897
1898int
1899pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_pool *rpool,
1900    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
1901    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high)
1902{
1903	struct pf_tree_node	key;
1904	struct pf_addr		init_addr;
1905	u_int16_t		cut;
1906
1907	bzero(&init_addr, sizeof(init_addr));
1908	if (pf_map_addr(af, rpool, saddr, naddr, &init_addr))
1909		return (1);
1910
1911	do {
1912		key.af = af;
1913		key.proto = proto;
1914		PF_ACPY(&key.addr[0], daddr, key.af);
1915		PF_ACPY(&key.addr[1], naddr, key.af);
1916		key.port[0] = dport;
1917
1918		/*
1919		 * port search; start random, step;
1920		 * similar 2 portloop in in_pcbbind
1921		 */
1922		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
1923			key.port[1] = 0;
1924			if (pf_find_state(&tree_ext_gwy, &key) == NULL)
1925				return (0);
1926		} else if (low == 0 && high == 0) {
1927			key.port[1] = *nport;
1928			if (pf_find_state(&tree_ext_gwy, &key) == NULL) {
1929				return (0);
1930			}
1931		} else if (low == high) {
1932			key.port[1] = htons(low);
1933			if (pf_find_state(&tree_ext_gwy, &key) == NULL) {
1934				*nport = htons(low);
1935				return (0);
1936			}
1937		} else {
1938			u_int16_t tmp;
1939
1940			if (low > high) {
1941				tmp = low;
1942				low = high;
1943				high = tmp;
1944			}
1945			/* low < high */
1946			cut = arc4random() % (1 + high - low) + low;
1947			/* low <= cut <= high */
1948			for (tmp = cut; tmp <= high; ++(tmp)) {
1949				key.port[1] = htons(tmp);
1950				if (pf_find_state(&tree_ext_gwy, &key) ==
1951				    NULL) {
1952					*nport = htons(tmp);
1953					return (0);
1954				}
1955			}
1956			for (tmp = cut - 1; tmp >= low; --(tmp)) {
1957				key.port[1] = htons(tmp);
1958				if (pf_find_state(&tree_ext_gwy, &key) ==
1959				    NULL) {
1960					*nport = htons(tmp);
1961					return (0);
1962				}
1963			}
1964		}
1965
1966		switch (rpool->opts & PF_POOL_TYPEMASK) {
1967		case PF_POOL_RANDOM:
1968		case PF_POOL_ROUNDROBIN:
1969			if (pf_map_addr(af, rpool, saddr, naddr, &init_addr))
1970				return (1);
1971			break;
1972		case PF_POOL_NONE:
1973		case PF_POOL_SRCHASH:
1974		case PF_POOL_BITMASK:
1975		default:
1976			return (1);
1977			break;
1978		}
1979	} while (! PF_AEQ(&init_addr, naddr, af) );
1980
1981	return (1);					/* none available */
1982}
1983
1984struct pf_rule *
1985pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
1986    int direction, struct ifnet *ifp, struct pf_addr *saddr, u_int16_t sport,
1987    struct pf_addr *daddr, u_int16_t dport, int rs_num)
1988{
1989	struct pf_rule		*r, *rm = NULL, *anchorrule = NULL;
1990	struct pf_ruleset	*ruleset = NULL;
1991
1992	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
1993	while (r && rm == NULL) {
1994		struct pf_rule_addr	*src = NULL, *dst = NULL;
1995		struct pf_addr_wrap	*xdst = NULL;
1996
1997		if (r->action == PF_BINAT && direction == PF_IN) {
1998			src = &r->dst;
1999			if (r->rpool.cur != NULL)
2000				xdst = &r->rpool.cur->addr;
2001		} else {
2002			src = &r->src;
2003			dst = &r->dst;
2004		}
2005
2006		r->evaluations++;
2007		if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) ||
2008		    (r->ifp == ifp && r->ifnot)))
2009			r = r->skip[PF_SKIP_IFP].ptr;
2010		else if (r->direction && r->direction != direction)
2011			r = r->skip[PF_SKIP_DIR].ptr;
2012		else if (r->af && r->af != pd->af)
2013			r = r->skip[PF_SKIP_AF].ptr;
2014		else if (r->proto && r->proto != pd->proto)
2015			r = r->skip[PF_SKIP_PROTO].ptr;
2016		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->not))
2017			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2018			    PF_SKIP_DST_ADDR].ptr;
2019		else if (src->port_op && !pf_match_port(src->port_op,
2020		    src->port[0], src->port[1], sport))
2021			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2022			    PF_SKIP_DST_PORT].ptr;
2023		else if (dst != NULL &&
2024		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->not))
2025			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2026		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0))
2027			r = TAILQ_NEXT(r, entries);
2028		else if (dst != NULL && dst->port_op &&
2029		    !pf_match_port(dst->port_op, dst->port[0],
2030		    dst->port[1], dport))
2031			r = r->skip[PF_SKIP_DST_PORT].ptr;
2032		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2033		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2034		    off, pd->hdr.tcp), r->os_fingerprint)))
2035			r = TAILQ_NEXT(r, entries);
2036		else if (r->anchorname[0] && r->anchor == NULL)
2037			r = TAILQ_NEXT(r, entries);
2038		else if (r->anchor == NULL)
2039				rm = r;
2040		else
2041			PF_STEP_INTO_ANCHOR(r, anchorrule, ruleset, rs_num);
2042		if (r == NULL && anchorrule != NULL)
2043			PF_STEP_OUT_OF_ANCHOR(r, anchorrule, ruleset,
2044			    rs_num);
2045	}
2046	if (rm != NULL && (rm->action == PF_NONAT ||
2047	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2048		return (NULL);
2049	return (rm);
2050}
2051
2052struct pf_rule *
2053pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2054    struct ifnet *ifp,
2055    struct pf_addr *saddr, u_int16_t sport,
2056    struct pf_addr *daddr, u_int16_t dport,
2057    struct pf_addr *naddr, u_int16_t *nport)
2058{
2059	struct pf_rule	*r = NULL;
2060
2061	if (direction == PF_OUT) {
2062		r = pf_match_translation(pd, m, off, direction, ifp, saddr,
2063		    sport, daddr, dport, PF_RULESET_BINAT);
2064		if (r == NULL)
2065			r = pf_match_translation(pd, m, off, direction, ifp,
2066			    saddr, sport, daddr, dport, PF_RULESET_NAT);
2067	} else {
2068		r = pf_match_translation(pd, m, off, direction, ifp, saddr,
2069		    sport, daddr, dport, PF_RULESET_RDR);
2070		if (r == NULL)
2071			r = pf_match_translation(pd, m, off, direction, ifp,
2072			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
2073	}
2074
2075	if (r != NULL) {
2076		switch (r->action) {
2077		case PF_NONAT:
2078		case PF_NOBINAT:
2079		case PF_NORDR:
2080			return (NULL);
2081			break;
2082		case PF_NAT:
2083			if (pf_get_sport(pd->af, pd->proto, &r->rpool, saddr,
2084			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2085			    r->rpool.proxy_port[1])) {
2086				DPFPRINTF(PF_DEBUG_MISC,
2087				    ("pf: NAT proxy port allocation "
2088				    "(%u-%u) failed\n",
2089				    r->rpool.proxy_port[0],
2090				    r->rpool.proxy_port[1]));
2091				return (NULL);
2092			}
2093			break;
2094		case PF_BINAT:
2095			switch (direction) {
2096			case PF_OUT:
2097				if (r->rpool.cur->addr.type ==
2098				    PF_ADDR_DYNIFTL &&
2099				    r->rpool.cur->addr.p.dyn->undefined)
2100					return (NULL);
2101				else
2102					PF_POOLMASK(naddr,
2103					    &r->rpool.cur->addr.v.a.addr,
2104					    &r->rpool.cur->addr.v.a.mask,
2105					    saddr, pd->af);
2106				break;
2107			case PF_IN:
2108				if (r->src.addr.type == PF_ADDR_DYNIFTL &&
2109				    r->src.addr.p.dyn->undefined)
2110					return (NULL);
2111				else
2112					PF_POOLMASK(naddr,
2113					    &r->src.addr.v.a.addr,
2114					    &r->src.addr.v.a.mask, daddr,
2115					    pd->af);
2116				break;
2117			}
2118			break;
2119		case PF_RDR: {
2120			if (pf_map_addr(r->af, &r->rpool, saddr, naddr, NULL))
2121				return (NULL);
2122
2123			if (r->rpool.proxy_port[1]) {
2124				u_int32_t	tmp_nport;
2125
2126				tmp_nport = ((ntohs(dport) -
2127				    ntohs(r->dst.port[0])) %
2128				    (r->rpool.proxy_port[1] -
2129				    r->rpool.proxy_port[0] + 1)) +
2130				    r->rpool.proxy_port[0];
2131
2132				/* wrap around if necessary */
2133				if (tmp_nport > 65535)
2134					tmp_nport -= 65535;
2135				*nport = htons((u_int16_t)tmp_nport);
2136			} else if (r->rpool.proxy_port[0])
2137				*nport = htons(r->rpool.proxy_port[0]);
2138			break;
2139		}
2140		default:
2141			return (NULL);
2142			break;
2143		}
2144	}
2145
2146	return (r);
2147}
2148
2149int
2150pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, sa_family_t af,
2151    int proto, struct pf_pdesc *pd)
2152{
2153	struct pf_addr		*saddr, *daddr;
2154	u_int16_t		 sport, dport;
2155#if defined(__FreeBSD__)
2156	struct inpcbinfo	*pi;
2157#else
2158	struct inpcbtable	*tb;
2159#endif
2160	struct inpcb		*inp;
2161
2162	*uid = UID_MAX;
2163	*gid = GID_MAX;
2164	switch (proto) {
2165	case IPPROTO_TCP:
2166		sport = pd->hdr.tcp->th_sport;
2167		dport = pd->hdr.tcp->th_dport;
2168#if defined(__FreeBSD__)
2169		pi = &tcbinfo;
2170#else
2171		tb = &tcbtable;
2172#endif
2173		break;
2174	case IPPROTO_UDP:
2175		sport = pd->hdr.udp->uh_sport;
2176		dport = pd->hdr.udp->uh_dport;
2177#if defined(__FreeBSD__)
2178		pi = &udbinfo;
2179#else
2180		tb = &udbtable;
2181#endif
2182		break;
2183	default:
2184		return (0);
2185	}
2186	if (direction == PF_IN) {
2187		saddr = pd->src;
2188		daddr = pd->dst;
2189	} else {
2190		u_int16_t	p;
2191
2192		p = sport;
2193		sport = dport;
2194		dport = p;
2195		saddr = pd->dst;
2196		daddr = pd->src;
2197	}
2198	switch(af) {
2199	case AF_INET:
2200#if defined(__FreeBSD__)
2201#if (__FreeBSD_version >= 500043)
2202		INP_INFO_RLOCK(pi);	/* XXX LOR */
2203#endif
2204		inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2205			dport, 0, NULL);
2206		if (inp == NULL) {
2207			inp = in_pcblookup_hash(pi, saddr->v4, sport,
2208			   daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
2209			if(inp == NULL) {
2210#if (__FreeBSD_version >= 500043)
2211				INP_INFO_RUNLOCK(pi);
2212#endif
2213				return (0);
2214			}
2215		}
2216#else
2217		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
2218		if (inp == NULL) {
2219			inp = in_pcblookup(tb, &saddr->v4, sport, &daddr->v4,
2220			    dport, INPLOOKUP_WILDCARD);
2221			if (inp == NULL)
2222				return (0);
2223		}
2224#endif
2225		break;
2226#ifdef INET6
2227	case AF_INET6:
2228#if defined(__FreeBSD__)
2229#if (__FreeBSD_version >= 500043)
2230		INP_INFO_RLOCK(pi);
2231#endif
2232		inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2233			&daddr->v6, dport, 0, NULL);
2234		if (inp == NULL) {
2235			inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2236			&daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2237			if (inp == NULL) {
2238#if (__FreeBSD_version >= 500043)
2239				INP_INFO_RUNLOCK(pi);
2240#endif
2241				return (0);
2242			}
2243		}
2244#else
2245		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
2246		    dport);
2247		if (inp == NULL) {
2248			inp = in_pcblookup(tb, &saddr->v6, sport, &daddr->v6,
2249			    dport, INPLOOKUP_WILDCARD | INPLOOKUP_IPV6);
2250			if (inp == NULL)
2251				return (0);
2252		}
2253#endif
2254		break;
2255#endif /* INET6 */
2256
2257	default:
2258		return (0);
2259	}
2260#if defined(__FreeBSD__)
2261#if (__FreeBSD_version >= 500043)
2262	INP_LOCK(inp);
2263#endif
2264	*uid = inp->inp_socket->so_cred->cr_uid;
2265	*gid = inp->inp_socket->so_cred->cr_groups[0];
2266#if (__FreeBSD_version >= 500043)
2267	INP_UNLOCK(inp);
2268	INP_INFO_RUNLOCK(pi);
2269#endif
2270#else
2271	*uid = inp->inp_socket->so_euid;
2272	*gid = inp->inp_socket->so_egid;
2273#endif
2274	return (1);
2275}
2276
2277u_int8_t
2278pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2279{
2280	int		 hlen;
2281	u_int8_t	 hdr[60];
2282	u_int8_t	*opt, optlen;
2283	u_int8_t	 wscale = 0;
2284
2285	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
2286	if (hlen <= sizeof(struct tcphdr))
2287		return (0);
2288	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2289		return (0);
2290	opt = hdr + sizeof(struct tcphdr);
2291	hlen -= sizeof(struct tcphdr);
2292	while (hlen >= 3) {
2293		switch (*opt) {
2294		case TCPOPT_EOL:
2295		case TCPOPT_NOP:
2296			++opt;
2297			--hlen;
2298			break;
2299		case TCPOPT_WINDOW:
2300			wscale = opt[2];
2301			if (wscale > TCP_MAX_WINSHIFT)
2302				wscale = TCP_MAX_WINSHIFT;
2303			wscale |= PF_WSCALE_FLAG;
2304			/* fallthrough */
2305		default:
2306			optlen = opt[1];
2307			if (optlen < 2)
2308				optlen = 2;
2309			hlen -= optlen;
2310			opt += optlen;
2311		}
2312	}
2313	return (wscale);
2314}
2315
2316u_int16_t
2317pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2318{
2319	int		 hlen;
2320	u_int8_t	 hdr[60];
2321	u_int8_t	*opt, optlen;
2322	u_int16_t	 mss = tcp_mssdflt;
2323
2324	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
2325	if (hlen <= sizeof(struct tcphdr))
2326		return (0);
2327	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2328		return (0);
2329	opt = hdr + sizeof(struct tcphdr);
2330	hlen -= sizeof(struct tcphdr);
2331	while (hlen >= TCPOLEN_MAXSEG) {
2332		switch (*opt) {
2333		case TCPOPT_EOL:
2334		case TCPOPT_NOP:
2335			++opt;
2336			--hlen;
2337			break;
2338		case TCPOPT_MAXSEG:
2339			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
2340			/* fallthrough */
2341		default:
2342			optlen = opt[1];
2343			if (optlen < 2)
2344				optlen = 2;
2345			hlen -= optlen;
2346			opt += optlen;
2347		}
2348	}
2349	return (mss);
2350}
2351
2352u_int16_t
2353pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2354{
2355#ifdef INET
2356	struct sockaddr_in	*dst;
2357	struct route		 ro;
2358#endif /* INET */
2359#ifdef INET6
2360	struct sockaddr_in6	*dst6;
2361	struct route_in6	 ro6;
2362#endif /* INET6 */
2363	struct rtentry		*rt = NULL;
2364	int			 hlen;
2365	u_int16_t		 mss = tcp_mssdflt;
2366
2367	switch (af) {
2368#ifdef INET
2369	case AF_INET:
2370		hlen = sizeof(struct ip);
2371		bzero(&ro, sizeof(ro));
2372		dst = (struct sockaddr_in *)&ro.ro_dst;
2373		dst->sin_family = AF_INET;
2374		dst->sin_len = sizeof(*dst);
2375		dst->sin_addr = addr->v4;
2376#if defined(__FreeBSD__)
2377#ifdef RTF_PRCLONING
2378		rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
2379#else /* !RTF_PRCLONING */
2380		rtalloc_ign(&ro, RTF_CLONING);
2381#endif
2382#else /* ! __FreeBSD__ */
2383		rtalloc_noclone(&ro, NO_CLONING);
2384#endif
2385		rt = ro.ro_rt;
2386		break;
2387#endif /* INET */
2388#ifdef INET6
2389	case AF_INET6:
2390		hlen = sizeof(struct ip6_hdr);
2391		bzero(&ro6, sizeof(ro6));
2392		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
2393		dst6->sin6_family = AF_INET6;
2394		dst6->sin6_len = sizeof(*dst6);
2395		dst6->sin6_addr = addr->v6;
2396#if defined(__FreeBSD__)
2397#ifdef RTF_PRCLONING
2398		rtalloc_ign((struct route *)&ro6,
2399		    (RTF_CLONING | RTF_PRCLONING));
2400#else /* !RTF_PRCLONING */
2401		rtalloc_ign((struct route *)&ro6, RTF_CLONING);
2402#endif
2403#else /* ! __FreeBSD__ */
2404		rtalloc_noclone((struct route *)&ro6, NO_CLONING);
2405#endif
2406		rt = ro6.ro_rt;
2407		break;
2408#endif /* INET6 */
2409	}
2410
2411	if (rt && rt->rt_ifp) {
2412		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2413		mss = max(tcp_mssdflt, mss);
2414		RTFREE(rt);
2415	}
2416	mss = min(mss, offer);
2417	mss = max(mss, 64);		/* sanity - at least max opt space */
2418	return (mss);
2419}
2420
2421void
2422pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2423{
2424	struct pf_rule *r = s->rule.ptr;
2425
2426	s->rt_ifp = NULL;
2427	if (!r->rt || r->rt == PF_FASTROUTE)
2428		return;
2429	switch (s->af) {
2430#ifdef INET
2431	case AF_INET:
2432		pf_map_addr(AF_INET, &r->rpool, saddr,
2433		    &s->rt_addr, NULL);
2434		s->rt_ifp = r->rpool.cur->ifp;
2435		break;
2436#endif /* INET */
2437#ifdef INET6
2438	case AF_INET6:
2439		pf_map_addr(AF_INET6, &r->rpool, saddr,
2440		    &s->rt_addr, NULL);
2441		s->rt_ifp = r->rpool.cur->ifp;
2442		break;
2443#endif /* INET6 */
2444	}
2445}
2446
2447int
2448pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
2449    struct ifnet *ifp, struct mbuf *m, int ipoff, int off, void *h,
2450    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
2451{
2452	struct pf_rule		*nat = NULL, *rdr = NULL;
2453	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
2454	struct pf_addr		 baddr, naddr;
2455	struct tcphdr		*th = pd->hdr.tcp;
2456	u_int16_t		 bport, nport = 0;
2457	sa_family_t		 af = pd->af;
2458	int			 lookup = -1;
2459	uid_t			 uid;
2460	gid_t			 gid;
2461	struct pf_rule		*r, *a = NULL;
2462	struct pf_ruleset	*ruleset = NULL;
2463	u_short			 reason;
2464	int			 rewrite = 0;
2465	struct pf_tag		*pftag = NULL;
2466	int			 tag = -1;
2467	u_int16_t		 mss = tcp_mssdflt;
2468
2469	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
2470
2471	if (direction == PF_OUT) {
2472		bport = nport = th->th_sport;
2473		/* check outgoing packet for BINAT/NAT */
2474		if ((nat = pf_get_translation(pd, m, off, PF_OUT, ifp,
2475		    saddr, th->th_sport, daddr, th->th_dport,
2476		    &naddr, &nport)) != NULL) {
2477			PF_ACPY(&baddr, saddr, af);
2478			pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2479			    &th->th_sum, &naddr, nport, 0, af);
2480			rewrite++;
2481			if (nat->natpass)
2482				r = NULL;
2483		}
2484	} else {
2485		bport = nport = th->th_dport;
2486		/* check incoming packet for BINAT/RDR */
2487		if ((rdr = pf_get_translation(pd, m, off, PF_IN, ifp, saddr,
2488		    th->th_sport, daddr, th->th_dport,
2489		    &naddr, &nport)) != NULL) {
2490			PF_ACPY(&baddr, daddr, af);
2491			pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2492			    &th->th_sum, &naddr, nport, 0, af);
2493			rewrite++;
2494			if (rdr->natpass)
2495				r = NULL;
2496		}
2497	}
2498
2499	while (r != NULL) {
2500		r->evaluations++;
2501		if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) ||
2502		    (r->ifp == ifp && r->ifnot)))
2503			r = r->skip[PF_SKIP_IFP].ptr;
2504		else if (r->direction && r->direction != direction)
2505			r = r->skip[PF_SKIP_DIR].ptr;
2506		else if (r->af && r->af != af)
2507			r = r->skip[PF_SKIP_AF].ptr;
2508		else if (r->proto && r->proto != IPPROTO_TCP)
2509			r = r->skip[PF_SKIP_PROTO].ptr;
2510		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
2511			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2512		else if (r->src.port_op && !pf_match_port(r->src.port_op,
2513		    r->src.port[0], r->src.port[1], th->th_sport))
2514			r = r->skip[PF_SKIP_SRC_PORT].ptr;
2515		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
2516			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2517		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
2518		    r->dst.port[0], r->dst.port[1], th->th_dport))
2519			r = r->skip[PF_SKIP_DST_PORT].ptr;
2520		else if (r->tos && !(r->tos & pd->tos))
2521			r = TAILQ_NEXT(r, entries);
2522		else if (r->rule_flag & PFRULE_FRAGMENT)
2523			r = TAILQ_NEXT(r, entries);
2524		else if ((r->flagset & th->th_flags) != r->flags)
2525			r = TAILQ_NEXT(r, entries);
2526		else if (r->uid.op && (lookup != -1 || (lookup =
2527		    pf_socket_lookup(&uid, &gid, direction, af, IPPROTO_TCP,
2528		    pd), 1)) &&
2529		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
2530		    uid))
2531			r = TAILQ_NEXT(r, entries);
2532		else if (r->gid.op && (lookup != -1 || (lookup =
2533		    pf_socket_lookup(&uid, &gid, direction, af, IPPROTO_TCP,
2534		    pd), 1)) &&
2535		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
2536		    gid))
2537			r = TAILQ_NEXT(r, entries);
2538		else if (r->match_tag &&
2539		    !pf_match_tag(m, r, nat, rdr, pftag, &tag))
2540			r = TAILQ_NEXT(r, entries);
2541		else if (r->anchorname[0] && r->anchor == NULL)
2542			r = TAILQ_NEXT(r, entries);
2543		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
2544		    pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
2545			r = TAILQ_NEXT(r, entries);
2546		else {
2547			if (r->tag)
2548				tag = r->tag;
2549			if (r->anchor == NULL) {
2550				*rm = r;
2551				*am = a;
2552				*rsm = ruleset;
2553				if ((*rm)->quick)
2554					break;
2555				r = TAILQ_NEXT(r, entries);
2556			} else
2557				PF_STEP_INTO_ANCHOR(r, a, ruleset,
2558				    PF_RULESET_FILTER);
2559		}
2560		if (r == NULL && a != NULL)
2561			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
2562			    PF_RULESET_FILTER);
2563	}
2564	r = *rm;
2565	a = *am;
2566	ruleset = *rsm;
2567
2568	r->packets++;
2569	r->bytes += pd->tot_len;
2570	if (a != NULL) {
2571		a->packets++;
2572		a->bytes += pd->tot_len;
2573	}
2574	REASON_SET(&reason, PFRES_MATCH);
2575
2576	if (r->log) {
2577		if (rewrite)
2578			m_copyback(m, off, sizeof(*th), (caddr_t)th);
2579		PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset);
2580	}
2581
2582	if ((r->action == PF_DROP) &&
2583	    ((r->rule_flag & PFRULE_RETURNRST) ||
2584	    (r->rule_flag & PFRULE_RETURNICMP) ||
2585	    (r->rule_flag & PFRULE_RETURN))) {
2586		/* undo NAT changes, if they have taken place */
2587		if (nat != NULL) {
2588			pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2589			    &th->th_sum, &baddr, bport, 0, af);
2590			rewrite++;
2591		} else if (rdr != NULL) {
2592			pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2593			    &th->th_sum, &baddr, bport, 0, af);
2594			rewrite++;
2595		}
2596		if (((r->rule_flag & PFRULE_RETURNRST) ||
2597		    (r->rule_flag & PFRULE_RETURN)) &&
2598		    !(th->th_flags & TH_RST)) {
2599			u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
2600
2601			if (th->th_flags & TH_SYN)
2602				ack++;
2603			if (th->th_flags & TH_FIN)
2604				ack++;
2605			pf_send_tcp(r, af, pd->dst,
2606			    pd->src, th->th_dport, th->th_sport,
2607			    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
2608			    r->return_ttl);
2609		} else if ((af == AF_INET) && r->return_icmp)
2610			pf_send_icmp(m, r->return_icmp >> 8,
2611			    r->return_icmp & 255, af, r);
2612		else if ((af == AF_INET6) && r->return_icmp6)
2613			pf_send_icmp(m, r->return_icmp6 >> 8,
2614			    r->return_icmp6 & 255, af, r);
2615	}
2616
2617	if (r->action == PF_DROP)
2618		return (PF_DROP);
2619
2620	if (pf_tag_packet(m, pftag, tag)) {
2621		REASON_SET(&reason, PFRES_MEMORY);
2622		return (PF_DROP);
2623	}
2624
2625	if (r->keep_state || nat != NULL || rdr != NULL ||
2626	    (pd->flags & PFDESC_TCP_NORM)) {
2627		/* create new state */
2628		u_int16_t	 len;
2629		struct pf_state	*s = NULL;
2630
2631		len = pd->tot_len - off - (th->th_off << 2);
2632		if (!r->max_states || r->states < r->max_states)
2633			s = pool_get(&pf_state_pl, PR_NOWAIT);
2634		if (s == NULL) {
2635			REASON_SET(&reason, PFRES_MEMORY);
2636			return (PF_DROP);
2637		}
2638		bzero(s, sizeof(*s));
2639		r->states++;
2640		if (a != NULL)
2641			a->states++;
2642		s->rule.ptr = r;
2643		if (nat != NULL)
2644			s->nat_rule.ptr = nat;
2645		else
2646			s->nat_rule.ptr = rdr;
2647		if (s->nat_rule.ptr != NULL)
2648			s->nat_rule.ptr->states++;
2649		s->anchor.ptr = a;
2650		s->allow_opts = r->allow_opts;
2651		s->log = r->log & 2;
2652		s->proto = IPPROTO_TCP;
2653		s->direction = direction;
2654		s->af = af;
2655		if (direction == PF_OUT) {
2656			PF_ACPY(&s->gwy.addr, saddr, af);
2657			s->gwy.port = th->th_sport;		/* sport */
2658			PF_ACPY(&s->ext.addr, daddr, af);
2659			s->ext.port = th->th_dport;
2660			if (nat != NULL) {
2661				PF_ACPY(&s->lan.addr, &baddr, af);
2662				s->lan.port = bport;
2663			} else {
2664				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
2665				s->lan.port = s->gwy.port;
2666			}
2667		} else {
2668			PF_ACPY(&s->lan.addr, daddr, af);
2669			s->lan.port = th->th_dport;
2670			PF_ACPY(&s->ext.addr, saddr, af);
2671			s->ext.port = th->th_sport;
2672			if (rdr != NULL) {
2673				PF_ACPY(&s->gwy.addr, &baddr, af);
2674				s->gwy.port = bport;
2675			} else {
2676				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
2677				s->gwy.port = s->lan.port;
2678			}
2679		}
2680
2681		s->src.seqlo = ntohl(th->th_seq);
2682		s->src.seqhi = s->src.seqlo + len + 1;
2683		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
2684		    r->keep_state == PF_STATE_MODULATE) {
2685			/* Generate sequence number modulator */
2686			while ((s->src.seqdiff = arc4random()) == 0)
2687				;
2688			pf_change_a(&th->th_seq, &th->th_sum,
2689			    htonl(s->src.seqlo + s->src.seqdiff), 0);
2690			rewrite = 1;
2691		} else
2692			s->src.seqdiff = 0;
2693		if (th->th_flags & TH_SYN) {
2694			s->src.seqhi++;
2695			s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
2696		}
2697		s->src.max_win = MAX(ntohs(th->th_win), 1);
2698		if (s->src.wscale & PF_WSCALE_MASK) {
2699			/* Remove scale factor from initial window */
2700			int win = s->src.max_win;
2701			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
2702			s->src.max_win = (win - 1) >>
2703			    (s->src.wscale & PF_WSCALE_MASK);
2704		}
2705		if (th->th_flags & TH_FIN)
2706			s->src.seqhi++;
2707		s->dst.seqhi = 1;
2708		s->dst.max_win = 1;
2709		s->src.state = TCPS_SYN_SENT;
2710		s->dst.state = TCPS_CLOSED;
2711#if defined(__FreeBSD__)
2712		s->creation = time_second;
2713		s->expire = time_second;
2714#else
2715		s->creation = time.tv_sec;
2716		s->expire = time.tv_sec;
2717#endif
2718		s->timeout = PFTM_TCP_FIRST_PACKET;
2719		s->packets[0] = 1;
2720		s->bytes[0] = pd->tot_len;
2721		pf_set_rt_ifp(s, saddr);
2722
2723		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
2724		    off, pd, th, &s->src, &s->dst)) {
2725			REASON_SET(&reason, PFRES_MEMORY);
2726			pool_put(&pf_state_pl, s);
2727			return (PF_DROP);
2728		}
2729		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
2730		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, &s->src,
2731		    &s->dst, &rewrite)) {
2732			pf_normalize_tcp_cleanup(s);
2733			pool_put(&pf_state_pl, s);
2734			return (PF_DROP);
2735		}
2736		if (pf_insert_state(s)) {
2737			pf_normalize_tcp_cleanup(s);
2738			REASON_SET(&reason, PFRES_MEMORY);
2739			pool_put(&pf_state_pl, s);
2740			return (PF_DROP);
2741		} else
2742			*sm = s;
2743		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
2744		    r->keep_state == PF_STATE_SYNPROXY) {
2745			s->src.state = PF_TCPS_PROXY_SRC;
2746			if (nat != NULL)
2747				pf_change_ap(saddr, &th->th_sport,
2748				    pd->ip_sum, &th->th_sum, &baddr,
2749				    bport, 0, af);
2750			else if (rdr != NULL)
2751				pf_change_ap(daddr, &th->th_dport,
2752				    pd->ip_sum, &th->th_sum, &baddr,
2753				    bport, 0, af);
2754			s->src.seqhi = arc4random();
2755			/* Find mss option */
2756			mss = pf_get_mss(m, off, th->th_off, af);
2757			mss = pf_calc_mss(saddr, af, mss);
2758			mss = pf_calc_mss(daddr, af, mss);
2759			s->src.mss = mss;
2760			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
2761			    th->th_sport, s->src.seqhi,
2762			    ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, s->src.mss, 0);
2763			return (PF_SYNPROXY_DROP);
2764		}
2765	}
2766
2767	/* copy back packet headers if we performed NAT operations */
2768	if (rewrite)
2769		m_copyback(m, off, sizeof(*th), (caddr_t)th);
2770
2771	return (PF_PASS);
2772}
2773
2774int
2775pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
2776    struct ifnet *ifp, struct mbuf *m, int ipoff, int off, void *h,
2777    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
2778{
2779	struct pf_rule		*nat = NULL, *rdr = NULL;
2780	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
2781	struct pf_addr		 baddr, naddr;
2782	struct udphdr		*uh = pd->hdr.udp;
2783	u_int16_t		 bport, nport = 0;
2784	sa_family_t		 af = pd->af;
2785	int			 lookup = -1;
2786	uid_t			 uid;
2787	gid_t			 gid;
2788	struct pf_rule		*r, *a = NULL;
2789	struct pf_ruleset	*ruleset = NULL;
2790	u_short			 reason;
2791	int			 rewrite = 0;
2792	struct pf_tag		*pftag = NULL;
2793	int			 tag = -1;
2794
2795	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
2796
2797	if (direction == PF_OUT) {
2798		bport = nport = uh->uh_sport;
2799		/* check outgoing packet for BINAT/NAT */
2800		if ((nat = pf_get_translation(pd, m, off, PF_OUT, ifp,
2801		    saddr, uh->uh_sport, daddr, uh->uh_dport,
2802		    &naddr, &nport)) != NULL) {
2803			PF_ACPY(&baddr, saddr, af);
2804			pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
2805			    &uh->uh_sum, &naddr, nport, 1, af);
2806			rewrite++;
2807			if (nat->natpass)
2808				r = NULL;
2809		}
2810	} else {
2811		bport = nport = uh->uh_dport;
2812		/* check incoming packet for BINAT/RDR */
2813		if ((rdr = pf_get_translation(pd, m, off, PF_IN, ifp, saddr,
2814		    uh->uh_sport, daddr, uh->uh_dport, &naddr, &nport))
2815		    != NULL) {
2816			PF_ACPY(&baddr, daddr, af);
2817			pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
2818			    &uh->uh_sum, &naddr, nport, 1, af);
2819			rewrite++;
2820			if (rdr->natpass)
2821				r = NULL;
2822		}
2823	}
2824
2825	while (r != NULL) {
2826		r->evaluations++;
2827		if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) ||
2828		    (r->ifp == ifp && r->ifnot)))
2829			r = r->skip[PF_SKIP_IFP].ptr;
2830		else if (r->direction && r->direction != direction)
2831			r = r->skip[PF_SKIP_DIR].ptr;
2832		else if (r->af && r->af != af)
2833			r = r->skip[PF_SKIP_AF].ptr;
2834		else if (r->proto && r->proto != IPPROTO_UDP)
2835			r = r->skip[PF_SKIP_PROTO].ptr;
2836		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
2837			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2838		else if (r->src.port_op && !pf_match_port(r->src.port_op,
2839		    r->src.port[0], r->src.port[1], uh->uh_sport))
2840			r = r->skip[PF_SKIP_SRC_PORT].ptr;
2841		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
2842			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2843		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
2844		    r->dst.port[0], r->dst.port[1], uh->uh_dport))
2845			r = r->skip[PF_SKIP_DST_PORT].ptr;
2846		else if (r->tos && !(r->tos & pd->tos))
2847			r = TAILQ_NEXT(r, entries);
2848		else if (r->rule_flag & PFRULE_FRAGMENT)
2849			r = TAILQ_NEXT(r, entries);
2850		else if (r->uid.op && (lookup != -1 || (lookup =
2851		    pf_socket_lookup(&uid, &gid, direction, af, IPPROTO_UDP,
2852		    pd), 1)) &&
2853		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
2854		    uid))
2855			r = TAILQ_NEXT(r, entries);
2856		else if (r->gid.op && (lookup != -1 || (lookup =
2857		    pf_socket_lookup(&uid, &gid, direction, af, IPPROTO_UDP,
2858		    pd), 1)) &&
2859		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
2860		    gid))
2861			r = TAILQ_NEXT(r, entries);
2862		else if (r->match_tag &&
2863		    !pf_match_tag(m, r, nat, rdr, pftag, &tag))
2864			r = TAILQ_NEXT(r, entries);
2865		else if (r->anchorname[0] && r->anchor == NULL)
2866			r = TAILQ_NEXT(r, entries);
2867		else if (r->os_fingerprint != PF_OSFP_ANY)
2868			r = TAILQ_NEXT(r, entries);
2869		else {
2870			if (r->tag)
2871				tag = r->tag;
2872			if (r->anchor == NULL) {
2873				*rm = r;
2874				*am = a;
2875				*rsm = ruleset;
2876				if ((*rm)->quick)
2877					break;
2878				r = TAILQ_NEXT(r, entries);
2879			} else
2880				PF_STEP_INTO_ANCHOR(r, a, ruleset,
2881				    PF_RULESET_FILTER);
2882		}
2883		if (r == NULL && a != NULL)
2884			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
2885			    PF_RULESET_FILTER);
2886	}
2887	r = *rm;
2888	a = *am;
2889	ruleset = *rsm;
2890
2891	r->packets++;
2892	r->bytes += pd->tot_len;
2893	if (a != NULL) {
2894		a->packets++;
2895		a->bytes += pd->tot_len;
2896	}
2897	REASON_SET(&reason, PFRES_MATCH);
2898
2899	if (r->log) {
2900		if (rewrite)
2901			m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
2902		PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset);
2903	}
2904
2905	if ((r->action == PF_DROP) &&
2906	    ((r->rule_flag & PFRULE_RETURNICMP) ||
2907	    (r->rule_flag & PFRULE_RETURN))) {
2908		/* undo NAT changes, if they have taken place */
2909		if (nat != NULL) {
2910			pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
2911			    &uh->uh_sum, &baddr, bport, 1, af);
2912			rewrite++;
2913		} else if (rdr != NULL) {
2914			pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
2915			    &uh->uh_sum, &baddr, bport, 1, af);
2916			rewrite++;
2917		}
2918		if ((af == AF_INET) && r->return_icmp)
2919			pf_send_icmp(m, r->return_icmp >> 8,
2920			    r->return_icmp & 255, af, r);
2921		else if ((af == AF_INET6) && r->return_icmp6)
2922			pf_send_icmp(m, r->return_icmp6 >> 8,
2923			    r->return_icmp6 & 255, af, r);
2924	}
2925
2926	if (r->action == PF_DROP)
2927		return (PF_DROP);
2928
2929	if (pf_tag_packet(m, pftag, tag)) {
2930		REASON_SET(&reason, PFRES_MEMORY);
2931		return (PF_DROP);
2932	}
2933
2934	if (r->keep_state || nat != NULL || rdr != NULL) {
2935		/* create new state */
2936		struct pf_state	*s = NULL;
2937
2938		if (!r->max_states || r->states < r->max_states)
2939			s = pool_get(&pf_state_pl, PR_NOWAIT);
2940		if (s == NULL) {
2941			REASON_SET(&reason, PFRES_MEMORY);
2942			return (PF_DROP);
2943		}
2944		bzero(s, sizeof(*s));
2945		r->states++;
2946		if (a != NULL)
2947			a->states++;
2948		s->rule.ptr = r;
2949		if (nat != NULL)
2950			s->nat_rule.ptr = nat;
2951		else
2952			s->nat_rule.ptr = rdr;
2953		if (s->nat_rule.ptr != NULL)
2954			s->nat_rule.ptr->states++;
2955		s->anchor.ptr = a;
2956		s->allow_opts = r->allow_opts;
2957		s->log = r->log & 2;
2958		s->proto = IPPROTO_UDP;
2959		s->direction = direction;
2960		s->af = af;
2961		if (direction == PF_OUT) {
2962			PF_ACPY(&s->gwy.addr, saddr, af);
2963			s->gwy.port = uh->uh_sport;
2964			PF_ACPY(&s->ext.addr, daddr, af);
2965			s->ext.port = uh->uh_dport;
2966			if (nat != NULL) {
2967				PF_ACPY(&s->lan.addr, &baddr, af);
2968				s->lan.port = bport;
2969			} else {
2970				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
2971				s->lan.port = s->gwy.port;
2972			}
2973		} else {
2974			PF_ACPY(&s->lan.addr, daddr, af);
2975			s->lan.port = uh->uh_dport;
2976			PF_ACPY(&s->ext.addr, saddr, af);
2977			s->ext.port = uh->uh_sport;
2978			if (rdr != NULL) {
2979				PF_ACPY(&s->gwy.addr, &baddr, af);
2980				s->gwy.port = bport;
2981			} else {
2982				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
2983				s->gwy.port = s->lan.port;
2984			}
2985		}
2986		s->src.state = PFUDPS_SINGLE;
2987		s->dst.state = PFUDPS_NO_TRAFFIC;
2988#if defined(__FreeBSD__)
2989		s->creation = time_second;
2990		s->expire = time_second;
2991#else
2992		s->creation = time.tv_sec;
2993		s->expire = time.tv_sec;
2994#endif
2995		s->timeout = PFTM_UDP_FIRST_PACKET;
2996		s->packets[0] = 1;
2997		s->bytes[0] = pd->tot_len;
2998		pf_set_rt_ifp(s, saddr);
2999		if (pf_insert_state(s)) {
3000			REASON_SET(&reason, PFRES_MEMORY);
3001			pool_put(&pf_state_pl, s);
3002			return (PF_DROP);
3003		} else
3004			*sm = s;
3005	}
3006
3007	/* copy back packet headers if we performed NAT operations */
3008	if (rewrite)
3009		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3010
3011	return (PF_PASS);
3012}
3013
3014int
3015pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
3016    struct ifnet *ifp, struct mbuf *m, int ipoff, int off, void *h,
3017    struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
3018{
3019	struct pf_rule		*nat = NULL, *rdr = NULL;
3020	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3021	struct pf_addr		 baddr, naddr;
3022	struct pf_rule		*r, *a = NULL;
3023	struct pf_ruleset	*ruleset = NULL;
3024	u_short			 reason;
3025	u_int16_t		 icmpid;
3026	sa_family_t		 af = pd->af;
3027	u_int8_t		 icmptype, icmpcode;
3028	int			 state_icmp = 0;
3029	struct pf_tag		*pftag = NULL;
3030	int			 tag = -1;
3031#ifdef INET6
3032	int			 rewrite = 0;
3033#endif /* INET6 */
3034
3035	switch (pd->proto) {
3036#ifdef INET
3037	case IPPROTO_ICMP:
3038		icmptype = pd->hdr.icmp->icmp_type;
3039		icmpcode = pd->hdr.icmp->icmp_code;
3040		icmpid = pd->hdr.icmp->icmp_id;
3041
3042		if (icmptype == ICMP_UNREACH ||
3043		    icmptype == ICMP_SOURCEQUENCH ||
3044		    icmptype == ICMP_REDIRECT ||
3045		    icmptype == ICMP_TIMXCEED ||
3046		    icmptype == ICMP_PARAMPROB)
3047			state_icmp++;
3048		break;
3049#endif /* INET */
3050#ifdef INET6
3051	case IPPROTO_ICMPV6:
3052		icmptype = pd->hdr.icmp6->icmp6_type;
3053		icmpcode = pd->hdr.icmp6->icmp6_code;
3054		icmpid = pd->hdr.icmp6->icmp6_id;
3055
3056		if (icmptype == ICMP6_DST_UNREACH ||
3057		    icmptype == ICMP6_PACKET_TOO_BIG ||
3058		    icmptype == ICMP6_TIME_EXCEEDED ||
3059		    icmptype == ICMP6_PARAM_PROB)
3060			state_icmp++;
3061		break;
3062#endif /* INET6 */
3063	}
3064
3065	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3066
3067	if (direction == PF_OUT) {
3068		/* check outgoing packet for BINAT/NAT */
3069		if ((nat = pf_get_translation(pd, m, off, PF_OUT, ifp, saddr, 0,
3070		    daddr, 0, &naddr, NULL)) != NULL) {
3071			PF_ACPY(&baddr, saddr, af);
3072			switch (af) {
3073#ifdef INET
3074			case AF_INET:
3075				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3076				    naddr.v4.s_addr, 0);
3077				break;
3078#endif /* INET */
3079#ifdef INET6
3080			case AF_INET6:
3081				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3082				    &naddr, 0);
3083				rewrite++;
3084				break;
3085#endif /* INET6 */
3086			}
3087			if (nat->natpass)
3088				r = NULL;
3089		}
3090	} else {
3091		/* check incoming packet for BINAT/RDR */
3092		if ((rdr = pf_get_translation(pd, m, off, PF_IN, ifp, saddr, 0,
3093		    daddr, 0, &naddr, NULL)) != NULL) {
3094			PF_ACPY(&baddr, daddr, af);
3095			switch (af) {
3096#ifdef INET
3097			case AF_INET:
3098				pf_change_a(&daddr->v4.s_addr,
3099				    pd->ip_sum, naddr.v4.s_addr, 0);
3100				break;
3101#endif /* INET */
3102#ifdef INET6
3103			case AF_INET6:
3104				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3105				    &naddr, 0);
3106				rewrite++;
3107				break;
3108#endif /* INET6 */
3109			}
3110			if (rdr->natpass)
3111				r = NULL;
3112		}
3113	}
3114
3115	while (r != NULL) {
3116		r->evaluations++;
3117		if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) ||
3118		    (r->ifp == ifp && r->ifnot)))
3119			r = r->skip[PF_SKIP_IFP].ptr;
3120		else if (r->direction && r->direction != direction)
3121			r = r->skip[PF_SKIP_DIR].ptr;
3122		else if (r->af && r->af != af)
3123			r = r->skip[PF_SKIP_AF].ptr;
3124		else if (r->proto && r->proto != pd->proto)
3125			r = r->skip[PF_SKIP_PROTO].ptr;
3126		else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
3127			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3128		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
3129			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3130		else if (r->type && r->type != icmptype + 1)
3131			r = TAILQ_NEXT(r, entries);
3132		else if (r->code && r->code != icmpcode + 1)
3133			r = TAILQ_NEXT(r, entries);
3134		else if (r->tos && !(r->tos & pd->tos))
3135			r = TAILQ_NEXT(r, entries);
3136		else if (r->rule_flag & PFRULE_FRAGMENT)
3137			r = TAILQ_NEXT(r, entries);
3138		else if (r->match_tag &&
3139		    !pf_match_tag(m, r, nat, rdr, pftag, &tag))
3140			r = TAILQ_NEXT(r, entries);
3141		else if (r->anchorname[0] && r->anchor == NULL)
3142			r = TAILQ_NEXT(r, entries);
3143		else if (r->os_fingerprint != PF_OSFP_ANY)
3144			r = TAILQ_NEXT(r, entries);
3145		else {
3146			if (r->tag)
3147				tag = r->tag;
3148			if (r->anchor == NULL) {
3149				*rm = r;
3150				*am = a;
3151				*rsm = ruleset;
3152				if ((*rm)->quick)
3153					break;
3154				r = TAILQ_NEXT(r, entries);
3155			} else
3156				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3157				    PF_RULESET_FILTER);
3158		}
3159		if (r == NULL && a != NULL)
3160			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3161			    PF_RULESET_FILTER);
3162	}
3163	r = *rm;
3164	a = *am;
3165	ruleset = *rsm;
3166
3167	r->packets++;
3168	r->bytes += pd->tot_len;
3169	if (a != NULL) {
3170		a->packets++;
3171		a->bytes += pd->tot_len;
3172	}
3173	REASON_SET(&reason, PFRES_MATCH);
3174
3175	if (r->log) {
3176#ifdef INET6
3177		if (rewrite)
3178			m_copyback(m, off, sizeof(struct icmp6_hdr),
3179			    (caddr_t)pd->hdr.icmp6);
3180#endif /* INET6 */
3181		PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset);
3182	}
3183
3184	if (r->action != PF_PASS)
3185		return (PF_DROP);
3186
3187	if (pf_tag_packet(m, pftag, tag)) {
3188		REASON_SET(&reason, PFRES_MEMORY);
3189		return (PF_DROP);
3190	}
3191
3192	if (!state_icmp && (r->keep_state ||
3193	    nat != NULL || rdr != NULL)) {
3194		/* create new state */
3195		struct pf_state	*s = NULL;
3196
3197		if (!r->max_states || r->states < r->max_states)
3198			s = pool_get(&pf_state_pl, PR_NOWAIT);
3199		if (s == NULL) {
3200			REASON_SET(&reason, PFRES_MEMORY);
3201			return (PF_DROP);
3202		}
3203		bzero(s, sizeof(*s));
3204		r->states++;
3205		if (a != NULL)
3206			a->states++;
3207		s->rule.ptr = r;
3208		if (nat != NULL)
3209			s->nat_rule.ptr = nat;
3210		else
3211			s->nat_rule.ptr = rdr;
3212		if (s->nat_rule.ptr != NULL)
3213			s->nat_rule.ptr->states++;
3214		s->anchor.ptr = a;
3215		s->allow_opts = r->allow_opts;
3216		s->log = r->log & 2;
3217		s->proto = pd->proto;
3218		s->direction = direction;
3219		s->af = af;
3220		if (direction == PF_OUT) {
3221			PF_ACPY(&s->gwy.addr, saddr, af);
3222			s->gwy.port = icmpid;
3223			PF_ACPY(&s->ext.addr, daddr, af);
3224			s->ext.port = icmpid;
3225			if (nat != NULL)
3226				PF_ACPY(&s->lan.addr, &baddr, af);
3227			else
3228				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3229			s->lan.port = icmpid;
3230		} else {
3231			PF_ACPY(&s->lan.addr, daddr, af);
3232			s->lan.port = icmpid;
3233			PF_ACPY(&s->ext.addr, saddr, af);
3234			s->ext.port = icmpid;
3235			if (rdr != NULL)
3236				PF_ACPY(&s->gwy.addr, &baddr, af);
3237			else
3238				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3239			s->gwy.port = icmpid;
3240		}
3241
3242#if defined(__FreeBSD__)
3243		s->creation = time_second;
3244		s->expire = time_second;
3245#else
3246		s->creation = time.tv_sec;
3247		s->expire = time.tv_sec;
3248#endif
3249		s->timeout = PFTM_ICMP_FIRST_PACKET;
3250		s->packets[0] = 1;
3251		s->bytes[0] = pd->tot_len;
3252		pf_set_rt_ifp(s, saddr);
3253		if (pf_insert_state(s)) {
3254			REASON_SET(&reason, PFRES_MEMORY);
3255			pool_put(&pf_state_pl, s);
3256			return (PF_DROP);
3257		} else
3258			*sm = s;
3259	}
3260
3261#ifdef INET6
3262	/* copy back packet headers if we performed IPv6 NAT operations */
3263	if (rewrite)
3264		m_copyback(m, off, sizeof(struct icmp6_hdr),
3265		    (caddr_t)pd->hdr.icmp6);
3266#endif /* INET6 */
3267
3268	return (PF_PASS);
3269}
3270
3271int
3272pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
3273    struct ifnet *ifp, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3274    struct pf_rule **am, struct pf_ruleset **rsm)
3275{
3276	struct pf_rule		*nat = NULL, *rdr = NULL;
3277	struct pf_rule		*r, *a = NULL;
3278	struct pf_ruleset	*ruleset = NULL;
3279	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
3280	struct pf_addr		 baddr, naddr;
3281	sa_family_t		 af = pd->af;
3282	u_short			 reason;
3283	struct pf_tag		*pftag = NULL;
3284	int			 tag = -1;
3285
3286	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3287
3288	if (direction == PF_OUT) {
3289		/* check outgoing packet for BINAT/NAT */
3290		if ((nat = pf_get_translation(pd, m, off, PF_OUT, ifp, saddr, 0,
3291		    daddr, 0, &naddr, NULL)) != NULL) {
3292			PF_ACPY(&baddr, saddr, af);
3293			switch (af) {
3294#ifdef INET
3295			case AF_INET:
3296				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3297				    naddr.v4.s_addr, 0);
3298				break;
3299#endif /* INET */
3300#ifdef INET6
3301			case AF_INET6:
3302				PF_ACPY(saddr, &naddr, af);
3303				break;
3304#endif /* INET6 */
3305			}
3306			if (nat->natpass)
3307				r = NULL;
3308		}
3309	} else {
3310		/* check incoming packet for BINAT/RDR */
3311		if ((rdr = pf_get_translation(pd, m, off, PF_IN, ifp, saddr, 0,
3312		    daddr, 0, &naddr, NULL)) != NULL) {
3313			PF_ACPY(&baddr, daddr, af);
3314			switch (af) {
3315#ifdef INET
3316			case AF_INET:
3317				pf_change_a(&daddr->v4.s_addr,
3318				    pd->ip_sum, naddr.v4.s_addr, 0);
3319				break;
3320#endif /* INET */
3321#ifdef INET6
3322			case AF_INET6:
3323				PF_ACPY(daddr, &naddr, af);
3324				break;
3325#endif /* INET6 */
3326			}
3327			if (rdr->natpass)
3328				r = NULL;
3329		}
3330	}
3331
3332	while (r != NULL) {
3333		r->evaluations++;
3334		if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) ||
3335		    (r->ifp == ifp && r->ifnot)))
3336			r = r->skip[PF_SKIP_IFP].ptr;
3337		else if (r->direction && r->direction != direction)
3338			r = r->skip[PF_SKIP_DIR].ptr;
3339		else if (r->af && r->af != af)
3340			r = r->skip[PF_SKIP_AF].ptr;
3341		else if (r->proto && r->proto != pd->proto)
3342			r = r->skip[PF_SKIP_PROTO].ptr;
3343		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3344			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3345		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3346			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3347		else if (r->tos && !(r->tos & pd->tos))
3348			r = TAILQ_NEXT(r, entries);
3349		else if (r->rule_flag & PFRULE_FRAGMENT)
3350			r = TAILQ_NEXT(r, entries);
3351		else if (r->match_tag &&
3352		    !pf_match_tag(m, r, nat, rdr, pftag, &tag))
3353			r = TAILQ_NEXT(r, entries);
3354		else if (r->anchorname[0] && r->anchor == NULL)
3355			r = TAILQ_NEXT(r, entries);
3356		else if (r->os_fingerprint != PF_OSFP_ANY)
3357			r = TAILQ_NEXT(r, entries);
3358		else {
3359			if (r->tag)
3360				tag = r->tag;
3361			if (r->anchor == NULL) {
3362				*rm = r;
3363				*am = a;
3364				*rsm = ruleset;
3365				if ((*rm)->quick)
3366					break;
3367				r = TAILQ_NEXT(r, entries);
3368			} else
3369				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3370				    PF_RULESET_FILTER);
3371		}
3372		if (r == NULL && a != NULL)
3373			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3374			    PF_RULESET_FILTER);
3375	}
3376	r = *rm;
3377	a = *am;
3378	ruleset = *rsm;
3379
3380	r->packets++;
3381	r->bytes += pd->tot_len;
3382	if (a != NULL) {
3383		a->packets++;
3384		a->bytes += pd->tot_len;
3385	}
3386	REASON_SET(&reason, PFRES_MATCH);
3387	if (r->log)
3388		PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset);
3389
3390	if ((r->action == PF_DROP) &&
3391	    ((r->rule_flag & PFRULE_RETURNICMP) ||
3392	    (r->rule_flag & PFRULE_RETURN))) {
3393		struct pf_addr *a = NULL;
3394
3395		if (nat != NULL)
3396			a = saddr;
3397		else if (rdr != NULL)
3398			a = daddr;
3399		if (a != NULL) {
3400			switch (af) {
3401#ifdef INET
3402			case AF_INET:
3403				pf_change_a(&a->v4.s_addr, pd->ip_sum,
3404				    baddr.v4.s_addr, 0);
3405				break;
3406#endif /* INET */
3407#ifdef INET6
3408			case AF_INET6:
3409				PF_ACPY(a, &baddr, af);
3410				break;
3411#endif /* INET6 */
3412			}
3413		}
3414		if ((af == AF_INET) && r->return_icmp)
3415			pf_send_icmp(m, r->return_icmp >> 8,
3416			    r->return_icmp & 255, af, r);
3417		else if ((af == AF_INET6) && r->return_icmp6)
3418			pf_send_icmp(m, r->return_icmp6 >> 8,
3419			    r->return_icmp6 & 255, af, r);
3420	}
3421
3422	if (r->action != PF_PASS)
3423		return (PF_DROP);
3424
3425	if (pf_tag_packet(m, pftag, tag)) {
3426		REASON_SET(&reason, PFRES_MEMORY);
3427		return (PF_DROP);
3428	}
3429
3430	if (r->keep_state || nat != NULL || rdr != NULL) {
3431		/* create new state */
3432		struct pf_state	*s = NULL;
3433
3434		if (!r->max_states || r->states < r->max_states)
3435			s = pool_get(&pf_state_pl, PR_NOWAIT);
3436		if (s == NULL) {
3437			REASON_SET(&reason, PFRES_MEMORY);
3438			return (PF_DROP);
3439		}
3440		bzero(s, sizeof(*s));
3441		r->states++;
3442		if (a != NULL)
3443			a->states++;
3444		s->rule.ptr = r;
3445		if (nat != NULL)
3446			s->nat_rule.ptr = nat;
3447		else
3448			s->nat_rule.ptr = rdr;
3449		if (s->nat_rule.ptr != NULL)
3450			s->nat_rule.ptr->states++;
3451		s->anchor.ptr = a;
3452		s->allow_opts = r->allow_opts;
3453		s->log = r->log & 2;
3454		s->proto = pd->proto;
3455		s->direction = direction;
3456		s->af = af;
3457		if (direction == PF_OUT) {
3458			PF_ACPY(&s->gwy.addr, saddr, af);
3459			PF_ACPY(&s->ext.addr, daddr, af);
3460			if (nat != NULL)
3461				PF_ACPY(&s->lan.addr, &baddr, af);
3462			else
3463				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3464		} else {
3465			PF_ACPY(&s->lan.addr, daddr, af);
3466			PF_ACPY(&s->ext.addr, saddr, af);
3467			if (rdr != NULL)
3468				PF_ACPY(&s->gwy.addr, &baddr, af);
3469			else
3470				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3471		}
3472		s->src.state = PFOTHERS_SINGLE;
3473		s->dst.state = PFOTHERS_NO_TRAFFIC;
3474#if defined(__FreeBSD__)
3475		s->creation = time_second;
3476		s->expire = time_second;
3477#else
3478		s->creation = time.tv_sec;
3479		s->expire = time.tv_sec;
3480#endif
3481		s->timeout = PFTM_OTHER_FIRST_PACKET;
3482		s->packets[0] = 1;
3483		s->bytes[0] = pd->tot_len;
3484		pf_set_rt_ifp(s, saddr);
3485		if (pf_insert_state(s)) {
3486			REASON_SET(&reason, PFRES_MEMORY);
3487			if (r->log)
3488				PFLOG_PACKET(ifp, h, m, af, direction, reason,
3489				    r, a, ruleset);
3490			pool_put(&pf_state_pl, s);
3491			return (PF_DROP);
3492		} else
3493			*sm = s;
3494	}
3495
3496	return (PF_PASS);
3497}
3498
3499int
3500pf_test_fragment(struct pf_rule **rm, int direction, struct ifnet *ifp,
3501    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3502    struct pf_ruleset **rsm)
3503{
3504	struct pf_rule		*r, *a = NULL;
3505	struct pf_ruleset	*ruleset = NULL;
3506	sa_family_t		 af = pd->af;
3507	u_short			 reason;
3508	struct pf_tag		*pftag = NULL;
3509	int			 tag = -1;
3510
3511	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3512	while (r != NULL) {
3513		r->evaluations++;
3514		if (r->ifp != NULL && ((r->ifp != ifp && !r->ifnot) ||
3515		    (r->ifp == ifp && r->ifnot)))
3516			r = r->skip[PF_SKIP_IFP].ptr;
3517		else if (r->direction && r->direction != direction)
3518			r = r->skip[PF_SKIP_DIR].ptr;
3519		else if (r->af && r->af != af)
3520			r = r->skip[PF_SKIP_AF].ptr;
3521		else if (r->proto && r->proto != pd->proto)
3522			r = r->skip[PF_SKIP_PROTO].ptr;
3523		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3524			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3525		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3526			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3527		else if (r->tos && !(r->tos & pd->tos))
3528			r = TAILQ_NEXT(r, entries);
3529		else if (r->src.port_op || r->dst.port_op ||
3530		    r->flagset || r->type || r->code ||
3531		    r->os_fingerprint != PF_OSFP_ANY)
3532			r = TAILQ_NEXT(r, entries);
3533		else if (r->match_tag &&
3534		    !pf_match_tag(m, r, NULL, NULL, pftag, &tag))
3535			r = TAILQ_NEXT(r, entries);
3536		else if (r->anchorname[0] && r->anchor == NULL)
3537			r = TAILQ_NEXT(r, entries);
3538		else {
3539			if (r->anchor == NULL) {
3540				*rm = r;
3541				*am = a;
3542				*rsm = ruleset;
3543				if ((*rm)->quick)
3544					break;
3545				r = TAILQ_NEXT(r, entries);
3546			} else
3547				PF_STEP_INTO_ANCHOR(r, a, ruleset,
3548				    PF_RULESET_FILTER);
3549		}
3550		if (r == NULL && a != NULL)
3551			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3552			    PF_RULESET_FILTER);
3553	}
3554	r = *rm;
3555	a = *am;
3556	ruleset = *rsm;
3557
3558	r->packets++;
3559	r->bytes += pd->tot_len;
3560	if (a != NULL) {
3561		a->packets++;
3562		a->bytes += pd->tot_len;
3563	}
3564	REASON_SET(&reason, PFRES_MATCH);
3565	if (r->log)
3566		PFLOG_PACKET(ifp, h, m, af, direction, reason, r, a, ruleset);
3567
3568	if (r->action != PF_PASS)
3569		return (PF_DROP);
3570
3571	if (pf_tag_packet(m, pftag, tag)) {
3572		REASON_SET(&reason, PFRES_MEMORY);
3573		return (PF_DROP);
3574	}
3575
3576	return (PF_PASS);
3577}
3578
3579int
3580pf_test_state_tcp(struct pf_state **state, int direction, struct ifnet *ifp,
3581    struct mbuf *m, int ipoff, int off, void *h, struct pf_pdesc *pd,
3582    u_short *reason)
3583{
3584	struct pf_tree_node	 key;
3585	struct tcphdr		*th = pd->hdr.tcp;
3586	u_int16_t		 win = ntohs(th->th_win);
3587	u_int32_t		 ack, end, seq;
3588	u_int8_t		 sws, dws;
3589	int			 ackskew, dirndx;
3590	int			 copyback = 0;
3591	struct pf_state_peer	*src, *dst;
3592
3593	key.af = pd->af;
3594	key.proto = IPPROTO_TCP;
3595	PF_ACPY(&key.addr[0], pd->src, key.af);
3596	PF_ACPY(&key.addr[1], pd->dst, key.af);
3597	key.port[0] = th->th_sport;
3598	key.port[1] = th->th_dport;
3599
3600	STATE_LOOKUP();
3601
3602	if (direction == (*state)->direction) {
3603		src = &(*state)->src;
3604		dst = &(*state)->dst;
3605		dirndx = 0;
3606	} else {
3607		src = &(*state)->dst;
3608		dst = &(*state)->src;
3609		dirndx = 1;
3610	}
3611
3612	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
3613		if (direction != (*state)->direction)
3614			return (PF_SYNPROXY_DROP);
3615		if (th->th_flags & TH_SYN) {
3616			if (ntohl(th->th_seq) != (*state)->src.seqlo)
3617				return (PF_DROP);
3618			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3619			    pd->src, th->th_dport, th->th_sport,
3620			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
3621			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0);
3622			return (PF_SYNPROXY_DROP);
3623		} else if (!(th->th_flags & TH_ACK) ||
3624		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3625		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1))
3626			return (PF_DROP);
3627		else
3628			(*state)->src.state = PF_TCPS_PROXY_DST;
3629	}
3630	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
3631		struct pf_state_host *src, *dst;
3632
3633		if (direction == PF_OUT) {
3634			src = &(*state)->gwy;
3635			dst = &(*state)->ext;
3636		} else {
3637			src = &(*state)->ext;
3638			dst = &(*state)->lan;
3639		}
3640		if (direction == (*state)->direction) {
3641			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
3642			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3643			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1))
3644				return (PF_DROP);
3645			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
3646			if ((*state)->dst.seqhi == 1)
3647				(*state)->dst.seqhi = arc4random();
3648			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3649			    &dst->addr, src->port, dst->port,
3650			    (*state)->dst.seqhi, 0, TH_SYN, 0, (*state)->src.mss, 0);
3651			return (PF_SYNPROXY_DROP);
3652		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
3653		    (TH_SYN|TH_ACK)) ||
3654		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1))
3655			return (PF_DROP);
3656		else {
3657			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
3658			(*state)->dst.seqlo = ntohl(th->th_seq);
3659			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3660			    pd->src, th->th_dport, th->th_sport,
3661			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
3662			    TH_ACK, (*state)->src.max_win, 0, 0);
3663			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3664			    &dst->addr, src->port, dst->port,
3665			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
3666			    TH_ACK, (*state)->dst.max_win, 0, 0);
3667			(*state)->src.seqdiff = (*state)->dst.seqhi -
3668			    (*state)->src.seqlo;
3669			(*state)->dst.seqdiff = (*state)->src.seqhi -
3670			    (*state)->dst.seqlo;
3671			(*state)->src.seqhi = (*state)->src.seqlo +
3672			    (*state)->src.max_win;
3673			(*state)->dst.seqhi = (*state)->dst.seqlo +
3674			    (*state)->dst.max_win;
3675			(*state)->src.wscale = (*state)->dst.wscale = 0;
3676			(*state)->src.state = (*state)->dst.state =
3677			    TCPS_ESTABLISHED;
3678			return (PF_SYNPROXY_DROP);
3679		}
3680	}
3681
3682	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
3683		sws = src->wscale & PF_WSCALE_MASK;
3684		dws = dst->wscale & PF_WSCALE_MASK;
3685	} else
3686		sws = dws = 0;
3687
3688	/*
3689	 * Sequence tracking algorithm from Guido van Rooij's paper:
3690	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
3691	 *	tcp_filtering.ps
3692	 */
3693
3694	seq = ntohl(th->th_seq);
3695	if (src->seqlo == 0) {
3696		/* First packet from this end. Set its state */
3697
3698		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
3699		    src->scrub == NULL) {
3700			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
3701				REASON_SET(reason, PFRES_MEMORY);
3702				return (PF_DROP);
3703			}
3704		}
3705
3706		/* Deferred generation of sequence number modulator */
3707		if (dst->seqdiff && !src->seqdiff) {
3708			while ((src->seqdiff = arc4random()) == 0)
3709				;
3710			ack = ntohl(th->th_ack) - dst->seqdiff;
3711			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
3712			    src->seqdiff), 0);
3713			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
3714			copyback = 1;
3715		} else {
3716			ack = ntohl(th->th_ack);
3717		}
3718
3719		end = seq + pd->p_len;
3720		if (th->th_flags & TH_SYN) {
3721			end++;
3722			if (dst->wscale & PF_WSCALE_FLAG) {
3723				src->wscale = pf_get_wscale(m, off, th->th_off,
3724				    pd->af);
3725				if (src->wscale & PF_WSCALE_FLAG) {
3726					/* Remove scale factor from initial
3727					 * window */
3728					sws = src->wscale & PF_WSCALE_MASK;
3729					win = ((u_int32_t)win + (1 << sws) - 1)
3730					    >> sws;
3731					dws = dst->wscale & PF_WSCALE_MASK;
3732				} else {
3733					/* fixup other window */
3734					dst->max_win <<= dst->wscale &
3735					    PF_WSCALE_MASK;
3736					/* in case of a retrans SYN|ACK */
3737					dst->wscale = 0;
3738				}
3739			}
3740		}
3741		if (th->th_flags & TH_FIN)
3742			end++;
3743
3744		src->seqlo = seq;
3745		if (src->state < TCPS_SYN_SENT)
3746			src->state = TCPS_SYN_SENT;
3747
3748		/*
3749		 * May need to slide the window (seqhi may have been set by
3750		 * the crappy stack check or if we picked up the connection
3751		 * after establishment)
3752		 */
3753		if (src->seqhi == 1 ||
3754		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
3755			src->seqhi = end + MAX(1, dst->max_win << dws);
3756		if (win > src->max_win)
3757			src->max_win = win;
3758
3759	} else {
3760		ack = ntohl(th->th_ack) - dst->seqdiff;
3761		if (src->seqdiff) {
3762			/* Modulate sequence numbers */
3763			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
3764			    src->seqdiff), 0);
3765			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
3766			copyback = 1;
3767		}
3768		end = seq + pd->p_len;
3769		if (th->th_flags & TH_SYN)
3770			end++;
3771		if (th->th_flags & TH_FIN)
3772			end++;
3773	}
3774
3775	if ((th->th_flags & TH_ACK) == 0) {
3776		/* Let it pass through the ack skew check */
3777		ack = dst->seqlo;
3778	} else if ((ack == 0 &&
3779	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
3780	    /* broken tcp stacks do not set ack */
3781	    (dst->state < TCPS_SYN_SENT)) {
3782		/*
3783		 * Many stacks (ours included) will set the ACK number in an
3784		 * FIN|ACK if the SYN times out -- no sequence to ACK.
3785		 */
3786		ack = dst->seqlo;
3787	}
3788
3789	if (seq == end) {
3790		/* Ease sequencing restrictions on no data packets */
3791		seq = src->seqlo;
3792		end = seq;
3793	}
3794
3795	ackskew = dst->seqlo - ack;
3796
3797#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
3798	if (SEQ_GEQ(src->seqhi, end) &&
3799	    /* Last octet inside other's window space */
3800	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
3801	    /* Retrans: not more than one window back */
3802	    (ackskew >= -MAXACKWINDOW) &&
3803	    /* Acking not more than one reassembled fragment backwards */
3804	    (ackskew <= (MAXACKWINDOW << sws))) {
3805	    /* Acking not more than one window forward */
3806
3807		(*state)->packets[dirndx]++;
3808		(*state)->bytes[dirndx] += pd->tot_len;
3809
3810		/* update max window */
3811		if (src->max_win < win)
3812			src->max_win = win;
3813		/* synchronize sequencing */
3814		if (SEQ_GT(end, src->seqlo))
3815			src->seqlo = end;
3816		/* slide the window of what the other end can send */
3817		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
3818			dst->seqhi = ack + MAX((win << sws), 1);
3819
3820
3821		/* update states */
3822		if (th->th_flags & TH_SYN)
3823			if (src->state < TCPS_SYN_SENT)
3824				src->state = TCPS_SYN_SENT;
3825		if (th->th_flags & TH_FIN)
3826			if (src->state < TCPS_CLOSING)
3827				src->state = TCPS_CLOSING;
3828		if (th->th_flags & TH_ACK) {
3829			if (dst->state == TCPS_SYN_SENT)
3830				dst->state = TCPS_ESTABLISHED;
3831			else if (dst->state == TCPS_CLOSING)
3832				dst->state = TCPS_FIN_WAIT_2;
3833		}
3834		if (th->th_flags & TH_RST)
3835			src->state = dst->state = TCPS_TIME_WAIT;
3836
3837		/* update expire time */
3838#if defined(__FreeBSD__)
3839		(*state)->expire = time_second;
3840#else
3841		(*state)->expire = time.tv_sec;
3842#endif
3843		if (src->state >= TCPS_FIN_WAIT_2 &&
3844		    dst->state >= TCPS_FIN_WAIT_2)
3845			(*state)->timeout = PFTM_TCP_CLOSED;
3846		else if (src->state >= TCPS_FIN_WAIT_2 ||
3847		    dst->state >= TCPS_FIN_WAIT_2)
3848			(*state)->timeout = PFTM_TCP_FIN_WAIT;
3849		else if (src->state < TCPS_ESTABLISHED ||
3850		    dst->state < TCPS_ESTABLISHED)
3851			(*state)->timeout = PFTM_TCP_OPENING;
3852		else if (src->state >= TCPS_CLOSING ||
3853		    dst->state >= TCPS_CLOSING)
3854			(*state)->timeout = PFTM_TCP_CLOSING;
3855		else
3856			(*state)->timeout = PFTM_TCP_ESTABLISHED;
3857
3858		/* Fall through to PASS packet */
3859
3860	} else if ((dst->state < TCPS_SYN_SENT ||
3861		dst->state >= TCPS_FIN_WAIT_2 ||
3862		src->state >= TCPS_FIN_WAIT_2) &&
3863	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
3864	    /* Within a window forward of the originating packet */
3865	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
3866	    /* Within a window backward of the originating packet */
3867
3868		/*
3869		 * This currently handles three situations:
3870		 *  1) Stupid stacks will shotgun SYNs before their peer
3871		 *     replies.
3872		 *  2) When PF catches an already established stream (the
3873		 *     firewall rebooted, the state table was flushed, routes
3874		 *     changed...)
3875		 *  3) Packets get funky immediately after the connection
3876		 *     closes (this should catch Solaris spurious ACK|FINs
3877		 *     that web servers like to spew after a close)
3878		 *
3879		 * This must be a little more careful than the above code
3880		 * since packet floods will also be caught here. We don't
3881		 * update the TTL here to mitigate the damage of a packet
3882		 * flood and so the same code can handle awkward establishment
3883		 * and a loosened connection close.
3884		 * In the establishment case, a correct peer response will
3885		 * validate the connection, go through the normal state code
3886		 * and keep updating the state TTL.
3887		 */
3888
3889		if (pf_status.debug >= PF_DEBUG_MISC) {
3890			printf("pf: loose state match: ");
3891			pf_print_state(*state);
3892			pf_print_flags(th->th_flags);
3893			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n",
3894			    seq, ack, pd->p_len, ackskew,
3895			    (*state)->packets[0], (*state)->packets[1]);
3896		}
3897
3898		(*state)->packets[dirndx]++;
3899		(*state)->bytes[dirndx] += pd->tot_len;
3900
3901		/* update max window */
3902		if (src->max_win < win)
3903			src->max_win = win;
3904		/* synchronize sequencing */
3905		if (SEQ_GT(end, src->seqlo))
3906			src->seqlo = end;
3907		/* slide the window of what the other end can send */
3908		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
3909			dst->seqhi = ack + MAX((win << sws), 1);
3910
3911		/*
3912		 * Cannot set dst->seqhi here since this could be a shotgunned
3913		 * SYN and not an already established connection.
3914		 */
3915
3916		if (th->th_flags & TH_FIN)
3917			if (src->state < TCPS_CLOSING)
3918				src->state = TCPS_CLOSING;
3919		if (th->th_flags & TH_RST)
3920			src->state = dst->state = TCPS_TIME_WAIT;
3921
3922		/* Fall through to PASS packet */
3923
3924	} else {
3925		if ((*state)->dst.state == TCPS_SYN_SENT &&
3926		    (*state)->src.state == TCPS_SYN_SENT) {
3927			/* Send RST for state mismatches during handshake */
3928			if (!(th->th_flags & TH_RST)) {
3929				u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
3930
3931				if (th->th_flags & TH_SYN)
3932					ack++;
3933				if (th->th_flags & TH_FIN)
3934					ack++;
3935				pf_send_tcp((*state)->rule.ptr, pd->af,
3936				    pd->dst, pd->src, th->th_dport,
3937				    th->th_sport, ntohl(th->th_ack), ack,
3938				    TH_RST|TH_ACK, 0, 0,
3939				    (*state)->rule.ptr->return_ttl);
3940			}
3941			src->seqlo = 0;
3942			src->seqhi = 1;
3943			src->max_win = 1;
3944		} else if (pf_status.debug >= PF_DEBUG_MISC) {
3945			printf("pf: BAD state: ");
3946			pf_print_state(*state);
3947			pf_print_flags(th->th_flags);
3948			printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d "
3949			    "dir=%s,%s\n", seq, ack, pd->p_len, ackskew,
3950			    (*state)->packets[0], (*state)->packets[1],
3951			    direction == PF_IN ? "in" : "out",
3952			    direction == (*state)->direction ? "fwd" : "rev");
3953			printf("pf: State failure on: %c %c %c %c | %c %c\n",
3954			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
3955			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
3956			    ' ': '2',
3957			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
3958			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
3959			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
3960			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
3961		}
3962		return (PF_DROP);
3963	}
3964
3965	if (dst->scrub || src->scrub) {
3966		if (pf_normalize_tcp_stateful(m, off, pd, reason, th, src, dst,
3967		    &copyback))
3968			return (PF_DROP);
3969	}
3970
3971	/* Any packets which have gotten here are to be passed */
3972
3973	/* translate source/destination address, if necessary */
3974	if (STATE_TRANSLATE(*state)) {
3975		if (direction == PF_OUT)
3976			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
3977			    &th->th_sum, &(*state)->gwy.addr,
3978			    (*state)->gwy.port, 0, pd->af);
3979		else
3980			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
3981			    &th->th_sum, &(*state)->lan.addr,
3982			    (*state)->lan.port, 0, pd->af);
3983		m_copyback(m, off, sizeof(*th), (caddr_t)th);
3984	} else if (copyback) {
3985		/* Copyback sequence modulation or stateful scrub changes */
3986		m_copyback(m, off, sizeof(*th), (caddr_t)th);
3987	}
3988
3989	(*state)->rule.ptr->packets++;
3990	(*state)->rule.ptr->bytes += pd->tot_len;
3991	if ((*state)->nat_rule.ptr != NULL) {
3992		(*state)->nat_rule.ptr->packets++;
3993		(*state)->nat_rule.ptr->bytes += pd->tot_len;
3994	}
3995	if ((*state)->anchor.ptr != NULL) {
3996		(*state)->anchor.ptr->packets++;
3997		(*state)->anchor.ptr->bytes += pd->tot_len;
3998	}
3999	return (PF_PASS);
4000}
4001
4002int
4003pf_test_state_udp(struct pf_state **state, int direction, struct ifnet *ifp,
4004    struct mbuf *m, int ipoff, int off, void *h, struct pf_pdesc *pd)
4005{
4006	struct pf_state_peer	*src, *dst;
4007	struct pf_tree_node	 key;
4008	struct udphdr		*uh = pd->hdr.udp;
4009	int			dirndx;
4010
4011	key.af = pd->af;
4012	key.proto = IPPROTO_UDP;
4013	PF_ACPY(&key.addr[0], pd->src, key.af);
4014	PF_ACPY(&key.addr[1], pd->dst, key.af);
4015	key.port[0] = uh->uh_sport;
4016	key.port[1] = uh->uh_dport;
4017
4018	STATE_LOOKUP();
4019
4020	if (direction == (*state)->direction) {
4021		src = &(*state)->src;
4022		dst = &(*state)->dst;
4023		dirndx = 0;
4024	} else {
4025		src = &(*state)->dst;
4026		dst = &(*state)->src;
4027		dirndx = 1;
4028	}
4029
4030	(*state)->packets[dirndx]++;
4031	(*state)->bytes[dirndx] += pd->tot_len;
4032
4033	/* update states */
4034	if (src->state < PFUDPS_SINGLE)
4035		src->state = PFUDPS_SINGLE;
4036	if (dst->state == PFUDPS_SINGLE)
4037		dst->state = PFUDPS_MULTIPLE;
4038
4039	/* update expire time */
4040#if defined(__FreeBSD__)
4041	(*state)->expire = time_second;
4042#else
4043	(*state)->expire = time.tv_sec;
4044#endif
4045	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4046		(*state)->timeout = PFTM_UDP_MULTIPLE;
4047	else
4048		(*state)->timeout = PFTM_UDP_SINGLE;
4049
4050	/* translate source/destination address, if necessary */
4051	if (STATE_TRANSLATE(*state)) {
4052		if (direction == PF_OUT)
4053			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4054			    &uh->uh_sum, &(*state)->gwy.addr,
4055			    (*state)->gwy.port, 1, pd->af);
4056		else
4057			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4058			    &uh->uh_sum, &(*state)->lan.addr,
4059			    (*state)->lan.port, 1, pd->af);
4060		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4061	}
4062
4063	(*state)->rule.ptr->packets++;
4064	(*state)->rule.ptr->bytes += pd->tot_len;
4065	if ((*state)->nat_rule.ptr != NULL) {
4066		(*state)->nat_rule.ptr->packets++;
4067		(*state)->nat_rule.ptr->bytes += pd->tot_len;
4068	}
4069	if ((*state)->anchor.ptr != NULL) {
4070		(*state)->anchor.ptr->packets++;
4071		(*state)->anchor.ptr->bytes += pd->tot_len;
4072	}
4073	return (PF_PASS);
4074}
4075
4076int
4077pf_test_state_icmp(struct pf_state **state, int direction, struct ifnet *ifp,
4078    struct mbuf *m, int ipoff, int off, void *h, struct pf_pdesc *pd)
4079{
4080	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
4081	u_int16_t	 icmpid, *icmpsum;
4082	u_int8_t	 icmptype;
4083	int		 state_icmp = 0, dirndx;
4084
4085	switch (pd->proto) {
4086#ifdef INET
4087	case IPPROTO_ICMP:
4088		icmptype = pd->hdr.icmp->icmp_type;
4089		icmpid = pd->hdr.icmp->icmp_id;
4090		icmpsum = &pd->hdr.icmp->icmp_cksum;
4091
4092		if (icmptype == ICMP_UNREACH ||
4093		    icmptype == ICMP_SOURCEQUENCH ||
4094		    icmptype == ICMP_REDIRECT ||
4095		    icmptype == ICMP_TIMXCEED ||
4096		    icmptype == ICMP_PARAMPROB)
4097			state_icmp++;
4098		break;
4099#endif /* INET */
4100#ifdef INET6
4101	case IPPROTO_ICMPV6:
4102		icmptype = pd->hdr.icmp6->icmp6_type;
4103		icmpid = pd->hdr.icmp6->icmp6_id;
4104		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4105
4106		if (icmptype == ICMP6_DST_UNREACH ||
4107		    icmptype == ICMP6_PACKET_TOO_BIG ||
4108		    icmptype == ICMP6_TIME_EXCEEDED ||
4109		    icmptype == ICMP6_PARAM_PROB)
4110			state_icmp++;
4111		break;
4112#endif /* INET6 */
4113	}
4114
4115	if (!state_icmp) {
4116
4117		/*
4118		 * ICMP query/reply message not related to a TCP/UDP packet.
4119		 * Search for an ICMP state.
4120		 */
4121		struct pf_tree_node	key;
4122
4123		key.af = pd->af;
4124		key.proto = pd->proto;
4125		PF_ACPY(&key.addr[0], saddr, key.af);
4126		PF_ACPY(&key.addr[1], daddr, key.af);
4127		key.port[0] = icmpid;
4128		key.port[1] = icmpid;
4129
4130		STATE_LOOKUP();
4131
4132		dirndx = (direction == (*state)->direction) ? 0 : 1;
4133		(*state)->packets[dirndx]++;
4134		(*state)->bytes[dirndx] += pd->tot_len;
4135#if defined(__FreeBSD__)
4136		(*state)->expire = time_second;
4137#else
4138		(*state)->expire = time.tv_sec;
4139#endif
4140		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4141
4142		/* translate source/destination address, if necessary */
4143		if (PF_ANEQ(&(*state)->lan.addr, &(*state)->gwy.addr, pd->af)) {
4144			if (direction == PF_OUT) {
4145				switch (pd->af) {
4146#ifdef INET
4147				case AF_INET:
4148					pf_change_a(&saddr->v4.s_addr,
4149					    pd->ip_sum,
4150					    (*state)->gwy.addr.v4.s_addr, 0);
4151					break;
4152#endif /* INET */
4153#ifdef INET6
4154				case AF_INET6:
4155					pf_change_a6(saddr,
4156					    &pd->hdr.icmp6->icmp6_cksum,
4157					    &(*state)->gwy.addr, 0);
4158					m_copyback(m, off,
4159					    sizeof(struct icmp6_hdr),
4160					    (caddr_t)pd->hdr.icmp6);
4161					break;
4162#endif /* INET6 */
4163				}
4164			} else {
4165				switch (pd->af) {
4166#ifdef INET
4167				case AF_INET:
4168					pf_change_a(&daddr->v4.s_addr,
4169					    pd->ip_sum,
4170					    (*state)->lan.addr.v4.s_addr, 0);
4171					break;
4172#endif /* INET */
4173#ifdef INET6
4174				case AF_INET6:
4175					pf_change_a6(daddr,
4176					    &pd->hdr.icmp6->icmp6_cksum,
4177					    &(*state)->lan.addr, 0);
4178					m_copyback(m, off,
4179					    sizeof(struct icmp6_hdr),
4180					    (caddr_t)pd->hdr.icmp6);
4181					break;
4182#endif /* INET6 */
4183				}
4184			}
4185		}
4186
4187		return (PF_PASS);
4188
4189	} else {
4190		/*
4191		 * ICMP error message in response to a TCP/UDP packet.
4192		 * Extract the inner TCP/UDP header and search for that state.
4193		 */
4194
4195		struct pf_pdesc	pd2;
4196#ifdef INET
4197		struct ip	h2;
4198#endif /* INET */
4199#ifdef INET6
4200		struct ip6_hdr	h2_6;
4201		int		terminal = 0;
4202#endif /* INET6 */
4203		int		ipoff2;
4204		int		off2;
4205
4206		pd2.af = pd->af;
4207		switch (pd->af) {
4208#ifdef INET
4209		case AF_INET:
4210			/* offset of h2 in mbuf chain */
4211			ipoff2 = off + ICMP_MINLEN;
4212
4213			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
4214			    NULL, NULL, pd2.af)) {
4215				DPFPRINTF(PF_DEBUG_MISC,
4216				    ("pf: ICMP error message too short "
4217				    "(ip)\n"));
4218				return (PF_DROP);
4219			}
4220			/*
4221			 * ICMP error messages don't refer to non-first
4222			 * fragments
4223			 */
4224			if (h2.ip_off & htons(IP_OFFMASK))
4225				return (PF_DROP);
4226
4227			/* offset of protocol header that follows h2 */
4228			off2 = ipoff2 + (h2.ip_hl << 2);
4229
4230			pd2.proto = h2.ip_p;
4231			pd2.src = (struct pf_addr *)&h2.ip_src;
4232			pd2.dst = (struct pf_addr *)&h2.ip_dst;
4233			pd2.ip_sum = &h2.ip_sum;
4234			break;
4235#endif /* INET */
4236#ifdef INET6
4237		case AF_INET6:
4238			ipoff2 = off + sizeof(struct icmp6_hdr);
4239
4240			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
4241			    NULL, NULL, pd2.af)) {
4242				DPFPRINTF(PF_DEBUG_MISC,
4243				    ("pf: ICMP error message too short "
4244				    "(ip6)\n"));
4245				return (PF_DROP);
4246			}
4247			pd2.proto = h2_6.ip6_nxt;
4248			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
4249			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
4250			pd2.ip_sum = NULL;
4251			off2 = ipoff2 + sizeof(h2_6);
4252			do {
4253				switch (pd2.proto) {
4254				case IPPROTO_FRAGMENT:
4255					/*
4256					 * ICMPv6 error messages for
4257					 * non-first fragments
4258					 */
4259					return (PF_DROP);
4260				case IPPROTO_AH:
4261				case IPPROTO_HOPOPTS:
4262				case IPPROTO_ROUTING:
4263				case IPPROTO_DSTOPTS: {
4264					/* get next header and header length */
4265					struct ip6_ext opt6;
4266
4267					if (!pf_pull_hdr(m, off2, &opt6,
4268					    sizeof(opt6), NULL, NULL, pd2.af)) {
4269						DPFPRINTF(PF_DEBUG_MISC,
4270						    ("pf: ICMPv6 short opt\n"));
4271						return (PF_DROP);
4272					}
4273					if (pd2.proto == IPPROTO_AH)
4274						off2 += (opt6.ip6e_len + 2) * 4;
4275					else
4276						off2 += (opt6.ip6e_len + 1) * 8;
4277					pd2.proto = opt6.ip6e_nxt;
4278					/* goto the next header */
4279					break;
4280				}
4281				default:
4282					terminal++;
4283					break;
4284				}
4285			} while (!terminal);
4286			break;
4287#endif /* INET6 */
4288		}
4289
4290		switch (pd2.proto) {
4291		case IPPROTO_TCP: {
4292			struct tcphdr		 th;
4293			u_int32_t		 seq;
4294			struct pf_tree_node	 key;
4295			struct pf_state_peer	*src, *dst;
4296			u_int8_t		 dws;
4297
4298			/*
4299			 * Only the first 8 bytes of the TCP header can be
4300			 * expected. Don't access any TCP header fields after
4301			 * th_seq, an ackskew test is not possible.
4302			 */
4303			if (!pf_pull_hdr(m, off2, &th, 8, NULL, NULL, pd2.af)) {
4304				DPFPRINTF(PF_DEBUG_MISC,
4305				    ("pf: ICMP error message too short "
4306				    "(tcp)\n"));
4307				return (PF_DROP);
4308			}
4309
4310			key.af = pd2.af;
4311			key.proto = IPPROTO_TCP;
4312			PF_ACPY(&key.addr[0], pd2.dst, pd2.af);
4313			key.port[0] = th.th_dport;
4314			PF_ACPY(&key.addr[1], pd2.src, pd2.af);
4315			key.port[1] = th.th_sport;
4316
4317			STATE_LOOKUP();
4318
4319			if (direction == (*state)->direction) {
4320				src = &(*state)->dst;
4321				dst = &(*state)->src;
4322			} else {
4323				src = &(*state)->src;
4324				dst = &(*state)->dst;
4325			}
4326
4327			if (src->wscale && dst->wscale && !(th.th_flags & TH_SYN))
4328				dws = dst->wscale & PF_WSCALE_MASK;
4329			else
4330				dws = 0;
4331
4332			/* Demodulate sequence number */
4333			seq = ntohl(th.th_seq) - src->seqdiff;
4334			if (src->seqdiff)
4335				pf_change_a(&th.th_seq, &th.th_sum,
4336				    htonl(seq), 0);
4337
4338			if (!SEQ_GEQ(src->seqhi, seq) ||
4339			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
4340				if (pf_status.debug >= PF_DEBUG_MISC) {
4341					printf("pf: BAD ICMP %d:%d ",
4342					    icmptype, pd->hdr.icmp->icmp_code);
4343					pf_print_host(pd->src, 0, pd->af);
4344					printf(" -> ");
4345					pf_print_host(pd->dst, 0, pd->af);
4346					printf(" state: ");
4347					pf_print_state(*state);
4348					printf(" seq=%u\n", seq);
4349				}
4350				return (PF_DROP);
4351			}
4352
4353			if (STATE_TRANSLATE(*state)) {
4354				if (direction == PF_IN) {
4355					pf_change_icmp(pd2.src, &th.th_sport,
4356					    saddr, &(*state)->lan.addr,
4357					    (*state)->lan.port, NULL,
4358					    pd2.ip_sum, icmpsum,
4359					    pd->ip_sum, 0, pd2.af);
4360				} else {
4361					pf_change_icmp(pd2.dst, &th.th_dport,
4362					    saddr, &(*state)->gwy.addr,
4363					    (*state)->gwy.port, NULL,
4364					    pd2.ip_sum, icmpsum,
4365					    pd->ip_sum, 0, pd2.af);
4366				}
4367				switch (pd2.af) {
4368#ifdef INET
4369				case AF_INET:
4370					m_copyback(m, off, ICMP_MINLEN,
4371					    (caddr_t)pd->hdr.icmp);
4372					m_copyback(m, ipoff2, sizeof(h2),
4373					    (caddr_t)&h2);
4374					break;
4375#endif /* INET */
4376#ifdef INET6
4377				case AF_INET6:
4378					m_copyback(m, off,
4379					    sizeof(struct icmp6_hdr),
4380					    (caddr_t)pd->hdr.icmp6);
4381					m_copyback(m, ipoff2, sizeof(h2_6),
4382					    (caddr_t)&h2_6);
4383					break;
4384#endif /* INET6 */
4385				}
4386				m_copyback(m, off2, 8, (caddr_t)&th);
4387			} else if (src->seqdiff) {
4388				m_copyback(m, off2, 8, (caddr_t)&th);
4389			}
4390
4391			return (PF_PASS);
4392			break;
4393		}
4394		case IPPROTO_UDP: {
4395			struct udphdr		uh;
4396			struct pf_tree_node	key;
4397
4398			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
4399			    NULL, NULL, pd2.af)) {
4400				DPFPRINTF(PF_DEBUG_MISC,
4401				    ("pf: ICMP error message too short "
4402				    "(udp)\n"));
4403				return (PF_DROP);
4404			}
4405
4406			key.af = pd2.af;
4407			key.proto = IPPROTO_UDP;
4408			PF_ACPY(&key.addr[0], pd2.dst, pd2.af);
4409			key.port[0] = uh.uh_dport;
4410			PF_ACPY(&key.addr[1], pd2.src, pd2.af);
4411			key.port[1] = uh.uh_sport;
4412
4413			STATE_LOOKUP();
4414
4415			if (STATE_TRANSLATE(*state)) {
4416				if (direction == PF_IN) {
4417					pf_change_icmp(pd2.src, &uh.uh_sport,
4418					    daddr, &(*state)->lan.addr,
4419					    (*state)->lan.port, &uh.uh_sum,
4420					    pd2.ip_sum, icmpsum,
4421					    pd->ip_sum, 1, pd2.af);
4422				} else {
4423					pf_change_icmp(pd2.dst, &uh.uh_dport,
4424					    saddr, &(*state)->gwy.addr,
4425					    (*state)->gwy.port, &uh.uh_sum,
4426					    pd2.ip_sum, icmpsum,
4427					    pd->ip_sum, 1, pd2.af);
4428				}
4429				switch (pd2.af) {
4430#ifdef INET
4431				case AF_INET:
4432					m_copyback(m, off, ICMP_MINLEN,
4433					    (caddr_t)pd->hdr.icmp);
4434					m_copyback(m, ipoff2, sizeof(h2),
4435					    (caddr_t)&h2);
4436					break;
4437#endif /* INET */
4438#ifdef INET6
4439				case AF_INET6:
4440					m_copyback(m, off,
4441					    sizeof(struct icmp6_hdr),
4442					    (caddr_t)pd->hdr.icmp6);
4443					m_copyback(m, ipoff2, sizeof(h2_6),
4444					    (caddr_t)&h2_6);
4445					break;
4446#endif /* INET6 */
4447				}
4448				m_copyback(m, off2, sizeof(uh),
4449				    (caddr_t)&uh);
4450			}
4451
4452			return (PF_PASS);
4453			break;
4454		}
4455#ifdef INET
4456		case IPPROTO_ICMP: {
4457			struct icmp		iih;
4458			struct pf_tree_node	key;
4459
4460			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
4461			    NULL, NULL, pd2.af)) {
4462				DPFPRINTF(PF_DEBUG_MISC,
4463				    ("pf: ICMP error message too short i"
4464				    "(icmp)\n"));
4465				return (PF_DROP);
4466			}
4467
4468			key.af = pd2.af;
4469			key.proto = IPPROTO_ICMP;
4470			PF_ACPY(&key.addr[0], pd2.dst, pd2.af);
4471			key.port[0] = iih.icmp_id;
4472			PF_ACPY(&key.addr[1], pd2.src, pd2.af);
4473			key.port[1] = iih.icmp_id;
4474
4475			STATE_LOOKUP();
4476
4477			if (STATE_TRANSLATE(*state)) {
4478				if (direction == PF_IN) {
4479					pf_change_icmp(pd2.src, &iih.icmp_id,
4480					    daddr, &(*state)->lan.addr,
4481					    (*state)->lan.port, NULL,
4482					    pd2.ip_sum, icmpsum,
4483					    pd->ip_sum, 0, AF_INET);
4484				} else {
4485					pf_change_icmp(pd2.dst, &iih.icmp_id,
4486					    saddr, &(*state)->gwy.addr,
4487					    (*state)->gwy.port, NULL,
4488					    pd2.ip_sum, icmpsum,
4489					    pd->ip_sum, 0, AF_INET);
4490				}
4491				m_copyback(m, off, ICMP_MINLEN,
4492				    (caddr_t)pd->hdr.icmp);
4493				m_copyback(m, ipoff2, sizeof(h2),
4494				    (caddr_t)&h2);
4495				m_copyback(m, off2, ICMP_MINLEN,
4496				    (caddr_t)&iih);
4497			}
4498
4499			return (PF_PASS);
4500			break;
4501		}
4502#endif /* INET */
4503#ifdef INET6
4504		case IPPROTO_ICMPV6: {
4505			struct icmp6_hdr	iih;
4506			struct pf_tree_node	key;
4507
4508			if (!pf_pull_hdr(m, off2, &iih,
4509			    sizeof(struct icmp6_hdr), NULL, NULL, pd2.af)) {
4510				DPFPRINTF(PF_DEBUG_MISC,
4511				    ("pf: ICMP error message too short "
4512				    "(icmp6)\n"));
4513				return (PF_DROP);
4514			}
4515
4516			key.af = pd2.af;
4517			key.proto = IPPROTO_ICMPV6;
4518			PF_ACPY(&key.addr[0], pd2.dst, pd2.af);
4519			key.port[0] = iih.icmp6_id;
4520			PF_ACPY(&key.addr[1], pd2.src, pd2.af);
4521			key.port[1] = iih.icmp6_id;
4522
4523			STATE_LOOKUP();
4524
4525			if (STATE_TRANSLATE(*state)) {
4526				if (direction == PF_IN) {
4527					pf_change_icmp(pd2.src, &iih.icmp6_id,
4528					    daddr, &(*state)->lan.addr,
4529					    (*state)->lan.port, NULL,
4530					    pd2.ip_sum, icmpsum,
4531					    pd->ip_sum, 0, AF_INET6);
4532				} else {
4533					pf_change_icmp(pd2.dst, &iih.icmp6_id,
4534					    saddr, &(*state)->gwy.addr,
4535					    (*state)->gwy.port, NULL,
4536					    pd2.ip_sum, icmpsum,
4537					    pd->ip_sum, 0, AF_INET6);
4538				}
4539				m_copyback(m, off, sizeof(struct icmp6_hdr),
4540				    (caddr_t)pd->hdr.icmp6);
4541				m_copyback(m, ipoff2, sizeof(h2_6),
4542				    (caddr_t)&h2_6);
4543				m_copyback(m, off2, sizeof(struct icmp6_hdr),
4544				    (caddr_t)&iih);
4545			}
4546
4547			return (PF_PASS);
4548			break;
4549		}
4550#endif /* INET6 */
4551		default: {
4552			struct pf_tree_node	key;
4553
4554			key.af = pd2.af;
4555			key.proto = pd2.proto;
4556			PF_ACPY(&key.addr[0], pd2.dst, pd2.af);
4557			key.port[0] = 0;
4558			PF_ACPY(&key.addr[1], pd2.src, pd2.af);
4559			key.port[1] = 0;
4560
4561			STATE_LOOKUP();
4562
4563			if (STATE_TRANSLATE(*state)) {
4564				if (direction == PF_IN) {
4565					pf_change_icmp(pd2.src, NULL,
4566					    daddr, &(*state)->lan.addr,
4567					    0, NULL,
4568					    pd2.ip_sum, icmpsum,
4569					    pd->ip_sum, 0, pd2.af);
4570				} else {
4571					pf_change_icmp(pd2.dst, NULL,
4572					    saddr, &(*state)->gwy.addr,
4573					    0, NULL,
4574					    pd2.ip_sum, icmpsum,
4575					    pd->ip_sum, 0, pd2.af);
4576				}
4577				switch (pd2.af) {
4578#ifdef INET
4579				case AF_INET:
4580					m_copyback(m, off, ICMP_MINLEN,
4581					    (caddr_t)pd->hdr.icmp);
4582					m_copyback(m, ipoff2, sizeof(h2),
4583					    (caddr_t)&h2);
4584					break;
4585#endif /* INET */
4586#ifdef INET6
4587				case AF_INET6:
4588					m_copyback(m, off,
4589					    sizeof(struct icmp6_hdr),
4590					    (caddr_t)pd->hdr.icmp6);
4591					m_copyback(m, ipoff2, sizeof(h2_6),
4592					    (caddr_t)&h2_6);
4593					break;
4594#endif /* INET6 */
4595				}
4596			}
4597
4598			return (PF_PASS);
4599			break;
4600		}
4601		}
4602	}
4603}
4604
4605int
4606pf_test_state_other(struct pf_state **state, int direction, struct ifnet *ifp,
4607    struct pf_pdesc *pd)
4608{
4609	struct pf_state_peer	*src, *dst;
4610	struct pf_tree_node	 key;
4611	int			dirndx;
4612
4613	key.af = pd->af;
4614	key.proto = pd->proto;
4615	PF_ACPY(&key.addr[0], pd->src, key.af);
4616	PF_ACPY(&key.addr[1], pd->dst, key.af);
4617	key.port[0] = 0;
4618	key.port[1] = 0;
4619
4620	STATE_LOOKUP();
4621
4622	if (direction == (*state)->direction) {
4623		src = &(*state)->src;
4624		dst = &(*state)->dst;
4625		dirndx = 0;
4626	} else {
4627		src = &(*state)->dst;
4628		dst = &(*state)->src;
4629		dirndx = 1;
4630	}
4631
4632	(*state)->packets[dirndx]++;
4633	(*state)->bytes[dirndx] += pd->tot_len;
4634
4635	/* update states */
4636	if (src->state < PFOTHERS_SINGLE)
4637		src->state = PFOTHERS_SINGLE;
4638	if (dst->state == PFOTHERS_SINGLE)
4639		dst->state = PFOTHERS_MULTIPLE;
4640
4641	/* update expire time */
4642#if defined(__FreeBSD__)
4643	(*state)->expire = time_second;
4644#else
4645	(*state)->expire = time.tv_sec;
4646#endif
4647	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
4648		(*state)->timeout = PFTM_OTHER_MULTIPLE;
4649	else
4650		(*state)->timeout = PFTM_OTHER_SINGLE;
4651
4652	/* translate source/destination address, if necessary */
4653	if (STATE_TRANSLATE(*state)) {
4654		if (direction == PF_OUT)
4655			switch (pd->af) {
4656#ifdef INET
4657			case AF_INET:
4658				pf_change_a(&pd->src->v4.s_addr,
4659				    pd->ip_sum, (*state)->gwy.addr.v4.s_addr,
4660				    0);
4661				break;
4662#endif /* INET */
4663#ifdef INET6
4664			case AF_INET6:
4665				PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af);
4666				break;
4667#endif /* INET6 */
4668			}
4669		else
4670			switch (pd->af) {
4671#ifdef INET
4672			case AF_INET:
4673				pf_change_a(&pd->dst->v4.s_addr,
4674				    pd->ip_sum, (*state)->lan.addr.v4.s_addr,
4675				    0);
4676				break;
4677#endif /* INET */
4678#ifdef INET6
4679			case AF_INET6:
4680				PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af);
4681				break;
4682#endif /* INET6 */
4683			}
4684	}
4685
4686	(*state)->rule.ptr->packets++;
4687	(*state)->rule.ptr->bytes += pd->tot_len;
4688	if ((*state)->nat_rule.ptr != NULL) {
4689		(*state)->nat_rule.ptr->packets++;
4690		(*state)->nat_rule.ptr->bytes += pd->tot_len;
4691	}
4692	if ((*state)->anchor.ptr != NULL) {
4693		(*state)->anchor.ptr->packets++;
4694		(*state)->anchor.ptr->bytes += pd->tot_len;
4695	}
4696	return (PF_PASS);
4697}
4698
4699/*
4700 * ipoff and off are measured from the start of the mbuf chain.
4701 * h must be at "ipoff" on the mbuf chain.
4702 */
4703void *
4704pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
4705    u_short *actionp, u_short *reasonp, sa_family_t af)
4706{
4707	switch (af) {
4708#ifdef INET
4709	case AF_INET: {
4710		struct ip	*h = mtod(m, struct ip *);
4711		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
4712
4713		if (fragoff) {
4714			if (fragoff >= len)
4715				ACTION_SET(actionp, PF_PASS);
4716			else {
4717				ACTION_SET(actionp, PF_DROP);
4718				REASON_SET(reasonp, PFRES_FRAG);
4719			}
4720			return (NULL);
4721		}
4722		if (m->m_pkthdr.len < off + len || ntohs(h->ip_len) < off + len) {
4723			ACTION_SET(actionp, PF_DROP);
4724			REASON_SET(reasonp, PFRES_SHORT);
4725			return (NULL);
4726		}
4727		break;
4728	}
4729#endif /* INET */
4730#ifdef INET6
4731	case AF_INET6: {
4732		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
4733
4734		if (m->m_pkthdr.len < off + len ||
4735		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
4736		    (unsigned)(off + len)) {
4737			ACTION_SET(actionp, PF_DROP);
4738			REASON_SET(reasonp, PFRES_SHORT);
4739			return (NULL);
4740		}
4741		break;
4742	}
4743#endif /* INET6 */
4744	}
4745	m_copydata(m, off, len, p);
4746	return (p);
4747}
4748
4749int
4750pf_routable(struct pf_addr *addr, sa_family_t af)
4751{
4752	struct sockaddr_in	*dst;
4753	struct route		 ro;
4754	int			 ret = 0;
4755
4756	bzero(&ro, sizeof(ro));
4757	dst = satosin(&ro.ro_dst);
4758	dst->sin_family = af;
4759	dst->sin_len = sizeof(*dst);
4760	dst->sin_addr = addr->v4;
4761#if defined(__FreeBSD__)
4762#ifdef RTF_PRCLONING
4763	rtalloc_ign(&ro, (RTF_CLONING|RTF_PRCLONING));
4764#else /* !RTF_PRCLONING */
4765	rtalloc_ign(&ro, RTF_CLONING);
4766#endif
4767#else /* ! __FreeBSD__ */
4768	rtalloc_noclone(&ro, NO_CLONING);
4769#endif
4770
4771	if (ro.ro_rt != NULL) {
4772		ret = 1;
4773		RTFREE(ro.ro_rt);
4774	}
4775
4776	return (ret);
4777}
4778
4779#ifdef INET
4780
#if defined(__FreeBSD__) && (__FreeBSD_version < 501105)
/*
 * Local IPv4 fragmentation routine, compiled only when __FreeBSD_version
 * is below 501105.
 *
 *   ip                 IP header of the packet to split; ip_len and
 *                      ip_off are used arithmetically here and converted
 *                      with htons() before a successful return
 *   m_frag             in: the packet; out: the first fragment, with the
 *                      remaining fragments chained through m_nextpkt
 *   mtu                size limit for each fragment including its header
 *   if_hwassist_flags  interface checksum capabilities
 *   sw_csum            checksums that must be computed in software
 *
 * Returns 0 on success, EMSGSIZE when the packet may not or cannot be
 * fragmented, and ENOBUFS when an mbuf allocation or copy fails (the
 * fragments built so far stay chained to *m_frag in that case).
 */
int
ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
	    u_long if_hwassist_flags, int sw_csum)
{
	struct mbuf	 *pkt = *m_frag;	/* the packet being split */
	struct mbuf	**linkp;		/* where the next fragment goes */
	int		  iphlen = ip->ip_hl << 2;
	int		  chunk = (mtu - iphlen) & ~7;	/* payload per fragment */
	int		  pos;			/* offset of next fragment in pkt */
	int		  firstlen;
	int		  nfrags;
	int		  error = 0;

	/* A packet with IP_DF set must not be fragmented. */
	if (ip->ip_off & IP_DF) {
		ipstat.ips_cantfrag++;
		return EMSGSIZE;
	}

	/* Every fragment has to carry at least 8 bytes of payload. */
	if (chunk < 8)
		return EMSGSIZE;

	/*
	 * A delayed data checksum is completed here unless the interface
	 * flags include CSUM_IP_FRAGS.
	 */
	if ((pkt->m_pkthdr.csum_flags & CSUM_DELAY_DATA) &&
	    (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
		in_delayed_cksum(pkt);
		pkt->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
	}

	if (chunk > PAGE_SIZE) {
		/*
		 * Large datagrams: try to make every fragment carry a
		 * multiple of PAGE_SIZE bytes of data plus headers, so that
		 * a receiver can use page-flipping zero-copy.
		 *
		 * XXX When does this help given that sender and receiver
		 * could have different page sizes, and also mtu could
		 * be less than the receiver's page size ?
		 */
		struct mbuf	*scan;
		int		 aligned;

		/* total length of the leading mbufs that fit within mtu */
		pos = 0;
		for (scan = pkt; scan != NULL && (pos + scan->m_len) <= mtu;
		    scan = scan->m_next)
			pos += scan->m_len;

		aligned = (~PAGE_MASK) & mtu;
		if (pos >= iphlen && (aligned + sizeof (struct ip)) <= mtu) {
			/* first payload length must be a multiple of 8 */
			pos = ((pos - iphlen) & ~7) + iphlen;
			chunk = aligned;
		} else {
			/* page alignment not possible, use the default */
			pos = iphlen + chunk;
		}
	} else {
		pos = iphlen + chunk;
	}

	firstlen = pos - iphlen;
	linkp = &pkt->m_nextpkt;

	/*
	 * Walk the data that follows the first fragment.  For each piece a
	 * fresh header mbuf is built and the payload is attached as a copy;
	 * the result is appended to the m_nextpkt chain that starts at pkt.
	 * pkt itself is trimmed afterwards and becomes the first fragment.
	 */
	for (nfrags = 1; pos < ip->ip_len; pos += chunk, nfrags++) {
		struct mbuf	*frag;
		struct ip	*fip;		/* header of this fragment */
		int		 fhlen = sizeof (struct ip);

		MGETHDR(frag, M_DONTWAIT, MT_HEADER);
		if (frag == NULL) {
			error = ENOBUFS;
			ipstat.ips_odropped++;
			goto done;
		}
		frag->m_flags |= (pkt->m_flags & M_MCAST) | M_FRAG;

		/*
		 * Leave room for a link header in front, then copy the IP
		 * header; options are carried over by ip_optcopy().  The
		 * payload lives in a separate chain made by m_copy().
		 */
		frag->m_data += max_linkhdr;
		fip = mtod(frag, struct ip *);
		*fip = *ip;
		if (iphlen > sizeof (struct ip)) {
			fhlen = ip_optcopy(ip, fip) + sizeof (struct ip);
			fip->ip_v = IPVERSION;
			fip->ip_hl = fhlen >> 2;
		}
		frag->m_len = fhlen;

		/* XXX do we need to add ip->ip_off below ? */
		fip->ip_off = ((pos - iphlen) >> 3) + ip->ip_off;
		if (pos + chunk >= ip->ip_len) {
			/* last fragment: carries whatever is left */
			chunk = ip->ip_len - pos;
			frag->m_flags |= M_LASTFRAG;
		} else {
			fip->ip_off |= IP_MF;
		}
		fip->ip_len = htons((u_short)(chunk + fhlen));

		frag->m_next = m_copy(pkt, pos, chunk);
		if (frag->m_next == NULL) {
			/* payload copy failed */
			m_free(frag);
			error = ENOBUFS;	/* ??? */
			ipstat.ips_odropped++;
			goto done;
		}
		frag->m_pkthdr.len = fhlen + chunk;
		frag->m_pkthdr.rcvif = NULL;
#ifdef MAC
		mac_create_fragment(pkt, frag);
#endif
		frag->m_pkthdr.csum_flags = pkt->m_pkthdr.csum_flags;
		fip->ip_off = htons(fip->ip_off);
		fip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP)
			fip->ip_sum = in_cksum(frag, fhlen);

		*linkp = frag;
		linkp = &frag->m_nextpkt;
	}
	ipstat.ips_ofragments += nfrags;

	/* mark the head of the fragment chain and record the count */
	pkt->m_flags |= M_FIRSTFRAG | M_FRAG;
	pkt->m_pkthdr.csum_data = nfrags;

	/*
	 * Turn the original packet into the first fragment: cut off the
	 * data that was copied out above and rewrite its header.
	 */
	m_adj(pkt, iphlen + firstlen - ip->ip_len);
	pkt->m_pkthdr.len = iphlen + firstlen;
	ip->ip_len = htons((u_short)pkt->m_pkthdr.len);
	ip->ip_off |= IP_MF;
	ip->ip_off = htons(ip->ip_off);
	ip->ip_sum = 0;
	if (sw_csum & CSUM_DELAY_IP)
		ip->ip_sum = in_cksum(pkt, iphlen);

done:
	*m_frag = pkt;
	return error;
}
#endif /* __FreeBSD__ && __FreeBSD_version < 501105 */
4941
4942void
4943pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
4944    struct pf_state *s)
4945{
4946	struct mbuf		*m0, *m1;
4947	struct route		 iproute;
4948	struct route		*ro;
4949	struct sockaddr_in	*dst;
4950	struct ip		*ip;
4951	struct ifnet		*ifp = NULL;
4952	struct m_tag		*mtag;
4953	struct pf_addr		 naddr;
4954	int			 error = 0;
4955#if defined(__FreeBSD__)
4956	int sw_csum;
4957#endif
4958
4959	if (m == NULL || *m == NULL || r == NULL ||
4960	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
4961		panic("pf_route: invalid parameters");
4962
4963	if (r->rt == PF_DUPTO) {
4964		m0 = *m;
4965		mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL);
4966		if (mtag == NULL) {
4967			mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT);
4968			if (mtag == NULL)
4969				goto bad;
4970			m_tag_prepend(m0, mtag);
4971		}
4972#if defined(__FreeBSD__)
4973		m0 = m_dup(*m, M_DONTWAIT);
4974#else
4975		m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT);
4976#endif
4977		if (m0 == NULL)
4978			return;
4979	} else {
4980		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
4981			return;
4982		m0 = *m;
4983	}
4984
4985	if (m0->m_len < sizeof(struct ip))
4986		panic("pf_route: m0->m_len < sizeof(struct ip)");
4987	ip = mtod(m0, struct ip *);
4988
4989	ro = &iproute;
4990	bzero((caddr_t)ro, sizeof(*ro));
4991	dst = satosin(&ro->ro_dst);
4992	dst->sin_family = AF_INET;
4993	dst->sin_len = sizeof(*dst);
4994	dst->sin_addr = ip->ip_dst;
4995
4996	if (r->rt == PF_FASTROUTE) {
4997		rtalloc(ro);
4998		if (ro->ro_rt == 0) {
4999			ipstat.ips_noroute++;
5000			goto bad;
5001		}
5002
5003		ifp = ro->ro_rt->rt_ifp;
5004		ro->ro_rt->rt_use++;
5005
5006		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
5007			dst = satosin(ro->ro_rt->rt_gateway);
5008	} else {
5009		if (TAILQ_EMPTY(&r->rpool.list))
5010			panic("pf_route: TAILQ_EMPTY(&r->rpool.list)");
5011		if (s == NULL) {
5012			pf_map_addr(AF_INET, &r->rpool,
5013			    (struct pf_addr *)&ip->ip_src,
5014			    &naddr, NULL);
5015			if (!PF_AZERO(&naddr, AF_INET))
5016				dst->sin_addr.s_addr = naddr.v4.s_addr;
5017			ifp = r->rpool.cur->ifp;
5018		} else {
5019			if (!PF_AZERO(&s->rt_addr, AF_INET))
5020				dst->sin_addr.s_addr =
5021				    s->rt_addr.v4.s_addr;
5022			ifp = s->rt_ifp;
5023		}
5024	}
5025
5026	if (ifp == NULL)
5027		goto bad;
5028
5029	mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL);
5030	if (mtag == NULL) {
5031		struct m_tag *mtag;
5032
5033		mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT);
5034		if (mtag == NULL)
5035			goto bad;
5036		m_tag_prepend(m0, mtag);
5037	}
5038
5039	if (oifp != ifp && mtag == NULL) {
5040#if defined(__FreeBSD__)
5041		PF_UNLOCK();
5042		if (pf_test(PF_OUT, ifp, &m0) != PF_PASS) {
5043			PF_LOCK();
5044			goto bad;
5045		} else if (m0 == NULL) {
5046			PF_LOCK();
5047			goto done;
5048		}
5049		PF_LOCK();
5050#else
5051		if (pf_test(PF_OUT, ifp, &m0) != PF_PASS)
5052			goto bad;
5053		else if (m0 == NULL)
5054			goto done;
5055#endif
5056		if (m0->m_len < sizeof(struct ip))
5057			panic("pf_route: m0->m_len < sizeof(struct ip)");
5058		ip = mtod(m0, struct ip *);
5059	}
5060
5061#if defined(__FreeBSD__)
5062	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
5063	m0->m_pkthdr.csum_flags |= CSUM_IP;
5064	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
5065	if (sw_csum & CSUM_DELAY_DATA) {
5066		/*
5067		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
5068		 */
5069		NTOHS(ip->ip_len);
5070		NTOHS(ip->ip_off);	 /* XXX: needed? */
5071		in_delayed_cksum(m0);
5072		HTONS(ip->ip_len);
5073		HTONS(ip->ip_off);
5074		sw_csum &= ~CSUM_DELAY_DATA;
5075	}
5076	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
5077
5078	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
5079	    (ifp->if_hwassist & CSUM_FRAGMENT &&
5080		((ip->ip_off & htons(IP_DF)) == 0))) {
5081		/*
5082		 * ip->ip_len = htons(ip->ip_len);
5083		 * ip->ip_off = htons(ip->ip_off);
5084		 */
5085		ip->ip_sum = 0;
5086		if (sw_csum & CSUM_DELAY_IP) {
5087			/* From KAME */
5088			if (ip->ip_v == IPVERSION &&
5089			    (ip->ip_hl << 2) == sizeof(*ip)) {
5090				ip->ip_sum = in_cksum_hdr(ip);
5091			} else {
5092				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5093			}
5094		}
5095		PF_UNLOCK();
5096		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro->ro_rt);
5097		PF_LOCK();
5098		goto done;
5099	}
5100
5101#else
5102	/* Copied from ip_output. */
5103	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
5104		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
5105		    ifp->if_bridge == NULL) {
5106			m0->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
5107			ipstat.ips_outhwcsum++;
5108		} else {
5109			ip->ip_sum = 0;
5110			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5111		}
5112		/* Update relevant hardware checksum stats for TCP/UDP */
5113		if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT)
5114			tcpstat.tcps_outhwcsum++;
5115		else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT)
5116			udpstat.udps_outhwcsum++;
5117		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
5118		goto done;
5119	}
5120#endif
5121	/*
5122	 * Too large for interface; fragment if possible.
5123	 * Must be able to put at least 8 bytes per fragment.
5124	 */
5125	if (ip->ip_off & htons(IP_DF)) {
5126		ipstat.ips_cantfrag++;
5127		if (r->rt != PF_DUPTO) {
5128#if defined(__FreeBSD__)
5129			/* icmp_error() expects host byte ordering */
5130			NTOHS(ip->ip_len);
5131			NTOHS(ip->ip_off);
5132			PF_UNLOCK();
5133#endif
5134			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
5135			    ifp);
5136#if defined(__FreeBSD__)
5137			PF_LOCK();
5138#endif
5139			goto done;
5140		} else
5141			goto bad;
5142	}
5143
5144	m1 = m0;
5145#if defined(__FreeBSD__)
5146	/*
5147	 * XXX: is cheaper + less error prone than own function
5148	 */
5149	NTOHS(ip->ip_len);
5150	NTOHS(ip->ip_off);
5151	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
5152#else
5153	error = ip_fragment(m0, ifp, ifp->if_mtu);
5154#endif
5155#if defined(__FreeBSD__)
5156	if (error)
5157#else
5158	if (error == EMSGSIZE)
5159#endif
5160		goto bad;
5161
5162	for (m0 = m1; m0; m0 = m1) {
5163		m1 = m0->m_nextpkt;
5164		m0->m_nextpkt = 0;
5165#if defined(__FreeBSD__)
5166		if (error == 0) {
5167			PF_UNLOCK();
5168			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5169			    NULL);
5170			PF_LOCK();
5171		} else
5172#else
5173		if (error == 0)
5174			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5175			    NULL);
5176		else
5177#endif
5178			m_freem(m0);
5179	}
5180
5181	if (error == 0)
5182		ipstat.ips_fragmented++;
5183
5184done:
5185	if (r->rt != PF_DUPTO)
5186		*m = NULL;
5187	if (ro == &iproute && ro->ro_rt)
5188		RTFREE(ro->ro_rt);
5189	return;
5190
5191bad:
5192	m_freem(m0);
5193	goto done;
5194}
5195#endif /* INET */
5196
5197#ifdef INET6
5198void
5199pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5200    struct pf_state *s)
5201{
5202	struct mbuf		*m0;
5203	struct m_tag		*mtag;
5204	struct route_in6	 ip6route;
5205	struct route_in6	*ro;
5206	struct sockaddr_in6	*dst;
5207	struct ip6_hdr		*ip6;
5208	struct ifnet		*ifp = NULL;
5209	struct pf_addr		 naddr;
5210	int			 error = 0;
5211
5212	if (m == NULL || *m == NULL || r == NULL ||
5213	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5214		panic("pf_route6: invalid parameters");
5215
5216	if (r->rt == PF_DUPTO) {
5217		m0 = *m;
5218		mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL);
5219		if (mtag == NULL) {
5220			mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT);
5221			if (mtag == NULL)
5222				goto bad;
5223			m_tag_prepend(m0, mtag);
5224		}
5225#if defined(__FreeBSD__)
5226		m0 = m_dup(*m, M_DONTWAIT);
5227#else
5228		m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT);
5229#endif
5230		if (m0 == NULL)
5231			return;
5232	} else {
5233		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5234			return;
5235		m0 = *m;
5236	}
5237
5238	if (m0->m_len < sizeof(struct ip6_hdr))
5239		panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
5240	ip6 = mtod(m0, struct ip6_hdr *);
5241
5242	ro = &ip6route;
5243	bzero((caddr_t)ro, sizeof(*ro));
5244	dst = (struct sockaddr_in6 *)&ro->ro_dst;
5245	dst->sin6_family = AF_INET6;
5246	dst->sin6_len = sizeof(*dst);
5247	dst->sin6_addr = ip6->ip6_dst;
5248
5249	/* Cheat. */
5250	if (r->rt == PF_FASTROUTE) {
5251		mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
5252		if (mtag == NULL)
5253			goto bad;
5254		m_tag_prepend(m0, mtag);
5255#if defined(__FreeBSD__)
5256		PF_UNLOCK();
5257		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
5258		PF_LOCK();
5259#else
5260		ip6_output(m0, NULL, NULL, 0, NULL, NULL);
5261#endif
5262		return;
5263	}
5264
5265	if (TAILQ_EMPTY(&r->rpool.list))
5266		panic("pf_route6: TAILQ_EMPTY(&r->rpool.list)");
5267	if (s == NULL) {
5268		pf_map_addr(AF_INET6, &r->rpool,
5269		    (struct pf_addr *)&ip6->ip6_src, &naddr, NULL);
5270		if (!PF_AZERO(&naddr, AF_INET6))
5271			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5272			    &naddr, AF_INET6);
5273		ifp = r->rpool.cur->ifp;
5274	} else {
5275		if (!PF_AZERO(&s->rt_addr, AF_INET6))
5276			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5277			    &s->rt_addr, AF_INET6);
5278		ifp = s->rt_ifp;
5279	}
5280
5281	if (ifp == NULL)
5282		goto bad;
5283
5284	if (oifp != ifp) {
5285		mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTED, NULL);
5286		if (mtag == NULL) {
5287			mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 0, M_NOWAIT);
5288			if (mtag == NULL)
5289				goto bad;
5290			m_tag_prepend(m0, mtag);
5291#if defined(__FreeBSD__)
5292			PF_UNLOCK();
5293			if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS) {
5294				PF_LOCK();
5295				goto bad;
5296			} else if (m0 == NULL) {
5297				PF_LOCK();
5298				goto done;
5299			}
5300			PF_LOCK();
5301#else
5302			if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS)
5303				goto bad;
5304			else if (m0 == NULL)
5305				goto done;
5306#endif
5307		}
5308	}
5309
5310	/*
5311	 * If the packet is too large for the outgoing interface,
5312	 * send back an icmp6 error.
5313	 */
5314	if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr))
5315		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5316	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
5317#if defined(__FreeBSD__)
5318		PF_UNLOCK();
5319#endif
5320		error = nd6_output(ifp, ifp, m0, dst, NULL);
5321#if defined(__FreeBSD__)
5322		PF_LOCK();
5323#endif
5324	} else {
5325		in6_ifstat_inc(ifp, ifs6_in_toobig);
5326#if defined(__FreeBSD__)
5327		if (r->rt != PF_DUPTO) {
5328			PF_UNLOCK();
5329			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5330			PF_LOCK();
5331		 } else
5332#else
5333		if (r->rt != PF_DUPTO)
5334			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5335		else
5336#endif
5337			goto bad;
5338	}
5339
5340done:
5341	if (r->rt != PF_DUPTO)
5342		*m = NULL;
5343	return;
5344
5345bad:
5346	m_freem(m0);
5347	goto done;
5348}
5349#endif /* INET6 */
5350
5351
5352#if defined(__FreeBSD__)
5353/*
5354 * XXX
5355 * FreeBSD supports cksum offload for the following drivers.
5356 * em(4), gx(4), lge(4), nge(4), ti(4), xl(4)
5357 * If we can make full use of it we would outperform ipfw/ipfilter in
5358 * very heavy traffic.
5359 * I have not tested 'cause I don't have NICs that supports cksum offload.
5360 * (There might be problems. Typical phenomena would be
5361 *   1. No route message for UDP packet.
5362 *   2. No connection acceptance from external hosts regardless of rule set.)
5363 */
5364int
5365pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
5366{
5367	u_int16_t sum = 0;
5368	int hw_assist = 0;
5369	struct ip *ip;
5370
5371	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5372		return (1);
5373	if (m->m_pkthdr.len < off + len)
5374		return (1);
5375
5376	switch (p) {
5377	case IPPROTO_TCP:
5378		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5379			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5380				sum = m->m_pkthdr.csum_data;
5381			} else {
5382				ip = mtod(m, struct ip *);
5383				sum = in_pseudo(ip->ip_src.s_addr,
5384					ip->ip_dst.s_addr,
5385					htonl(m->m_pkthdr.csum_data +
5386					    IPPROTO_TCP) + ip->ip_len);
5387			}
5388			sum ^= 0xffff;
5389			++hw_assist;
5390		}
5391		break;
5392	case IPPROTO_UDP:
5393		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5394			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5395				sum = m->m_pkthdr.csum_data;
5396			} else {
5397				ip = mtod(m, struct ip *);
5398				sum = in_pseudo(ip->ip_src.s_addr,
5399					ip->ip_dst.s_addr, htonl((u_short)len +
5400					m->m_pkthdr.csum_data + IPPROTO_UDP));
5401			}
5402			sum ^= 0xffff;
5403			++hw_assist;
5404                }
5405		break;
5406	case IPPROTO_ICMP:
5407#ifdef INET6
5408	case IPPROTO_ICMPV6:
5409#endif /* INET6 */
5410		break;
5411	default:
5412		return (1);
5413	}
5414
5415	if (!hw_assist) {
5416		switch (af) {
5417		case AF_INET:
5418			if (p == IPPROTO_ICMP) {
5419				if (m->m_len < off)
5420					return (1);
5421				m->m_data += off;
5422				m->m_len -= off;
5423				sum = in_cksum(m, len);
5424				m->m_data -= off;
5425				m->m_len += off;
5426			} else {
5427				if (m->m_len < sizeof(struct ip))
5428					return (1);
5429				sum = in4_cksum(m, p, off, len);
5430				if (sum == 0) {
5431					m->m_pkthdr.csum_flags |=
5432					    (CSUM_DATA_VALID |
5433					     CSUM_PSEUDO_HDR);
5434					m->m_pkthdr.csum_data = 0xffff;
5435				}
5436			}
5437			break;
5438#ifdef INET6
5439		case AF_INET6:
5440			if (m->m_len < sizeof(struct ip6_hdr))
5441				return (1);
5442			sum = in6_cksum(m, p, off, len);
5443			/*
5444			 * XXX
5445			 * IPv6 H/W cksum off-load not supported yet!
5446			 *
5447			 * if (sum == 0) {
5448			 *	m->m_pkthdr.csum_flags |=
5449			 *	    (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
5450			 *	m->m_pkthdr.csum_data = 0xffff;
5451			 *}
5452			 */
5453			break;
5454#endif /* INET6 */
5455		default:
5456			return (1);
5457		}
5458	}
5459	if (sum) {
5460		switch (p) {
5461		case IPPROTO_TCP:
5462			tcpstat.tcps_rcvbadsum++;
5463			break;
5464		case IPPROTO_UDP:
5465			udpstat.udps_badsum++;
5466			break;
5467		case IPPROTO_ICMP:
5468			icmpstat.icps_checksum++;
5469			break;
5470#ifdef INET6
5471		case IPPROTO_ICMPV6:
5472			icmp6stat.icp6s_checksum++;
5473			break;
5474#endif /* INET6 */
5475		}
5476		return (1);
5477	}
5478	return (0);
5479}
5480#else
5481/*
5482 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
5483 *   off is the offset where the protocol header starts
5484 *   len is the total length of protocol header plus payload
5485 * returns 0 when the checksum is valid, otherwise returns 1.
5486 */
5487int
5488pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
5489{
5490	u_int16_t flag_ok, flag_bad;
5491	u_int16_t sum;
5492
5493	switch (p) {
5494	case IPPROTO_TCP:
5495		flag_ok = M_TCP_CSUM_IN_OK;
5496		flag_bad = M_TCP_CSUM_IN_BAD;
5497		break;
5498	case IPPROTO_UDP:
5499		flag_ok = M_UDP_CSUM_IN_OK;
5500		flag_bad = M_UDP_CSUM_IN_BAD;
5501		break;
5502	case IPPROTO_ICMP:
5503#ifdef INET6
5504	case IPPROTO_ICMPV6:
5505#endif /* INET6 */
5506		flag_ok = flag_bad = 0;
5507		break;
5508	default:
5509		return (1);
5510	}
5511	if (m->m_pkthdr.csum & flag_ok)
5512		return (0);
5513	if (m->m_pkthdr.csum & flag_bad)
5514		return (1);
5515	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5516		return (1);
5517	if (m->m_pkthdr.len < off + len)
5518		return (1);
5519		switch (af) {
5520	case AF_INET:
5521		if (p == IPPROTO_ICMP) {
5522			if (m->m_len < off)
5523				return (1);
5524			m->m_data += off;
5525			m->m_len -= off;
5526			sum = in_cksum(m, len);
5527			m->m_data -= off;
5528			m->m_len += off;
5529		} else {
5530			if (m->m_len < sizeof(struct ip))
5531				return (1);
5532			sum = in4_cksum(m, p, off, len);
5533		}
5534		break;
5535#ifdef INET6
5536	case AF_INET6:
5537		if (m->m_len < sizeof(struct ip6_hdr))
5538			return (1);
5539		sum = in6_cksum(m, p, off, len);
5540		break;
5541#endif /* INET6 */
5542	default:
5543		return (1);
5544	}
5545	if (sum) {
5546		m->m_pkthdr.csum |= flag_bad;
5547		switch (p) {
5548		case IPPROTO_TCP:
5549			tcpstat.tcps_rcvbadsum++;
5550			break;
5551		case IPPROTO_UDP:
5552			udpstat.udps_badsum++;
5553			break;
5554		case IPPROTO_ICMP:
5555			icmpstat.icps_checksum++;
5556			break;
5557#ifdef INET6
5558		case IPPROTO_ICMPV6:
5559			icmp6stat.icp6s_checksum++;
5560			break;
5561#endif /* INET6 */
5562		}
5563		return (1);
5564	}
5565	m->m_pkthdr.csum |= flag_ok;
5566	return (0);
5567}
5568#endif
5569
5570#ifdef INET
5571int
5572pf_test(int dir, struct ifnet *ifp, struct mbuf **m0)
5573{
5574	u_short		   action, reason = 0, log = 0;
5575	struct mbuf	  *m = *m0;
5576	struct ip	  *h;
5577	struct pf_rule	  *a = NULL, *r = &pf_default_rule, *tr;
5578	struct pf_state	  *s = NULL;
5579	struct pf_ruleset *ruleset = NULL;
5580	struct pf_pdesc	   pd;
5581	int		   off;
5582	int		   pqid = 0;
5583
5584#if defined(__FreeBSD__)
5585	PF_LOCK();
5586#endif
5587	if (!pf_status.running ||
5588	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
5589#if defined(__FreeBSD__)
5590		PF_UNLOCK();
5591#endif
5592	    	return (PF_PASS);
5593	}
5594
5595#if defined(__FreeBSD__) && (__FreeBSD_version >= 501000)
5596	M_ASSERTPKTHDR(m);
5597#else
5598#ifdef DIAGNOSTIC
5599	if ((m->m_flags & M_PKTHDR) == 0)
5600		panic("non-M_PKTHDR is passed to pf_test");
5601#endif
5602#endif
5603
5604	if (m->m_pkthdr.len < (int)sizeof(*h)) {
5605		action = PF_DROP;
5606		REASON_SET(&reason, PFRES_SHORT);
5607		log = 1;
5608		goto done;
5609	}
5610
5611	/* We do IP header normalization and packet reassembly here */
5612	if (pf_normalize_ip(m0, dir, ifp, &reason) != PF_PASS) {
5613		action = PF_DROP;
5614		goto done;
5615	}
5616	m = *m0;
5617	h = mtod(m, struct ip *);
5618
5619	off = h->ip_hl << 2;
5620	if (off < (int)sizeof(*h)) {
5621		action = PF_DROP;
5622		REASON_SET(&reason, PFRES_SHORT);
5623		log = 1;
5624		goto done;
5625	}
5626
5627	memset(&pd, 0, sizeof(pd));
5628	pd.src = (struct pf_addr *)&h->ip_src;
5629	pd.dst = (struct pf_addr *)&h->ip_dst;
5630	pd.ip_sum = &h->ip_sum;
5631	pd.proto = h->ip_p;
5632	pd.af = AF_INET;
5633	pd.tos = h->ip_tos;
5634	pd.tot_len = ntohs(h->ip_len);
5635
5636	/* handle fragments that didn't get reassembled by normalization */
5637	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
5638		action = pf_test_fragment(&r, dir, ifp, m, h,
5639		    &pd, &a, &ruleset);
5640		goto done;
5641	}
5642
5643	switch (h->ip_p) {
5644
5645	case IPPROTO_TCP: {
5646		struct tcphdr	th;
5647
5648		pd.hdr.tcp = &th;
5649		if (!pf_pull_hdr(m, off, &th, sizeof(th),
5650		    &action, &reason, AF_INET)) {
5651			log = action != PF_PASS;
5652			goto done;
5653		}
5654		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5655		    ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) {
5656			action = PF_DROP;
5657			goto done;
5658		}
5659		pd.p_len = pd.tot_len - off - (th.th_off << 2);
5660		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
5661			pqid = 1;
5662		action = pf_normalize_tcp(dir, ifp, m, 0, off, h, &pd);
5663		if (action == PF_DROP)
5664			break;
5665		action = pf_test_state_tcp(&s, dir, ifp, m, 0, off, h, &pd,
5666		    &reason);
5667		if (action == PF_PASS) {
5668			r = s->rule.ptr;
5669			log = s->log;
5670		} else if (s == NULL)
5671			action = pf_test_tcp(&r, &s, dir, ifp,
5672			    m, 0, off, h, &pd, &a, &ruleset);
5673		break;
5674	}
5675
5676	case IPPROTO_UDP: {
5677		struct udphdr	uh;
5678
5679		pd.hdr.udp = &uh;
5680		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
5681		    &action, &reason, AF_INET)) {
5682			log = action != PF_PASS;
5683			goto done;
5684		}
5685		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
5686		    off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) {
5687			action = PF_DROP;
5688			goto done;
5689		}
5690		action = pf_test_state_udp(&s, dir, ifp, m, 0, off, h, &pd);
5691		if (action == PF_PASS) {
5692			r = s->rule.ptr;
5693			a = s->anchor.ptr;
5694			log = s->log;
5695		} else if (s == NULL)
5696			action = pf_test_udp(&r, &s, dir, ifp,
5697			    m, 0, off, h, &pd, &a, &ruleset);
5698		break;
5699	}
5700
5701	case IPPROTO_ICMP: {
5702		struct icmp	ih;
5703
5704		pd.hdr.icmp = &ih;
5705		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
5706		    &action, &reason, AF_INET)) {
5707			log = action != PF_PASS;
5708			goto done;
5709		}
5710		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5711		    ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) {
5712			action = PF_DROP;
5713			goto done;
5714		}
5715		action = pf_test_state_icmp(&s, dir, ifp, m, 0, off, h, &pd);
5716		if (action == PF_PASS) {
5717			r = s->rule.ptr;
5718			r->packets++;
5719			r->bytes += ntohs(h->ip_len);
5720			a = s->anchor.ptr;
5721			if (a != NULL) {
5722				a->packets++;
5723				a->bytes += ntohs(h->ip_len);
5724			}
5725			log = s->log;
5726		} else if (s == NULL)
5727			action = pf_test_icmp(&r, &s, dir, ifp,
5728			    m, 0, off, h, &pd, &a, &ruleset);
5729		break;
5730	}
5731
5732	default:
5733		action = pf_test_state_other(&s, dir, ifp, &pd);
5734		if (action == PF_PASS) {
5735			r = s->rule.ptr;
5736			a = s->anchor.ptr;
5737			log = s->log;
5738		} else if (s == NULL)
5739			action = pf_test_other(&r, &s, dir, ifp, m, off, h,
5740			    &pd, &a, &ruleset);
5741		break;
5742	}
5743
5744	if (ifp == status_ifp) {
5745		pf_status.bcounters[0][dir == PF_OUT] += pd.tot_len;
5746		pf_status.pcounters[0][dir == PF_OUT][action != PF_PASS]++;
5747	}
5748
5749done:
5750	tr = r;
5751	if (r == &pf_default_rule && s != NULL && s->nat_rule.ptr != NULL)
5752		tr = s->nat_rule.ptr;
5753	if (tr->src.addr.type == PF_ADDR_TABLE)
5754		pfr_update_stats(tr->src.addr.p.tbl,
5755		    (s == NULL || s->direction == dir) ? pd.src : pd.dst, pd.af,
5756		    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
5757		    tr->src.not);
5758	if (tr->dst.addr.type == PF_ADDR_TABLE)
5759		pfr_update_stats(tr->dst.addr.p.tbl,
5760		    (s == NULL || s->direction == dir) ? pd.dst : pd.src, pd.af,
5761		    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
5762		    tr->dst.not);
5763
5764	if (action == PF_PASS && h->ip_hl > 5 &&
5765	    !((s && s->allow_opts) || r->allow_opts)) {
5766		action = PF_DROP;
5767		REASON_SET(&reason, PFRES_SHORT);
5768		log = 1;
5769		DPFPRINTF(PF_DEBUG_MISC,
5770		    ("pf: dropping packet with ip options\n"));
5771	}
5772
5773#ifdef ALTQ
5774	if (action == PF_PASS && r->qid) {
5775		struct m_tag	*mtag;
5776		struct altq_tag	*atag;
5777
5778		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
5779		if (mtag != NULL) {
5780			atag = (struct altq_tag *)(mtag + 1);
5781			if (pqid || pd.tos == IPTOS_LOWDELAY)
5782				atag->qid = r->pqid;
5783			else
5784				atag->qid = r->qid;
5785			/* add hints for ecn */
5786			atag->af = AF_INET;
5787			atag->hdr = h;
5788			m_tag_prepend(m, mtag);
5789		}
5790	}
5791#endif
5792
5793	if (log)
5794		PFLOG_PACKET(ifp, h, m, AF_INET, dir, reason, r, a, ruleset);
5795
5796	if (action == PF_SYNPROXY_DROP) {
5797		m_freem(*m0);
5798		*m0 = NULL;
5799		action = PF_PASS;
5800	} else if (r->rt)
5801		/* pf_route can free the mbuf causing *m0 to become NULL */
5802		pf_route(m0, r, dir, ifp, s);
5803
5804#if defined(__FreeBSD__)
5805	PF_UNLOCK();
5806#endif
5807
5808	return (action);
5809}
5810#endif /* INET */
5811
5812#ifdef INET6
5813int
5814pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0)
5815{
5816	u_short		   action, reason = 0, log = 0;
5817	struct mbuf	  *m = *m0;
5818	struct ip6_hdr	  *h;
5819	struct pf_rule	  *a = NULL, *r = &pf_default_rule, *tr;
5820	struct pf_state	  *s = NULL;
5821	struct pf_ruleset *ruleset = NULL;
5822	struct pf_pdesc    pd;
5823	int		   off, terminal = 0;
5824
5825#if defined(__FreeBSD__)
5826	PF_LOCK();
5827#endif
5828
5829	if (!pf_status.running ||
5830	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) {
5831#if defined(__FreeBSD__)
5832		PF_UNLOCK();
5833#endif
5834		return (PF_PASS);
5835	}
5836
5837#if defined(__FreeBSD__) && (__FreeBSD_version >= 501000)
5838	M_ASSERTPKTHDR(m);
5839#else
5840#ifdef DIAGNOSTIC
5841	if ((m->m_flags & M_PKTHDR) == 0)
5842		panic("non-M_PKTHDR is passed to pf_test");
5843#endif
5844#endif
5845
5846	if (m->m_pkthdr.len < (int)sizeof(*h)) {
5847		action = PF_DROP;
5848		REASON_SET(&reason, PFRES_SHORT);
5849		log = 1;
5850		goto done;
5851	}
5852
5853	/* We do IP header normalization and packet reassembly here */
5854	if (pf_normalize_ip6(m0, dir, ifp, &reason) != PF_PASS) {
5855		action = PF_DROP;
5856		goto done;
5857	}
5858	m = *m0;
5859	h = mtod(m, struct ip6_hdr *);
5860
5861	memset(&pd, 0, sizeof(pd));
5862	pd.src = (struct pf_addr *)&h->ip6_src;
5863	pd.dst = (struct pf_addr *)&h->ip6_dst;
5864	pd.ip_sum = NULL;
5865	pd.af = AF_INET6;
5866	pd.tos = 0;
5867	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
5868
5869	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
5870	pd.proto = h->ip6_nxt;
5871	do {
5872		switch (pd.proto) {
5873		case IPPROTO_FRAGMENT:
5874			action = pf_test_fragment(&r, dir, ifp, m, h,
5875			    &pd, &a, &ruleset);
5876			if (action == PF_DROP)
5877				REASON_SET(&reason, PFRES_FRAG);
5878			goto done;
5879		case IPPROTO_AH:
5880		case IPPROTO_HOPOPTS:
5881		case IPPROTO_ROUTING:
5882		case IPPROTO_DSTOPTS: {
5883			/* get next header and header length */
5884			struct ip6_ext	opt6;
5885
5886			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
5887			    NULL, NULL, pd.af)) {
5888				DPFPRINTF(PF_DEBUG_MISC,
5889				    ("pf: IPv6 short opt\n"));
5890				action = PF_DROP;
5891				REASON_SET(&reason, PFRES_SHORT);
5892				log = 1;
5893				goto done;
5894			}
5895			if (pd.proto == IPPROTO_AH)
5896				off += (opt6.ip6e_len + 2) * 4;
5897			else
5898				off += (opt6.ip6e_len + 1) * 8;
5899			pd.proto = opt6.ip6e_nxt;
5900			/* goto the next header */
5901			break;
5902		}
5903		default:
5904			terminal++;
5905			break;
5906		}
5907	} while (!terminal);
5908
5909	switch (pd.proto) {
5910
5911	case IPPROTO_TCP: {
5912		struct tcphdr	th;
5913
5914		pd.hdr.tcp = &th;
5915		if (!pf_pull_hdr(m, off, &th, sizeof(th),
5916		    &action, &reason, AF_INET6)) {
5917			log = action != PF_PASS;
5918			goto done;
5919		}
5920		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5921		    ntohs(h->ip6_plen), IPPROTO_TCP, AF_INET6)) {
5922			action = PF_DROP;
5923			goto done;
5924		}
5925		pd.p_len = pd.tot_len - off - (th.th_off << 2);
5926		action = pf_normalize_tcp(dir, ifp, m, 0, off, h, &pd);
5927		if (action == PF_DROP)
5928			break;
5929		action = pf_test_state_tcp(&s, dir, ifp, m, 0, off, h, &pd,
5930		    &reason);
5931		if (action == PF_PASS) {
5932			r = s->rule.ptr;
5933			log = s->log;
5934		} else if (s == NULL)
5935			action = pf_test_tcp(&r, &s, dir, ifp,
5936			    m, 0, off, h, &pd, &a, &ruleset);
5937		break;
5938	}
5939
5940	case IPPROTO_UDP: {
5941		struct udphdr	uh;
5942
5943		pd.hdr.udp = &uh;
5944		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
5945		    &action, &reason, AF_INET6)) {
5946			log = action != PF_PASS;
5947			goto done;
5948		}
5949		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
5950		    off, ntohs(h->ip6_plen), IPPROTO_UDP, AF_INET6)) {
5951			action = PF_DROP;
5952			goto done;
5953		}
5954		action = pf_test_state_udp(&s, dir, ifp, m, 0, off, h, &pd);
5955		if (action == PF_PASS) {
5956			r = s->rule.ptr;
5957			log = s->log;
5958		} else if (s == NULL)
5959			action = pf_test_udp(&r, &s, dir, ifp,
5960			    m, 0, off, h, &pd, &a, &ruleset);
5961		break;
5962	}
5963
5964	case IPPROTO_ICMPV6: {
5965		struct icmp6_hdr	ih;
5966
5967		pd.hdr.icmp6 = &ih;
5968		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
5969		    &action, &reason, AF_INET6)) {
5970			log = action != PF_PASS;
5971			goto done;
5972		}
5973		if (dir == PF_IN && pf_check_proto_cksum(m, off,
5974		    ntohs(h->ip6_plen), IPPROTO_ICMPV6, AF_INET6)) {
5975			action = PF_DROP;
5976			goto done;
5977		}
5978		action = pf_test_state_icmp(&s, dir, ifp,
5979		    m, 0, off, h, &pd);
5980		if (action == PF_PASS) {
5981			r = s->rule.ptr;
5982			r->packets++;
5983			r->bytes += h->ip6_plen;
5984			log = s->log;
5985		} else if (s == NULL)
5986			action = pf_test_icmp(&r, &s, dir, ifp,
5987			    m, 0, off, h, &pd, &a, &ruleset);
5988		break;
5989	}
5990
5991	default:
5992		action = pf_test_other(&r, &s, dir, ifp, m, off, h,
5993		    &pd, &a, &ruleset);
5994		break;
5995	}
5996
5997	if (ifp == status_ifp) {
5998		pf_status.bcounters[1][dir == PF_OUT] += pd.tot_len;
5999		pf_status.pcounters[1][dir == PF_OUT][action != PF_PASS]++;
6000	}
6001
6002done:
6003	tr = r;
6004	if (r == &pf_default_rule && s != NULL && s->nat_rule.ptr != NULL)
6005		tr = s->nat_rule.ptr;
6006	if (tr->src.addr.type == PF_ADDR_TABLE)
6007		pfr_update_stats(tr->src.addr.p.tbl,
6008		    (s == NULL || s->direction == dir) ? pd.src : pd.dst, pd.af,
6009		    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6010		    tr->src.not);
6011	if (tr->dst.addr.type == PF_ADDR_TABLE)
6012		pfr_update_stats(tr->dst.addr.p.tbl,
6013		    (s == NULL || s->direction == dir) ? pd.dst : pd.src, pd.af,
6014		    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6015		    tr->dst.not);
6016
6017	/* XXX handle IPv6 options, if not allowed. not implemented. */
6018
6019#ifdef ALTQ
6020	if (action == PF_PASS && r->qid) {
6021		struct m_tag	*mtag;
6022		struct altq_tag	*atag;
6023
6024		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
6025		if (mtag != NULL) {
6026			atag = (struct altq_tag *)(mtag + 1);
6027			if (pd.tos == IPTOS_LOWDELAY)
6028				atag->qid = r->pqid;
6029			else
6030				atag->qid = r->qid;
6031			/* add hints for ecn */
6032			atag->af = AF_INET6;
6033			atag->hdr = h;
6034			m_tag_prepend(m, mtag);
6035		}
6036	}
6037#endif
6038
6039	if (log)
6040		PFLOG_PACKET(ifp, h, m, AF_INET6, dir, reason, r, a, ruleset);
6041
6042	if (action == PF_SYNPROXY_DROP) {
6043		m_freem(*m0);
6044		*m0 = NULL;
6045		action = PF_PASS;
6046	} else if (r->rt)
6047		/* pf_route6 can free the mbuf causing *m0 to become NULL */
6048		pf_route6(m0, r, dir, ifp, s);
6049
6050#if defined(__FreeBSD__)
6051	PF_UNLOCK();
6052#endif
6053	return (action);
6054}
6055#endif /* INET6 */
6056