1244769Sglebius/*-
2126258Smlaier * Copyright 2001 Niels Provos <provos@citi.umich.edu>
3284569Skp * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org>
4126258Smlaier * All rights reserved.
5126258Smlaier *
6126258Smlaier * Redistribution and use in source and binary forms, with or without
7126258Smlaier * modification, are permitted provided that the following conditions
8126258Smlaier * are met:
9126258Smlaier * 1. Redistributions of source code must retain the above copyright
10126258Smlaier *    notice, this list of conditions and the following disclaimer.
11126258Smlaier * 2. Redistributions in binary form must reproduce the above copyright
12126258Smlaier *    notice, this list of conditions and the following disclaimer in the
13126258Smlaier *    documentation and/or other materials provided with the distribution.
14126258Smlaier *
15126258Smlaier * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16126258Smlaier * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17126258Smlaier * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18126258Smlaier * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19126258Smlaier * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20126258Smlaier * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21126258Smlaier * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22126258Smlaier * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23126258Smlaier * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24126258Smlaier * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25244769Sglebius *
26244769Sglebius *	$OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $
27126258Smlaier */
28126258Smlaier
29240233Sglebius#include <sys/cdefs.h>
30240233Sglebius__FBSDID("$FreeBSD: stable/10/sys/netpfil/pf/pf_norm.c 338106 2018-08-20 15:43:08Z kp $");
31240233Sglebius
32126261Smlaier#include "opt_inet.h"
33126261Smlaier#include "opt_inet6.h"
34126261Smlaier#include "opt_pf.h"
35171168Smlaier
36126258Smlaier#include <sys/param.h>
37240233Sglebius#include <sys/lock.h>
38126258Smlaier#include <sys/mbuf.h>
39240233Sglebius#include <sys/mutex.h>
40240233Sglebius#include <sys/refcount.h>
41240233Sglebius#include <sys/rwlock.h>
42126258Smlaier#include <sys/socket.h>
43126258Smlaier
44126258Smlaier#include <net/if.h>
45240233Sglebius#include <net/vnet.h>
46240233Sglebius#include <net/pfvar.h>
47126258Smlaier#include <net/if_pflog.h>
48126258Smlaier
49126258Smlaier#include <netinet/in.h>
50126258Smlaier#include <netinet/ip.h>
51126258Smlaier#include <netinet/ip_var.h>
52284569Skp#include <netinet6/ip6_var.h>
53126258Smlaier#include <netinet/tcp.h>
54240233Sglebius#include <netinet/tcp_fsm.h>
55126258Smlaier#include <netinet/tcp_seq.h>
56126258Smlaier
57126258Smlaier#ifdef INET6
58126258Smlaier#include <netinet/ip6.h>
59126258Smlaier#endif /* INET6 */
60126258Smlaier
61126258Smlaierstruct pf_frent {
62284569Skp	TAILQ_ENTRY(pf_frent)	fr_next;
63284569Skp	struct mbuf	*fe_m;
64284569Skp	uint16_t	fe_hdrlen;	/* ipv4 header lenght with ip options
65284569Skp					   ipv6, extension, fragment header */
66284569Skp	uint16_t	fe_extoff;	/* last extension header offset or 0 */
67284569Skp	uint16_t	fe_len;		/* fragment length */
68284569Skp	uint16_t	fe_off;		/* fragment offset */
69284569Skp	uint16_t	fe_mff;		/* more fragment flag */
70126258Smlaier};
71126258Smlaier
72284569Skpstruct pf_fragment_cmp {
73284569Skp	struct pf_addr	frc_src;
74284569Skp	struct pf_addr	frc_dst;
75284569Skp	uint32_t	frc_id;
76284569Skp	sa_family_t	frc_af;
77284569Skp	uint8_t		frc_proto;
78284569Skp};
79284569Skp
80126258Smlaierstruct pf_fragment {
81284569Skp	struct pf_fragment_cmp	fr_key;
82284569Skp#define fr_src	fr_key.frc_src
83284569Skp#define fr_dst	fr_key.frc_dst
84284569Skp#define fr_id	fr_key.frc_id
85284569Skp#define fr_af	fr_key.frc_af
86284569Skp#define fr_proto	fr_key.frc_proto
87284569Skp
88126258Smlaier	RB_ENTRY(pf_fragment) fr_entry;
89126258Smlaier	TAILQ_ENTRY(pf_fragment) frag_next;
90284569Skp	uint8_t		fr_flags;	/* status flags */
91284569Skp#define PFFRAG_SEENLAST		0x0001	/* Seen the last fragment for this */
92284569Skp#define PFFRAG_NOBUFFER		0x0002	/* Non-buffering fragment cache */
93284569Skp#define PFFRAG_DROP		0x0004	/* Drop all fragments */
94240233Sglebius#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
95284569Skp	uint16_t	fr_max;		/* fragment data max */
96284569Skp	uint32_t	fr_timeout;
97284569Skp	uint16_t	fr_maxlen;	/* maximum length of single fragment */
98338106Skp	uint16_t	fr_entries;	/* Total number of pf_fragment entries */
99284569Skp	TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
100126258Smlaier};
101338106Skp#define PF_MAX_FRENT_PER_FRAGMENT	64
102126258Smlaier
103284569Skpstruct pf_fragment_tag {
104284569Skp	uint16_t	ft_hdrlen;	/* header length of reassembled pkt */
105284569Skp	uint16_t	ft_extoff;	/* last extension header offset or 0 */
106284569Skp	uint16_t	ft_maxlen;	/* maximum fragment payload length */
107284572Skp	uint32_t	ft_id;		/* fragment id */
108284569Skp};
109284569Skp
110240233Sglebiusstatic struct mtx pf_frag_mtx;
111240233Sglebius#define PF_FRAG_LOCK()		mtx_lock(&pf_frag_mtx)
112240233Sglebius#define PF_FRAG_UNLOCK()	mtx_unlock(&pf_frag_mtx)
113240233Sglebius#define PF_FRAG_ASSERT()	mtx_assert(&pf_frag_mtx, MA_OWNED)
114240233Sglebius
115240233SglebiusVNET_DEFINE(uma_zone_t, pf_state_scrub_z);	/* XXX: shared with pfsync */
116240233Sglebius
117240233Sglebiusstatic VNET_DEFINE(uma_zone_t, pf_frent_z);
118240233Sglebius#define	V_pf_frent_z	VNET(pf_frent_z)
119240233Sglebiusstatic VNET_DEFINE(uma_zone_t, pf_frag_z);
120240233Sglebius#define	V_pf_frag_z	VNET(pf_frag_z)
121240233Sglebius
122223637SbzTAILQ_HEAD(pf_fragqueue, pf_fragment);
123223637SbzTAILQ_HEAD(pf_cachequeue, pf_fragment);
124240233Sglebiusstatic VNET_DEFINE(struct pf_fragqueue,	pf_fragqueue);
125223637Sbz#define	V_pf_fragqueue			VNET(pf_fragqueue)
126240233Sglebiusstatic VNET_DEFINE(struct pf_cachequeue,	pf_cachequeue);
127223637Sbz#define	V_pf_cachequeue			VNET(pf_cachequeue)
128223637SbzRB_HEAD(pf_frag_tree, pf_fragment);
129240233Sglebiusstatic VNET_DEFINE(struct pf_frag_tree,	pf_frag_tree);
130223637Sbz#define	V_pf_frag_tree			VNET(pf_frag_tree)
131240233Sglebiusstatic VNET_DEFINE(struct pf_frag_tree,	pf_cache_tree);
132223637Sbz#define	V_pf_cache_tree			VNET(pf_cache_tree)
133240233Sglebiusstatic int		 pf_frag_compare(struct pf_fragment *,
134240233Sglebius			    struct pf_fragment *);
135240233Sglebiusstatic RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
136240233Sglebiusstatic RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
137126258Smlaier
138284579Skpstatic void	pf_flush_fragments(void);
139284579Skpstatic void	pf_free_fragment(struct pf_fragment *);
140284579Skpstatic void	pf_remove_fragment(struct pf_fragment *);
141284579Skpstatic int	pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
142284579Skp		    struct tcphdr *, int, sa_family_t);
143284579Skpstatic struct pf_frent *pf_create_fragment(u_short *);
144284569Skpstatic struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key,
145284579Skp		    struct pf_frag_tree *tree);
146284569Skpstatic struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *,
147284579Skp		    struct pf_frent *, u_short *);
148284579Skpstatic int	pf_isfull_fragment(struct pf_fragment *);
149284579Skpstatic struct mbuf *pf_join_fragment(struct pf_fragment *);
150284579Skp#ifdef INET
151284579Skpstatic void	pf_scrub_ip(struct mbuf **, uint32_t, uint8_t, uint8_t);
152284579Skpstatic int	pf_reassemble(struct mbuf **, struct ip *, int, u_short *);
153284579Skpstatic struct mbuf *pf_fragcache(struct mbuf **, struct ip*,
154284579Skp		    struct pf_fragment **, int, int, int *);
155284579Skp#endif	/* INET */
156284579Skp#ifdef INET6
157284579Skpstatic int	pf_reassemble6(struct mbuf **, struct ip6_hdr *,
158284581Skp		    struct ip6_frag *, uint16_t, uint16_t, u_short *);
159284579Skpstatic void	pf_scrub_ip6(struct mbuf **, uint8_t);
160284579Skp#endif	/* INET6 */
161284569Skp
162145836Smlaier#define	DPFPRINTF(x) do {				\
163223637Sbz	if (V_pf_status.debug >= PF_DEBUG_MISC) {	\
164223637Sbz		printf("%s: ", __func__);		\
165223637Sbz		printf x ;				\
166223637Sbz	}						\
167223637Sbz} while(0)
168126258Smlaier
169284579Skp#ifdef INET
170284569Skpstatic void
171284569Skppf_ip2key(struct ip *ip, int dir, struct pf_fragment_cmp *key)
172284569Skp{
173284569Skp
174284569Skp	key->frc_src.v4 = ip->ip_src;
175284569Skp	key->frc_dst.v4 = ip->ip_dst;
176284569Skp	key->frc_af = AF_INET;
177284569Skp	key->frc_proto = ip->ip_p;
178284569Skp	key->frc_id = ip->ip_id;
179284569Skp}
180284579Skp#endif	/* INET */
181284569Skp
182126258Smlaiervoid
183126258Smlaierpf_normalize_init(void)
184126258Smlaier{
185126258Smlaier
186240233Sglebius	V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment),
187240233Sglebius	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
188240233Sglebius	V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent),
189240233Sglebius	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
190240233Sglebius	V_pf_state_scrub_z = uma_zcreate("pf state scrubs",
191240233Sglebius	    sizeof(struct pf_state_scrub),  NULL, NULL, NULL, NULL,
192240233Sglebius	    UMA_ALIGN_PTR, 0);
193126258Smlaier
194240233Sglebius	V_pf_limits[PF_LIMIT_FRAGS].zone = V_pf_frent_z;
195240233Sglebius	V_pf_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT;
196240233Sglebius	uma_zone_set_max(V_pf_frent_z, PFFRAG_FRENT_HIWAT);
197244347Spjd	uma_zone_set_warning(V_pf_frent_z, "PF frag entries limit reached");
198240233Sglebius
199240233Sglebius	mtx_init(&pf_frag_mtx, "pf fragments", NULL, MTX_DEF);
200240233Sglebius
201223637Sbz	TAILQ_INIT(&V_pf_fragqueue);
202223637Sbz	TAILQ_INIT(&V_pf_cachequeue);
203126258Smlaier}
204126258Smlaier
205240233Sglebiusvoid
206240233Sglebiuspf_normalize_cleanup(void)
207240233Sglebius{
208240233Sglebius
209240233Sglebius	uma_zdestroy(V_pf_state_scrub_z);
210240233Sglebius	uma_zdestroy(V_pf_frent_z);
211240233Sglebius	uma_zdestroy(V_pf_frag_z);
212240233Sglebius
213240233Sglebius	mtx_destroy(&pf_frag_mtx);
214240233Sglebius}
215240233Sglebius
216126409Smlaierstatic int
217126258Smlaierpf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
218126258Smlaier{
219126258Smlaier	int	diff;
220126258Smlaier
221284569Skp	if ((diff = a->fr_id - b->fr_id) != 0)
222126258Smlaier		return (diff);
223284569Skp	if ((diff = a->fr_proto - b->fr_proto) != 0)
224126258Smlaier		return (diff);
225284569Skp	if ((diff = a->fr_af - b->fr_af) != 0)
226284569Skp		return (diff);
227284569Skp	if ((diff = pf_addr_cmp(&a->fr_src, &b->fr_src, a->fr_af)) != 0)
228284569Skp		return (diff);
229284569Skp	if ((diff = pf_addr_cmp(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0)
230284569Skp		return (diff);
231126258Smlaier	return (0);
232126258Smlaier}
233126258Smlaier
234126258Smlaiervoid
235126258Smlaierpf_purge_expired_fragments(void)
236126258Smlaier{
237126258Smlaier	struct pf_fragment	*frag;
238240233Sglebius	u_int32_t		 expire = time_uptime -
239223637Sbz				    V_pf_default_rule.timeout[PFTM_FRAG];
240126258Smlaier
241240233Sglebius	PF_FRAG_LOCK();
242223637Sbz	while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) {
243126261Smlaier		KASSERT((BUFFER_FRAGMENTS(frag)),
244223637Sbz		    ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
245126258Smlaier		if (frag->fr_timeout > expire)
246126258Smlaier			break;
247126258Smlaier
248126258Smlaier		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
249126258Smlaier		pf_free_fragment(frag);
250126258Smlaier	}
251126258Smlaier
252223637Sbz	while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) {
253126261Smlaier		KASSERT((!BUFFER_FRAGMENTS(frag)),
254223637Sbz		    ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
255126258Smlaier		if (frag->fr_timeout > expire)
256126258Smlaier			break;
257126258Smlaier
258126258Smlaier		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
259126258Smlaier		pf_free_fragment(frag);
260223637Sbz		KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) ||
261223637Sbz		    TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag),
262126261Smlaier		    ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s",
263126261Smlaier		    __FUNCTION__));
264126258Smlaier	}
265240233Sglebius	PF_FRAG_UNLOCK();
266126258Smlaier}
267126258Smlaier
268126258Smlaier/*
269126258Smlaier * Try to flush old fragments to make space for new ones
270126258Smlaier */
271240233Sglebiusstatic void
272126258Smlaierpf_flush_fragments(void)
273126258Smlaier{
274240233Sglebius	struct pf_fragment	*frag, *cache;
275126258Smlaier	int			 goal;
276126258Smlaier
277240233Sglebius	PF_FRAG_ASSERT();
278240233Sglebius
279240233Sglebius	goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10;
280240233Sglebius	DPFPRINTF(("trying to free %d frag entriess\n", goal));
281240233Sglebius	while (goal < uma_zone_get_cur(V_pf_frent_z)) {
282223637Sbz		frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue);
283240233Sglebius		if (frag)
284240233Sglebius			pf_free_fragment(frag);
285240233Sglebius		cache = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue);
286240233Sglebius		if (cache)
287240233Sglebius			pf_free_fragment(cache);
288240233Sglebius		if (frag == NULL && cache == NULL)
289126258Smlaier			break;
290126258Smlaier	}
291126258Smlaier}
292126258Smlaier
293126258Smlaier/* Frees the fragments and all associated entries */
294240233Sglebiusstatic void
295126258Smlaierpf_free_fragment(struct pf_fragment *frag)
296126258Smlaier{
297126258Smlaier	struct pf_frent		*frent;
298126258Smlaier
299240233Sglebius	PF_FRAG_ASSERT();
300240233Sglebius
301126258Smlaier	/* Free all fragments */
302126258Smlaier	if (BUFFER_FRAGMENTS(frag)) {
303284569Skp		for (frent = TAILQ_FIRST(&frag->fr_queue); frent;
304284569Skp		    frent = TAILQ_FIRST(&frag->fr_queue)) {
305284569Skp			TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
306126258Smlaier
307284569Skp			m_freem(frent->fe_m);
308240233Sglebius			uma_zfree(V_pf_frent_z, frent);
309126258Smlaier		}
310126258Smlaier	} else {
311284569Skp		for (frent = TAILQ_FIRST(&frag->fr_queue); frent;
312284569Skp		    frent = TAILQ_FIRST(&frag->fr_queue)) {
313284569Skp			TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
314126258Smlaier
315284569Skp			KASSERT((TAILQ_EMPTY(&frag->fr_queue) ||
316284569Skp			    TAILQ_FIRST(&frag->fr_queue)->fe_off >
317284569Skp			    frent->fe_len),
318284569Skp			    ("! (TAILQ_EMPTY() || TAILQ_FIRST()->fe_off >"
319284569Skp			    " frent->fe_len): %s", __func__));
320223637Sbz
321240233Sglebius			uma_zfree(V_pf_frent_z, frent);
322126258Smlaier		}
323126258Smlaier	}
324126258Smlaier
325126258Smlaier	pf_remove_fragment(frag);
326126258Smlaier}
327126258Smlaier
328240233Sglebiusstatic struct pf_fragment *
329284569Skppf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree)
330126258Smlaier{
331126258Smlaier	struct pf_fragment	*frag;
332126258Smlaier
333240233Sglebius	PF_FRAG_ASSERT();
334240233Sglebius
335284569Skp	frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key);
336126258Smlaier	if (frag != NULL) {
337126258Smlaier		/* XXX Are we sure we want to update the timeout? */
338240233Sglebius		frag->fr_timeout = time_uptime;
339126258Smlaier		if (BUFFER_FRAGMENTS(frag)) {
340223637Sbz			TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
341223637Sbz			TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
342126258Smlaier		} else {
343223637Sbz			TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
344223637Sbz			TAILQ_INSERT_HEAD(&V_pf_cachequeue, frag, frag_next);
345126258Smlaier		}
346126258Smlaier	}
347126258Smlaier
348126258Smlaier	return (frag);
349126258Smlaier}
350126258Smlaier
351126258Smlaier/* Removes a fragment from the fragment queue and frees the fragment */
352240233Sglebiusstatic void
353126258Smlaierpf_remove_fragment(struct pf_fragment *frag)
354126258Smlaier{
355240233Sglebius
356240233Sglebius	PF_FRAG_ASSERT();
357240233Sglebius
358126258Smlaier	if (BUFFER_FRAGMENTS(frag)) {
359223637Sbz		RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
360223637Sbz		TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
361240233Sglebius		uma_zfree(V_pf_frag_z, frag);
362126258Smlaier	} else {
363223637Sbz		RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag);
364223637Sbz		TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
365240233Sglebius		uma_zfree(V_pf_frag_z, frag);
366126258Smlaier	}
367126258Smlaier}
368126258Smlaier
369284579Skpstatic struct pf_frent *
370284569Skppf_create_fragment(u_short *reason)
371126258Smlaier{
372284569Skp	struct pf_frent *frent;
373126258Smlaier
374240233Sglebius	PF_FRAG_ASSERT();
375126258Smlaier
376284569Skp	frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
377284569Skp	if (frent == NULL) {
378284569Skp		pf_flush_fragments();
379284569Skp		frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
380284569Skp		if (frent == NULL) {
381284569Skp			REASON_SET(reason, PFRES_MEMORY);
382284569Skp			return (NULL);
383284569Skp		}
384284569Skp	}
385126258Smlaier
386284569Skp	return (frent);
387284569Skp}
388284569Skp
389284569Skpstruct pf_fragment *
390284569Skppf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
391284569Skp		u_short *reason)
392284569Skp{
393284569Skp	struct pf_frent		*after, *next, *prev;
394284569Skp	struct pf_fragment	*frag;
395284569Skp	uint16_t		total;
396284569Skp
397284569Skp	PF_FRAG_ASSERT();
398284569Skp
399284569Skp	/* No empty fragments. */
400284569Skp	if (frent->fe_len == 0) {
401284569Skp		DPFPRINTF(("bad fragment: len 0"));
402284569Skp		goto bad_fragment;
403284569Skp	}
404284569Skp
405284569Skp	/* All fragments are 8 byte aligned. */
406284569Skp	if (frent->fe_mff && (frent->fe_len & 0x7)) {
407284569Skp		DPFPRINTF(("bad fragment: mff and len %d", frent->fe_len));
408284569Skp		goto bad_fragment;
409284569Skp	}
410284569Skp
411284569Skp	/* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. */
412284569Skp	if (frent->fe_off + frent->fe_len > IP_MAXPACKET) {
413284569Skp		DPFPRINTF(("bad fragment: max packet %d",
414284569Skp		    frent->fe_off + frent->fe_len));
415284569Skp		goto bad_fragment;
416284569Skp	}
417284569Skp
418284569Skp	DPFPRINTF((key->frc_af == AF_INET ?
419284569Skp	    "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d",
420284569Skp	    key->frc_id, frent->fe_off, frent->fe_off + frent->fe_len));
421284569Skp
422284569Skp	/* Fully buffer all of the fragments in this fragment queue. */
423284569Skp	frag = pf_find_fragment(key, &V_pf_frag_tree);
424284569Skp
425284569Skp	/* Create a new reassembly queue for this packet. */
426284569Skp	if (frag == NULL) {
427284569Skp		frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
428284569Skp		if (frag == NULL) {
429126258Smlaier			pf_flush_fragments();
430284569Skp			frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
431284569Skp			if (frag == NULL) {
432284569Skp				REASON_SET(reason, PFRES_MEMORY);
433126258Smlaier				goto drop_fragment;
434284569Skp			}
435126258Smlaier		}
436126258Smlaier
437284569Skp		*(struct pf_fragment_cmp *)frag = *key;
438286079Sgjb		frag->fr_flags = 0;
439300552Skp		frag->fr_timeout = time_uptime;
440284569Skp		frag->fr_maxlen = frent->fe_len;
441338106Skp		frag->fr_entries = 0;
442284569Skp		TAILQ_INIT(&frag->fr_queue);
443126258Smlaier
444284569Skp		RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag);
445284569Skp		TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
446126258Smlaier
447284569Skp		/* We do not have a previous fragment. */
448284569Skp		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
449284569Skp
450284569Skp		return (frag);
451126258Smlaier	}
452126258Smlaier
453338106Skp	if (frag->fr_entries >= PF_MAX_FRENT_PER_FRAGMENT)
454338106Skp		goto bad_fragment;
455338106Skp
456284569Skp	KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue"));
457284569Skp
458284569Skp	/* Remember maximum fragment len for refragmentation. */
459284569Skp	if (frent->fe_len > frag->fr_maxlen)
460284569Skp		frag->fr_maxlen = frent->fe_len;
461284569Skp
462284569Skp	/* Maximum data we have seen already. */
463284569Skp	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
464284569Skp		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
465284569Skp
466284569Skp	/* Non terminal fragments must have more fragments flag. */
467284569Skp	if (frent->fe_off + frent->fe_len < total && !frent->fe_mff)
468284569Skp		goto bad_fragment;
469284569Skp
470284569Skp	/* Check if we saw the last fragment already. */
471284569Skp	if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) {
472284569Skp		if (frent->fe_off + frent->fe_len > total ||
473284569Skp		    (frent->fe_off + frent->fe_len == total && frent->fe_mff))
474284569Skp			goto bad_fragment;
475284569Skp	} else {
476284569Skp		if (frent->fe_off + frent->fe_len == total && !frent->fe_mff)
477284569Skp			goto bad_fragment;
478284569Skp	}
479284569Skp
480284569Skp	/* Find a fragment after the current one. */
481284569Skp	prev = NULL;
482284569Skp	TAILQ_FOREACH(after, &frag->fr_queue, fr_next) {
483284569Skp		if (after->fe_off > frent->fe_off)
484126258Smlaier			break;
485284569Skp		prev = after;
486126258Smlaier	}
487126258Smlaier
488284569Skp	KASSERT(prev != NULL || after != NULL,
489284569Skp	    ("prev != NULL || after != NULL"));
490126258Smlaier
491284569Skp	if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
492284569Skp		uint16_t precut;
493126258Smlaier
494284569Skp		precut = prev->fe_off + prev->fe_len - frent->fe_off;
495284569Skp		if (precut >= frent->fe_len)
496284569Skp			goto bad_fragment;
497284569Skp		DPFPRINTF(("overlap -%d", precut));
498284569Skp		m_adj(frent->fe_m, precut);
499284569Skp		frent->fe_off += precut;
500284569Skp		frent->fe_len -= precut;
501126258Smlaier	}
502126258Smlaier
503284569Skp	for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off;
504284569Skp	    after = next) {
505284569Skp		uint16_t aftercut;
506126258Smlaier
507284569Skp		aftercut = frent->fe_off + frent->fe_len - after->fe_off;
508284569Skp		DPFPRINTF(("adjust overlap %d", aftercut));
509284569Skp		if (aftercut < after->fe_len) {
510284569Skp			m_adj(after->fe_m, aftercut);
511284569Skp			after->fe_off += aftercut;
512284569Skp			after->fe_len -= aftercut;
513126258Smlaier			break;
514126258Smlaier		}
515126258Smlaier
516284569Skp		/* This fragment is completely overlapped, lose it. */
517284569Skp		next = TAILQ_NEXT(after, fr_next);
518284569Skp		m_freem(after->fe_m);
519284569Skp		TAILQ_REMOVE(&frag->fr_queue, after, fr_next);
520284569Skp		uma_zfree(V_pf_frent_z, after);
521126258Smlaier	}
522126258Smlaier
523284569Skp	if (prev == NULL)
524284569Skp		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
525126258Smlaier	else
526284569Skp		TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
527126258Smlaier
528338106Skp	frag->fr_entries++;
529338106Skp
530284569Skp	return (frag);
531284569Skp
532284569Skpbad_fragment:
533284569Skp	REASON_SET(reason, PFRES_FRAG);
534284569Skpdrop_fragment:
535284569Skp	uma_zfree(V_pf_frent_z, frent);
536284569Skp	return (NULL);
537284569Skp}
538284569Skp
539284579Skpstatic int
540284569Skppf_isfull_fragment(struct pf_fragment *frag)
541284569Skp{
542284569Skp	struct pf_frent	*frent, *next;
543284569Skp	uint16_t off, total;
544284569Skp
545126258Smlaier	/* Check if we are completely reassembled */
546284569Skp	if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff)
547284569Skp		return (0);
548126258Smlaier
549284569Skp	/* Maximum data we have seen already */
550284569Skp	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
551284569Skp		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
552284569Skp
553126258Smlaier	/* Check if we have all the data */
554126258Smlaier	off = 0;
555284569Skp	for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) {
556284569Skp		next = TAILQ_NEXT(frent, fr_next);
557126258Smlaier
558284569Skp		off += frent->fe_len;
559284569Skp		if (off < total && (next == NULL || next->fe_off != off)) {
560284569Skp			DPFPRINTF(("missing fragment at %d, next %d, total %d",
561284569Skp			    off, next == NULL ? -1 : next->fe_off, total));
562284569Skp			return (0);
563126258Smlaier		}
564126258Smlaier	}
565284569Skp	DPFPRINTF(("%d < %d?", off, total));
566284569Skp	if (off < total)
567284569Skp		return (0);
568284569Skp	KASSERT(off == total, ("off == total"));
569126258Smlaier
570284569Skp	return (1);
571284569Skp}
572126258Smlaier
573284579Skpstatic struct mbuf *
574284569Skppf_join_fragment(struct pf_fragment *frag)
575284569Skp{
576284569Skp	struct mbuf *m, *m2;
577284569Skp	struct pf_frent	*frent, *next;
578284569Skp
579284569Skp	frent = TAILQ_FIRST(&frag->fr_queue);
580284569Skp	next = TAILQ_NEXT(frent, fr_next);
581284569Skp
582284569Skp	m = frent->fe_m;
583284573Skp	m_adj(m, (frent->fe_hdrlen + frent->fe_len) - m->m_pkthdr.len);
584240233Sglebius	uma_zfree(V_pf_frent_z, frent);
585126258Smlaier	for (frent = next; frent != NULL; frent = next) {
586284569Skp		next = TAILQ_NEXT(frent, fr_next);
587126258Smlaier
588284569Skp		m2 = frent->fe_m;
589284569Skp		/* Strip off ip header. */
590284569Skp		m_adj(m2, frent->fe_hdrlen);
591284573Skp		/* Strip off any trailing bytes. */
592284573Skp		m_adj(m2, frent->fe_len - m2->m_pkthdr.len);
593284573Skp
594240233Sglebius		uma_zfree(V_pf_frent_z, frent);
595126258Smlaier		m_cat(m, m2);
596126258Smlaier	}
597223637Sbz
598284569Skp	/* Remove from fragment queue. */
599284569Skp	pf_remove_fragment(frag);
600126258Smlaier
601284569Skp	return (m);
602284569Skp}
603126258Smlaier
604284579Skp#ifdef INET
605284569Skpstatic int
606284569Skppf_reassemble(struct mbuf **m0, struct ip *ip, int dir, u_short *reason)
607284569Skp{
608284569Skp	struct mbuf		*m = *m0;
609284569Skp	struct pf_frent		*frent;
610284569Skp	struct pf_fragment	*frag;
611284569Skp	struct pf_fragment_cmp	key;
612284569Skp	uint16_t		total, hdrlen;
613126258Smlaier
614284569Skp	/* Get an entry for the fragment queue */
615284569Skp	if ((frent = pf_create_fragment(reason)) == NULL)
616284569Skp		return (PF_DROP);
617284569Skp
618284569Skp	frent->fe_m = m;
619284569Skp	frent->fe_hdrlen = ip->ip_hl << 2;
620284569Skp	frent->fe_extoff = 0;
621284569Skp	frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
622284569Skp	frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
623284569Skp	frent->fe_mff = ntohs(ip->ip_off) & IP_MF;
624284569Skp
625284569Skp	pf_ip2key(ip, dir, &key);
626284569Skp
627284569Skp	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
628284569Skp		return (PF_DROP);
629284569Skp
630284569Skp	/* The mbuf is part of the fragment entry, no direct free or access */
631284569Skp	m = *m0 = NULL;
632284569Skp
633284569Skp	if (!pf_isfull_fragment(frag))
634284569Skp		return (PF_PASS);  /* drop because *m0 is NULL, no error */
635284569Skp
636284569Skp	/* We have all the data */
637284569Skp	frent = TAILQ_FIRST(&frag->fr_queue);
638284569Skp	KASSERT(frent != NULL, ("frent != NULL"));
639284569Skp	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
640284569Skp		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
641284569Skp	hdrlen = frent->fe_hdrlen;
642284569Skp
643284569Skp	m = *m0 = pf_join_fragment(frag);
644284569Skp	frag = NULL;
645284569Skp
646126258Smlaier	if (m->m_flags & M_PKTHDR) {
647126258Smlaier		int plen = 0;
648284569Skp		for (m = *m0; m; m = m->m_next)
649284569Skp			plen += m->m_len;
650284569Skp		m = *m0;
651126258Smlaier		m->m_pkthdr.len = plen;
652126258Smlaier	}
653126258Smlaier
654284569Skp	ip = mtod(m, struct ip *);
655284569Skp	ip->ip_len = htons(hdrlen + total);
656284569Skp	ip->ip_off &= ~(IP_MF|IP_OFFMASK);
657284569Skp
658284569Skp	if (hdrlen + total > IP_MAXPACKET) {
659284569Skp		DPFPRINTF(("drop: too big: %d", total));
660284569Skp		ip->ip_len = 0;
661284569Skp		REASON_SET(reason, PFRES_SHORT);
662284569Skp		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
663284569Skp		return (PF_DROP);
664284569Skp	}
665284569Skp
666126258Smlaier	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
667284569Skp	return (PF_PASS);
668284569Skp}
669284579Skp#endif	/* INET */
670126258Smlaier
671284569Skp#ifdef INET6
672284579Skpstatic int
673284569Skppf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr,
674284581Skp    uint16_t hdrlen, uint16_t extoff, u_short *reason)
675284569Skp{
676284569Skp	struct mbuf		*m = *m0;
677284569Skp	struct pf_frent		*frent;
678284569Skp	struct pf_fragment	*frag;
679284569Skp	struct pf_fragment_cmp	 key;
680284571Skp	struct m_tag		*mtag;
681284571Skp	struct pf_fragment_tag	*ftag;
682284569Skp	int			 off;
683284572Skp	uint32_t		 frag_id;
684284569Skp	uint16_t		 total, maxlen;
685284569Skp	uint8_t			 proto;
686284569Skp
687284569Skp	PF_FRAG_LOCK();
688284569Skp
689284569Skp	/* Get an entry for the fragment queue. */
690284569Skp	if ((frent = pf_create_fragment(reason)) == NULL) {
691284569Skp		PF_FRAG_UNLOCK();
692284569Skp		return (PF_DROP);
693284569Skp	}
694284569Skp
695284569Skp	frent->fe_m = m;
696284569Skp	frent->fe_hdrlen = hdrlen;
697284569Skp	frent->fe_extoff = extoff;
698284569Skp	frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen;
699284569Skp	frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
700284569Skp	frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG;
701284569Skp
702284569Skp	key.frc_src.v6 = ip6->ip6_src;
703284569Skp	key.frc_dst.v6 = ip6->ip6_dst;
704284569Skp	key.frc_af = AF_INET6;
705284569Skp	/* Only the first fragment's protocol is relevant. */
706284569Skp	key.frc_proto = 0;
707284569Skp	key.frc_id = fraghdr->ip6f_ident;
708284569Skp
709284569Skp	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
710284569Skp		PF_FRAG_UNLOCK();
711284569Skp		return (PF_DROP);
712284569Skp	}
713284569Skp
714284569Skp	/* The mbuf is part of the fragment entry, no direct free or access. */
715284569Skp	m = *m0 = NULL;
716284569Skp
717284569Skp	if (!pf_isfull_fragment(frag)) {
718284569Skp		PF_FRAG_UNLOCK();
719284569Skp		return (PF_PASS);  /* Drop because *m0 is NULL, no error. */
720284569Skp	}
721284569Skp
722284569Skp	/* We have all the data. */
723284569Skp	extoff = frent->fe_extoff;
724284569Skp	maxlen = frag->fr_maxlen;
725284572Skp	frag_id = frag->fr_id;
726284569Skp	frent = TAILQ_FIRST(&frag->fr_queue);
727284569Skp	KASSERT(frent != NULL, ("frent != NULL"));
728284569Skp	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
729284569Skp		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
730284569Skp	hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag);
731284569Skp
732284569Skp	m = *m0 = pf_join_fragment(frag);
733284569Skp	frag = NULL;
734284569Skp
735284569Skp	PF_FRAG_UNLOCK();
736284569Skp
737284569Skp	/* Take protocol from first fragment header. */
738284569Skp	m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), &off);
739284569Skp	KASSERT(m, ("%s: short mbuf chain", __func__));
740284569Skp	proto = *(mtod(m, caddr_t) + off);
741284569Skp	m = *m0;
742284569Skp
743284569Skp	/* Delete frag6 header */
744284569Skp	if (ip6_deletefraghdr(m, hdrlen, M_NOWAIT) != 0)
745284569Skp		goto fail;
746284569Skp
747284569Skp	if (m->m_flags & M_PKTHDR) {
748284569Skp		int plen = 0;
749284569Skp		for (m = *m0; m; m = m->m_next)
750284569Skp			plen += m->m_len;
751284569Skp		m = *m0;
752284569Skp		m->m_pkthdr.len = plen;
753284569Skp	}
754284569Skp
755284571Skp	if ((mtag = m_tag_get(PF_REASSEMBLED, sizeof(struct pf_fragment_tag),
756284571Skp	    M_NOWAIT)) == NULL)
757284571Skp		goto fail;
758284571Skp	ftag = (struct pf_fragment_tag *)(mtag + 1);
759284571Skp	ftag->ft_hdrlen = hdrlen;
760284571Skp	ftag->ft_extoff = extoff;
761284571Skp	ftag->ft_maxlen = maxlen;
762284572Skp	ftag->ft_id = frag_id;
763284571Skp	m_tag_prepend(m, mtag);
764284571Skp
765284569Skp	ip6 = mtod(m, struct ip6_hdr *);
766284569Skp	ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total);
767284569Skp	if (extoff) {
768284569Skp		/* Write protocol into next field of last extension header. */
769284569Skp		m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
770284569Skp		    &off);
771284569Skp		KASSERT(m, ("%s: short mbuf chain", __func__));
772284569Skp		*(mtod(m, char *) + off) = proto;
773284569Skp		m = *m0;
774284569Skp	} else
775284569Skp		ip6->ip6_nxt = proto;
776284569Skp
777284569Skp	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
778284569Skp		DPFPRINTF(("drop: too big: %d", total));
779284569Skp		ip6->ip6_plen = 0;
780284569Skp		REASON_SET(reason, PFRES_SHORT);
781284569Skp		/* PF_DROP requires a valid mbuf *m0 in pf_test6(). */
782284569Skp		return (PF_DROP);
783284569Skp	}
784284569Skp
785284569Skp	DPFPRINTF(("complete: %p(%d)", m, ntohs(ip6->ip6_plen)));
786284569Skp	return (PF_PASS);
787284569Skp
788284569Skpfail:
789284569Skp	REASON_SET(reason, PFRES_MEMORY);
790284569Skp	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later. */
791284569Skp	return (PF_DROP);
792126258Smlaier}
793284579Skp#endif	/* INET6 */
794126258Smlaier
795284579Skp#ifdef INET
/*
 * Non-buffering fragment cache, used by the "fragment crop" and
 * "fragment drop-ovl" scrub modes.  Instead of reassembling, it only
 * records the byte ranges already seen for this IPv4 packet id and
 * trims (crop) or refuses (drop) any payload overlapping a range that
 * already passed through.
 *
 * m0:    in/out mbuf; may be replaced when the front of the payload is
 *        chopped off (m_dup + m_adj + m_cat below).
 * h:     IPv4 header of *m0.
 * frag:  in/out cache descriptor; allocated here on the first fragment
 *        and freed (and set to NULL) once the packet is fully covered.
 * mff:   non-zero while IP_MF was set (more fragments follow).
 * drop:  non-zero for drop-ovl: overlaps are not trimmed but dropped,
 *        and the overall packet is marked PFFRAG_DROP.
 * nomem: set to 1 when an allocation failed.
 *
 * Returns the mbuf to continue processing with, or NULL when the
 * fragment was dropped/consumed (check *nomem to tell which).  The
 * fragment table lock must be held (PF_FRAG_ASSERT).
 *
 * NOTE(review): in this function fe_len holds the *end* offset
 * ("max"), not a length -- see the TODO at the first insertion; all
 * range arithmetic below is consistent with that reading.
 */
796240233Sglebiusstatic struct mbuf *
797126258Smlaierpf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
798126258Smlaier    int drop, int *nomem)
799126258Smlaier{
800126258Smlaier	struct mbuf		*m = *m0;
801240233Sglebius	struct pf_frent		*frp, *fra, *cur = NULL;
802126258Smlaier	int			 ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
803126258Smlaier	u_int16_t		 off = ntohs(h->ip_off) << 3;
804126258Smlaier	u_int16_t		 max = ip_len + off;
	/* Set when an overlap is found in 'drop' mode; the drop happens late. */
805126258Smlaier	int			 hosed = 0;
806126258Smlaier
807240233Sglebius	PF_FRAG_ASSERT();
808126261Smlaier	KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)),
809126261Smlaier	    ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
810126258Smlaier
811126258Smlaier	/* Create a new range queue for this packet */
812126258Smlaier	if (*frag == NULL) {
813240233Sglebius		*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
814126258Smlaier		if (*frag == NULL) {
			/* Reclaim stale fragments and retry once. */
815126258Smlaier			pf_flush_fragments();
816240233Sglebius			*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
817126258Smlaier			if (*frag == NULL)
818126258Smlaier				goto no_mem;
819126258Smlaier		}
820126258Smlaier
821126258Smlaier		/* Get an entry for the queue */
822240233Sglebius		cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
823223637Sbz		if (cur == NULL) {
824240233Sglebius			uma_zfree(V_pf_frag_z, *frag);
825126258Smlaier			*frag = NULL;
826126258Smlaier			goto no_mem;
827126258Smlaier		}
828126258Smlaier
829126258Smlaier		(*frag)->fr_flags = PFFRAG_NOBUFFER;
830126258Smlaier		(*frag)->fr_max = 0;
831284569Skp		(*frag)->fr_src.v4 = h->ip_src;
832284569Skp		(*frag)->fr_dst.v4 = h->ip_dst;
833284580Skp		(*frag)->fr_af = AF_INET;
834284580Skp		(*frag)->fr_proto = h->ip_p;
835126258Smlaier		(*frag)->fr_id = h->ip_id;
836240233Sglebius		(*frag)->fr_timeout = time_uptime;
837126258Smlaier
838284569Skp		cur->fe_off = off;
839284569Skp		cur->fe_len = max; /* TODO: fe_len = max - off ? */
840284569Skp		TAILQ_INIT(&(*frag)->fr_queue);
841284569Skp		TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
842126258Smlaier
843223637Sbz		RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag);
844223637Sbz		TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next);
845126258Smlaier
846126258Smlaier		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));
847126258Smlaier
848126258Smlaier		goto pass;
849126258Smlaier	}
850126258Smlaier
851126258Smlaier	/*
852126258Smlaier	 * Find a fragment after the current one:
853126258Smlaier	 *  - off contains the real shifted offset.
854126258Smlaier	 */
855126258Smlaier	frp = NULL;
856284569Skp	TAILQ_FOREACH(fra, &(*frag)->fr_queue, fr_next) {
857284569Skp		if (fra->fe_off > off)
858126258Smlaier			break;
859126258Smlaier		frp = fra;
860126258Smlaier	}
861126258Smlaier
862126261Smlaier	KASSERT((frp != NULL || fra != NULL),
863126261Smlaier	    ("!(frp != NULL || fra != NULL): %s", __FUNCTION__));
864126258Smlaier
	/* Handle overlap with the preceding (lower-offset) cached range. */
865126258Smlaier	if (frp != NULL) {
866126258Smlaier		int	precut;
867126258Smlaier
		/* Bytes at the front of this fragment already passed earlier. */
868284569Skp		precut = frp->fe_len - off;
869126258Smlaier		if (precut >= ip_len) {
870126258Smlaier			/* Fragment is entirely a duplicate */
871126258Smlaier			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
872284569Skp			    h->ip_id, frp->fe_off, frp->fe_len, off, max));
873126258Smlaier			goto drop_fragment;
874126258Smlaier		}
875126258Smlaier		if (precut == 0) {
876126258Smlaier			/* They are adjacent.  Fixup cache entry */
877126258Smlaier			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
878284569Skp			    h->ip_id, frp->fe_off, frp->fe_len, off, max));
879284569Skp			frp->fe_len = max;
880126258Smlaier		} else if (precut > 0) {
881126258Smlaier			/* The first part of this payload overlaps with a
882126258Smlaier			 * fragment that has already been passed.
883126258Smlaier			 * Need to trim off the first part of the payload.
884126258Smlaier			 * But to do so easily, we need to create another
885126258Smlaier			 * mbuf to throw the original header into.
886126258Smlaier			 */
887126258Smlaier
888126258Smlaier			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
889284569Skp			    h->ip_id, precut, frp->fe_off, frp->fe_len, off,
890126258Smlaier			    max));
891126258Smlaier
892126258Smlaier			off += precut;
893126258Smlaier			max -= precut;
894126258Smlaier			/* Update the previous frag to encompass this one */
895284569Skp			frp->fe_len = max;
896126258Smlaier
897126258Smlaier			if (!drop) {
898126258Smlaier				/* XXX Optimization opportunity
899126258Smlaier				 * This is a very heavy way to trim the payload.
900126258Smlaier				 * we could do it much faster by diddling mbuf
901126258Smlaier				 * internals but that would be even less legible
902126258Smlaier				 * than this mbuf magic.  For my next trick,
903126258Smlaier				 * I'll pull a rabbit out of my laptop.
904126258Smlaier				 */
				/* Duplicate, keep only the IP header in the copy... */
905240233Sglebius				*m0 = m_dup(m, M_NOWAIT);
906126258Smlaier				if (*m0 == NULL)
907126258Smlaier					goto no_mem;
908154377Smlaier				/* From KAME Project : We have missed this! */
909154377Smlaier				m_adj(*m0, (h->ip_hl << 2) -
910154377Smlaier				    (*m0)->m_pkthdr.len);
911154377Smlaier
912240233Sglebius				KASSERT(((*m0)->m_next == NULL),
913240233Sglebius				    ("(*m0)->m_next != NULL: %s",
914126261Smlaier				    __FUNCTION__));
				/* ... trim header+precut off the original, splice. */
915126258Smlaier				m_adj(m, precut + (h->ip_hl << 2));
916126258Smlaier				m_cat(*m0, m);
917126258Smlaier				m = *m0;
918126258Smlaier				if (m->m_flags & M_PKTHDR) {
919126258Smlaier					int plen = 0;
920126258Smlaier					struct mbuf *t;
921126258Smlaier					for (t = m; t; t = t->m_next)
922126258Smlaier						plen += t->m_len;
923126258Smlaier					m->m_pkthdr.len = plen;
924126258Smlaier				}
925126258Smlaier
926126258Smlaier
927126258Smlaier				h = mtod(m, struct ip *);
928126258Smlaier
929130613Smlaier				KASSERT(((int)m->m_len ==
930130613Smlaier				    ntohs(h->ip_len) - precut),
931126261Smlaier				    ("m->m_len != ntohs(h->ip_len) - precut: %s",
932126261Smlaier				    __FUNCTION__));
933130613Smlaier				h->ip_off = htons(ntohs(h->ip_off) +
934130613Smlaier				    (precut >> 3));
935126258Smlaier				h->ip_len = htons(ntohs(h->ip_len) - precut);
936126258Smlaier			} else {
937126258Smlaier				hosed++;
938126258Smlaier			}
939126258Smlaier		} else {
940126258Smlaier			/* There is a gap between fragments */
941126258Smlaier
942126258Smlaier			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
943284569Skp			    h->ip_id, -precut, frp->fe_off, frp->fe_len, off,
944126258Smlaier			    max));
945126258Smlaier
946240233Sglebius			cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
947126258Smlaier			if (cur == NULL)
948126258Smlaier				goto no_mem;
949126258Smlaier
950284569Skp			cur->fe_off = off;
951284569Skp			cur->fe_len = max;
952284569Skp			TAILQ_INSERT_AFTER(&(*frag)->fr_queue, frp, cur, fr_next);
953126258Smlaier		}
954126258Smlaier	}
955126258Smlaier
	/* Handle overlap with the following (higher-offset) cached range. */
956126258Smlaier	if (fra != NULL) {
957126258Smlaier		int	aftercut;
958126258Smlaier		int	merge = 0;
959126258Smlaier
		/* Bytes at the tail of this fragment reaching into 'fra'. */
960284569Skp		aftercut = max - fra->fe_off;
961126258Smlaier		if (aftercut == 0) {
962126258Smlaier			/* Adjacent fragments */
963126258Smlaier			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
964284569Skp			    h->ip_id, off, max, fra->fe_off, fra->fe_len));
965284569Skp			fra->fe_off = off;
966126258Smlaier			merge = 1;
967126258Smlaier		} else if (aftercut > 0) {
968126258Smlaier			/* Need to chop off the tail of this fragment */
969126258Smlaier			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
970284569Skp			    h->ip_id, aftercut, off, max, fra->fe_off,
971284569Skp			    fra->fe_len));
972284569Skp			fra->fe_off = off;
973126258Smlaier			max -= aftercut;
974126258Smlaier
975126258Smlaier			merge = 1;
976126258Smlaier
977126258Smlaier			if (!drop) {
978126258Smlaier				m_adj(m, -aftercut);
979126258Smlaier				if (m->m_flags & M_PKTHDR) {
980126258Smlaier					int plen = 0;
981126258Smlaier					struct mbuf *t;
982126258Smlaier					for (t = m; t; t = t->m_next)
983126258Smlaier						plen += t->m_len;
984126258Smlaier					m->m_pkthdr.len = plen;
985126258Smlaier				}
986126258Smlaier				h = mtod(m, struct ip *);
987126261Smlaier				KASSERT(((int)m->m_len == ntohs(h->ip_len) - aftercut),
988126261Smlaier				    ("m->m_len != ntohs(h->ip_len) - aftercut: %s",
989126261Smlaier				    __FUNCTION__));
990126258Smlaier				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
991126258Smlaier			} else {
992126258Smlaier				hosed++;
993126258Smlaier			}
994154551Sdhartmei		} else if (frp == NULL) {
995126258Smlaier			/* There is a gap between fragments */
996126258Smlaier			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
997284569Skp			    h->ip_id, -aftercut, off, max, fra->fe_off,
998284569Skp			    fra->fe_len));
999126258Smlaier
1000240233Sglebius			cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
1001126258Smlaier			if (cur == NULL)
1002126258Smlaier				goto no_mem;
1003126258Smlaier
1004284569Skp			cur->fe_off = off;
1005284569Skp			cur->fe_len = max;
1006284569Skp			TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
1007126258Smlaier		}
1008126258Smlaier
1009126258Smlaier
1010126258Smlaier		/* Need to glue together two separate fragment descriptors */
1011126258Smlaier		if (merge) {
1012284569Skp			if (cur && fra->fe_off <= cur->fe_len) {
1013126258Smlaier				/* Need to merge in a previous 'cur' */
1014126258Smlaier				DPFPRINTF(("fragcache[%d]: adjacent(merge "
1015126258Smlaier				    "%d-%d) %d-%d (%d-%d)\n",
1016284569Skp				    h->ip_id, cur->fe_off, cur->fe_len, off,
1017284569Skp				    max, fra->fe_off, fra->fe_len));
1018284569Skp				fra->fe_off = cur->fe_off;
1019284569Skp				TAILQ_REMOVE(&(*frag)->fr_queue, cur, fr_next);
1020240233Sglebius				uma_zfree(V_pf_frent_z, cur);
1021126258Smlaier				cur = NULL;
1022126258Smlaier
1023284569Skp			} else if (frp && fra->fe_off <= frp->fe_len) {
1024126258Smlaier				/* Need to merge in a modified 'frp' */
1025126261Smlaier				KASSERT((cur == NULL), ("cur != NULL: %s",
1026126261Smlaier				    __FUNCTION__));
1027126258Smlaier				DPFPRINTF(("fragcache[%d]: adjacent(merge "
1028126258Smlaier				    "%d-%d) %d-%d (%d-%d)\n",
1029284569Skp				    h->ip_id, frp->fe_off, frp->fe_len, off,
1030284569Skp				    max, fra->fe_off, fra->fe_len));
1031284569Skp				fra->fe_off = frp->fe_off;
1032284569Skp				TAILQ_REMOVE(&(*frag)->fr_queue, frp, fr_next);
1033240233Sglebius				uma_zfree(V_pf_frent_z, frp);
1034126258Smlaier				frp = NULL;
1035126258Smlaier
1036126258Smlaier			}
1037126258Smlaier		}
1038126258Smlaier	}
1039126258Smlaier
1040126258Smlaier	if (hosed) {
1041126258Smlaier		/*
1042126258Smlaier		 * We must keep tracking the overall fragment even when
1043126258Smlaier		 * we're going to drop it anyway so that we know when to
1044126258Smlaier		 * free the overall descriptor.  Thus we drop the frag late.
1045126258Smlaier		 */
1046126258Smlaier		goto drop_fragment;
1047126258Smlaier	}
1048126258Smlaier
1049126258Smlaier
1050126258Smlaier pass:
1051126258Smlaier	/* Update maximum data size */
1052126258Smlaier	if ((*frag)->fr_max < max)
1053126258Smlaier		(*frag)->fr_max = max;
1054126258Smlaier
1055126258Smlaier	/* This is the last segment */
1056126258Smlaier	if (!mff)
1057126258Smlaier		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1058126258Smlaier
1059126258Smlaier	/* Check if we are completely reassembled */
1060126258Smlaier	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
1061284569Skp	    TAILQ_FIRST(&(*frag)->fr_queue)->fe_off == 0 &&
1062284569Skp	    TAILQ_FIRST(&(*frag)->fr_queue)->fe_len == (*frag)->fr_max) {
1063126258Smlaier		/* Remove from fragment queue */
1064126258Smlaier		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
1065126258Smlaier		    (*frag)->fr_max));
1066126258Smlaier		pf_free_fragment(*frag);
1067126258Smlaier		*frag = NULL;
1068126258Smlaier	}
1069126258Smlaier
1070126258Smlaier	return (m);
1071126258Smlaier
1072126258Smlaier no_mem:
1073126258Smlaier	*nomem = 1;
1074126258Smlaier
1075126258Smlaier	/* Still need to pay attention to !IP_MF */
1076126258Smlaier	if (!mff && *frag != NULL)
1077126258Smlaier		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1078126258Smlaier
1079126258Smlaier	m_freem(m);
1080126258Smlaier	return (NULL);
1081126258Smlaier
1082126258Smlaier drop_fragment:
1083126258Smlaier
1084126258Smlaier	/* Still need to pay attention to !IP_MF */
1085126258Smlaier	if (!mff && *frag != NULL)
1086126258Smlaier		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1087126258Smlaier
1088126258Smlaier	if (drop) {
1089126258Smlaier		/* This fragment has been deemed bad.  Don't reass */
1090126258Smlaier		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
1091126258Smlaier			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
1092126258Smlaier			    h->ip_id));
1093126258Smlaier		(*frag)->fr_flags |= PFFRAG_DROP;
1094126258Smlaier	}
1095126258Smlaier
1096126258Smlaier	m_freem(m);
1097126258Smlaier	return (NULL);
1098126258Smlaier}
1099284579Skp#endif	/* INET */
1100126258Smlaier
1101284579Skp#ifdef INET6
/*
 * Re-fragment an IPv6 packet that pf previously reassembled.  The
 * reassembly parameters (hdrlen, extoff, maxlen, frag id) are taken
 * from the PF_REASSEMBLED m_tag attached in pf_reassemble6(), the
 * IPPROTO_FRAGMENT next-header value is restored into the header
 * chain, and ip6_fragment() rebuilds the fragment list, which is then
 * forwarded fragment by fragment via ip6_forward().
 *
 * Returns PF_PASS when fragmentation succeeded (*m0 is freed and set
 * to NULL), PF_DROP on error (the fragment chain, if any, is freed
 * here; the caller still owns *m0).
 */
1102126258Smlaierint
1103284571Skppf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag)
1104284571Skp{
1105284571Skp	struct mbuf		*m = *m0, *t;
1106284571Skp	struct pf_fragment_tag	*ftag = (struct pf_fragment_tag *)(mtag + 1);
1107284571Skp	struct pf_pdesc		 pd;
1108284572Skp	uint32_t		 frag_id;
1109284571Skp	uint16_t		 hdrlen, extoff, maxlen;
1110284571Skp	uint8_t			 proto;
1111284571Skp	int			 error, action;
1112284571Skp
	/* Copy out the tag payload before deleting the tag. */
1113284571Skp	hdrlen = ftag->ft_hdrlen;
1114284571Skp	extoff = ftag->ft_extoff;
1115284571Skp	maxlen = ftag->ft_maxlen;
1116284572Skp	frag_id = ftag->ft_id;
1117284571Skp	m_tag_delete(m, mtag);
1118284571Skp	mtag = NULL;
1119284571Skp	ftag = NULL;
1120284571Skp
1121284571Skp	if (extoff) {
1122284571Skp		int off;
1123284571Skp
1124284571Skp		/* Use protocol from next field of last extension header */
1125284571Skp		m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
1126284571Skp		    &off);
1127284571Skp		KASSERT((m != NULL), ("pf_refragment6: short mbuf chain"));
1128284571Skp		proto = *(mtod(m, caddr_t) + off);
1129284571Skp		*(mtod(m, char *) + off) = IPPROTO_FRAGMENT;
1130284571Skp		m = *m0;
1131284571Skp	} else {
1132284571Skp		struct ip6_hdr *hdr;
1133284571Skp
		/* No extension headers: patch the base header directly. */
1134284571Skp		hdr = mtod(m, struct ip6_hdr *);
1135284571Skp		proto = hdr->ip6_nxt;
1136284571Skp		hdr->ip6_nxt = IPPROTO_FRAGMENT;
1137284571Skp	}
1138284571Skp
1139317335Skp	/* The MTU must be a multiple of 8 bytes, or we risk doing the
1140317335Skp	 * fragmentation wrong. */
1141317335Skp	maxlen = maxlen & ~7;
1142317335Skp
1143284571Skp	/*
1144284571Skp	 * Maxlen may be less than 8 if there was only a single
1145284571Skp	 * fragment.  As it was fragmented before, add a fragment
1146284571Skp	 * header also for a single fragment.  If total or maxlen
1147284571Skp	 * is less than 8, ip6_fragment() will return EMSGSIZE and
1148284571Skp	 * we drop the packet.
1149284571Skp	 */
1150284572Skp	error = ip6_fragment(ifp, m, hdrlen, proto, maxlen, frag_id);
	/* Fragments (if any) are chained off (*m0)->m_nextpkt; detach them. */
1151284571Skp	m = (*m0)->m_nextpkt;
1152284571Skp	(*m0)->m_nextpkt = NULL;
1153284571Skp	if (error == 0) {
1154284571Skp		/* The first mbuf contains the unfragmented packet. */
1155284571Skp		m_freem(*m0);
1156284571Skp		*m0 = NULL;
1157284571Skp		action = PF_PASS;
1158284571Skp	} else {
1159284571Skp		/* Drop expects an mbuf to free. */
1160284571Skp		DPFPRINTF(("refragment error %d", error));
1161284571Skp		action = PF_DROP;
1162284571Skp	}
	/* Walk the fragment chain; forward on success, free on error. */
1163284571Skp	for (t = m; m; m = t) {
1164284571Skp		t = m->m_nextpkt;
1165284571Skp		m->m_nextpkt = NULL;
		/* Already inspected before reassembly; don't re-filter. */
1166284574Skp		m->m_flags |= M_SKIP_FIREWALL;
		/* NOTE(review): pd is zeroed and pf_mtag looked up but not
		 * otherwise used in this function -- presumably kept for
		 * symmetry with other paths; confirm before removing. */
1167284571Skp		memset(&pd, 0, sizeof(pd));
1168284571Skp		pd.pf_mtag = pf_find_mtag(m);
1169284571Skp		if (error == 0)
1170284571Skp			ip6_forward(m, 0);
1171284571Skp		else
1172284571Skp			m_freem(m);
1173284571Skp	}
1174284571Skp
1175284571Skp	return (action);
1176284571Skp}
1177284579Skp#endif /* INET6 */
1178284571Skp
1179284579Skp#ifdef INET
/*
 * IPv4 normalization (scrub) entry point.  Matches the packet against
 * the active scrub ruleset, validates the IP header, optionally clears
 * IP_DF (no-df), and handles fragments either by full reassembly
 * (pf_reassemble) or through the non-buffering fragment cache
 * (pf_fragcache) when the rule uses fragment crop/drop-ovl.  Finally
 * applies pf_scrub_ip (min-ttl, set-tos, random-id et al.).
 *
 * Returns PF_PASS or PF_DROP; on drop *reason is set and the rule is
 * logged when requested.  *m0 may be replaced by a reassembled chain
 * or set to NULL when the fragment was consumed by the cache.
 */
1180284571Skpint
1181145836Smlaierpf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
1182145836Smlaier    struct pf_pdesc *pd)
1183126258Smlaier{
1184126258Smlaier	struct mbuf		*m = *m0;
1185126258Smlaier	struct pf_rule		*r;
1186126258Smlaier	struct pf_fragment	*frag = NULL;
1187284569Skp	struct pf_fragment_cmp	key;
1188126258Smlaier	struct ip		*h = mtod(m, struct ip *);
1189126258Smlaier	int			 mff = (ntohs(h->ip_off) & IP_MF);
1190126258Smlaier	int			 hlen = h->ip_hl << 2;
1191126258Smlaier	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1192126258Smlaier	u_int16_t		 max;
1193126258Smlaier	int			 ip_len;
1194126258Smlaier	int			 ip_off;
1195223637Sbz	int			 tag = -1;
1196284569Skp	int			 verdict;
1197126258Smlaier
1198240233Sglebius	PF_RULES_RASSERT();
1199240233Sglebius
	/* Find the first scrub rule matching this packet (skip-step walk). */
1200126258Smlaier	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1201126258Smlaier	while (r != NULL) {
1202126258Smlaier		r->evaluations++;
1203171168Smlaier		if (pfi_kif_match(r->kif, kif) == r->ifnot)
1204126258Smlaier			r = r->skip[PF_SKIP_IFP].ptr;
1205126258Smlaier		else if (r->direction && r->direction != dir)
1206126258Smlaier			r = r->skip[PF_SKIP_DIR].ptr;
1207126258Smlaier		else if (r->af && r->af != AF_INET)
1208126258Smlaier			r = r->skip[PF_SKIP_AF].ptr;
1209126258Smlaier		else if (r->proto && r->proto != h->ip_p)
1210126258Smlaier			r = r->skip[PF_SKIP_PROTO].ptr;
1211126258Smlaier		else if (PF_MISMATCHAW(&r->src.addr,
1212171168Smlaier		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
1213231852Sbz		    r->src.neg, kif, M_GETFIB(m)))
1214126258Smlaier			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1215126258Smlaier		else if (PF_MISMATCHAW(&r->dst.addr,
1216171168Smlaier		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
1217231852Sbz		    r->dst.neg, NULL, M_GETFIB(m)))
1218126258Smlaier			r = r->skip[PF_SKIP_DST_ADDR].ptr;
1219240233Sglebius		else if (r->match_tag && !pf_match_tag(m, r, &tag,
1220240233Sglebius		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
1221223637Sbz			r = TAILQ_NEXT(r, entries);
1222126258Smlaier		else
1223126258Smlaier			break;
1224126258Smlaier	}
1225126258Smlaier
1226171168Smlaier	if (r == NULL || r->action == PF_NOSCRUB)
1227126258Smlaier		return (PF_PASS);
1228171168Smlaier	else {
1229171168Smlaier		r->packets[dir == PF_OUT]++;
1230171168Smlaier		r->bytes[dir == PF_OUT] += pd->tot_len;
1231171168Smlaier	}
1232126258Smlaier
1233126258Smlaier	/* Check for illegal packets */
1234126258Smlaier	if (hlen < (int)sizeof(struct ip))
1235126258Smlaier		goto drop;
1236126258Smlaier
1237126258Smlaier	if (hlen > ntohs(h->ip_len))
1238126258Smlaier		goto drop;
1239126258Smlaier
1240126258Smlaier	/* Clear IP_DF if the rule uses the no-df option */
1241157131Smlaier	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
1242157131Smlaier		u_int16_t ip_off = h->ip_off;
1243157131Smlaier
1244126258Smlaier		h->ip_off &= htons(~IP_DF);
		/* Incrementally fix the checksum for the changed ip_off. */
1245157131Smlaier		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
1246157131Smlaier	}
1247126258Smlaier
1248126258Smlaier	/* We will need other tests here */
1249126258Smlaier	if (!fragoff && !mff)
1250126258Smlaier		goto no_fragment;
1251126258Smlaier
1252126258Smlaier	/* We're dealing with a fragment now. Don't allow fragments
1253126258Smlaier	 * with IP_DF to enter the cache. If the flag was cleared by
1254126258Smlaier	 * no-df above, fine. Otherwise drop it.
1255126258Smlaier	 */
1256126258Smlaier	if (h->ip_off & htons(IP_DF)) {
1257126258Smlaier		DPFPRINTF(("IP_DF\n"));
1258126258Smlaier		goto bad;
1259126258Smlaier	}
1260126258Smlaier
1261126258Smlaier	ip_len = ntohs(h->ip_len) - hlen;
1262126258Smlaier	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1263126258Smlaier
1264126258Smlaier	/* All fragments are 8 byte aligned */
1265126258Smlaier	if (mff && (ip_len & 0x7)) {
1266126258Smlaier		DPFPRINTF(("mff and %d\n", ip_len));
1267126258Smlaier		goto bad;
1268126258Smlaier	}
1269126258Smlaier
1270126258Smlaier	/* Respect maximum length */
1271126258Smlaier	if (fragoff + ip_len > IP_MAXPACKET) {
1272126258Smlaier		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
1273126258Smlaier		goto bad;
1274126258Smlaier	}
1275126258Smlaier	max = fragoff + ip_len;
1276126258Smlaier
1277126258Smlaier	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
1278240233Sglebius
1279126258Smlaier		/* Fully buffer all of the fragments */
1280240233Sglebius		PF_FRAG_LOCK();
1281126258Smlaier
1282284569Skp		pf_ip2key(h, dir, &key);
1283284569Skp		frag = pf_find_fragment(&key, &V_pf_frag_tree);
1284284569Skp
1285126258Smlaier		/* Check if we saw the last fragment already */
1286126258Smlaier		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1287126258Smlaier		    max > frag->fr_max)
1288126258Smlaier			goto bad;
1289126258Smlaier
1290126258Smlaier		/* Might return a completely reassembled mbuf, or NULL */
1291126258Smlaier		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
1292284569Skp		verdict = pf_reassemble(m0, h, dir, reason);
1293240233Sglebius		PF_FRAG_UNLOCK();
1294126258Smlaier
1295284569Skp		if (verdict != PF_PASS)
1296284569Skp			return (PF_DROP);
1297284569Skp
1298284569Skp		m = *m0;
1299126258Smlaier		if (m == NULL)
1300126258Smlaier			return (PF_DROP);
1301126258Smlaier
1302171168Smlaier		/* use mtag from concatenated mbuf chain */
1303171168Smlaier		pd->pf_mtag = pf_find_mtag(m);
1304171168Smlaier#ifdef DIAGNOSTIC
1305171168Smlaier		if (pd->pf_mtag == NULL) {
1306171168Smlaier			printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
1307171168Smlaier			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
1308171168Smlaier				m_freem(m);
1309171168Smlaier				*m0 = NULL;
1310171168Smlaier				goto no_mem;
1311171168Smlaier			}
1312171168Smlaier		}
1313171168Smlaier#endif
1314126258Smlaier		h = mtod(m, struct ip *);
1315126258Smlaier	} else {
1316126258Smlaier		/* non-buffering fragment cache (drops or masks overlaps) */
1317126258Smlaier		int	nomem = 0;
1318126258Smlaier
1319171168Smlaier		if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) {
1320171168Smlaier			/*
1321171168Smlaier			 * Already passed the fragment cache in the
1322171168Smlaier			 * input direction.  If we continued, it would
1323171168Smlaier			 * appear to be a dup and would be dropped.
1324171168Smlaier			 */
1325171168Smlaier			goto fragment_pass;
1326126258Smlaier		}
1327126258Smlaier
1328240233Sglebius		PF_FRAG_LOCK();
1329284569Skp		pf_ip2key(h, dir, &key);
1330284569Skp		frag = pf_find_fragment(&key, &V_pf_cache_tree);
1331126258Smlaier
1332126258Smlaier		/* Check if we saw the last fragment already */
1333126258Smlaier		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1334126258Smlaier		    max > frag->fr_max) {
1335126258Smlaier			if (r->rule_flag & PFRULE_FRAGDROP)
1336126258Smlaier				frag->fr_flags |= PFFRAG_DROP;
1337126258Smlaier			goto bad;
1338126258Smlaier		}
1339126258Smlaier
1340126258Smlaier		*m0 = m = pf_fragcache(m0, h, &frag, mff,
1341126258Smlaier		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
1342240233Sglebius		PF_FRAG_UNLOCK();
1343126258Smlaier		if (m == NULL) {
1344126258Smlaier			if (nomem)
1345126258Smlaier				goto no_mem;
1346126258Smlaier			goto drop;
1347126258Smlaier		}
1348126258Smlaier
1349171168Smlaier		/* use mtag from copied and trimmed mbuf chain */
1350171168Smlaier		pd->pf_mtag = pf_find_mtag(m);
1351171168Smlaier#ifdef DIAGNOSTIC
1352171168Smlaier		if (pd->pf_mtag == NULL) {
1353171168Smlaier			printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
1354171168Smlaier			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
1355171168Smlaier				m_freem(m);
1356171168Smlaier				*m0 = NULL;
1357126258Smlaier				goto no_mem;
1358171168Smlaier			}
1359126258Smlaier		}
1360171168Smlaier#endif
		/* Remember we already ran the cache, for the PF_OUT pass. */
1361171168Smlaier		if (dir == PF_IN)
1362171168Smlaier			pd->pf_mtag->flags |= PF_TAG_FRAGCACHE;
1363171168Smlaier
1364126258Smlaier		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1365126258Smlaier			goto drop;
1366126258Smlaier		goto fragment_pass;
1367126258Smlaier	}
1368126258Smlaier
1369126258Smlaier no_fragment:
1370126258Smlaier	/* At this point, only IP_DF is allowed in ip_off */
1371157131Smlaier	if (h->ip_off & ~htons(IP_DF)) {
1372157131Smlaier		u_int16_t ip_off = h->ip_off;
1373126258Smlaier
1374157131Smlaier		h->ip_off &= htons(IP_DF);
1375157131Smlaier		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
1376157131Smlaier	}
1377157131Smlaier
1378223637Sbz	/* not missing a return here */
1379157131Smlaier
1380126258Smlaier fragment_pass:
1381223637Sbz	pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos);
1382157131Smlaier
1383145836Smlaier	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1384145836Smlaier		pd->flags |= PFDESC_IP_REAS;
1385126258Smlaier	return (PF_PASS);
1386126258Smlaier
1387126258Smlaier no_mem:
1388126258Smlaier	REASON_SET(reason, PFRES_MEMORY);
1389126258Smlaier	if (r != NULL && r->log)
1390240233Sglebius		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
1391240233Sglebius		    1);
1392126258Smlaier	return (PF_DROP);
1393126258Smlaier
1394126258Smlaier drop:
1395126258Smlaier	REASON_SET(reason, PFRES_NORM);
1396126258Smlaier	if (r != NULL && r->log)
1397240233Sglebius		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
1398240233Sglebius		    1);
1399126258Smlaier	return (PF_DROP);
1400126258Smlaier
1401126258Smlaier bad:
1402126258Smlaier	DPFPRINTF(("dropping bad fragment\n"));
1403126258Smlaier
1404126258Smlaier	/* Free associated fragments */
	/*
	 * Every 'goto bad' taken while PF_FRAG_LOCK is held has frag != NULL
	 * (guarded by the preceding checks), so unlocking only in this branch
	 * keeps lock acquire/release balanced on all paths.
	 */
1405240233Sglebius	if (frag != NULL) {
1406126258Smlaier		pf_free_fragment(frag);
1407240233Sglebius		PF_FRAG_UNLOCK();
1408240233Sglebius	}
1409126258Smlaier
1410126258Smlaier	REASON_SET(reason, PFRES_FRAG);
1411126258Smlaier	if (r != NULL && r->log)
1412240233Sglebius		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
1413240233Sglebius		    1);
1414126258Smlaier
1415126258Smlaier	return (PF_DROP);
1416126258Smlaier}
1417222529Sbz#endif
1418126258Smlaier
1419126258Smlaier#ifdef INET6
1420126258Smlaierint
1421130613Smlaierpf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
1422145836Smlaier    u_short *reason, struct pf_pdesc *pd)
1423126258Smlaier{
1424126258Smlaier	struct mbuf		*m = *m0;
1425126258Smlaier	struct pf_rule		*r;
1426126258Smlaier	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
1427284569Skp	int			 extoff;
1428126258Smlaier	int			 off;
1429126258Smlaier	struct ip6_ext		 ext;
1430126258Smlaier	struct ip6_opt		 opt;
1431126258Smlaier	struct ip6_opt_jumbo	 jumbo;
1432126258Smlaier	struct ip6_frag		 frag;
1433126258Smlaier	u_int32_t		 jumbolen = 0, plen;
1434126258Smlaier	int			 optend;
1435126258Smlaier	int			 ooff;
1436126258Smlaier	u_int8_t		 proto;
1437126258Smlaier	int			 terminal;
1438126258Smlaier
1439240233Sglebius	PF_RULES_RASSERT();
1440240233Sglebius
1441126258Smlaier	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1442126258Smlaier	while (r != NULL) {
1443126258Smlaier		r->evaluations++;
1444171168Smlaier		if (pfi_kif_match(r->kif, kif) == r->ifnot)
1445126258Smlaier			r = r->skip[PF_SKIP_IFP].ptr;
1446126258Smlaier		else if (r->direction && r->direction != dir)
1447126258Smlaier			r = r->skip[PF_SKIP_DIR].ptr;
1448126258Smlaier		else if (r->af && r->af != AF_INET6)
1449126258Smlaier			r = r->skip[PF_SKIP_AF].ptr;
1450126258Smlaier#if 0 /* header chain! */
1451126258Smlaier		else if (r->proto && r->proto != h->ip6_nxt)
1452126258Smlaier			r = r->skip[PF_SKIP_PROTO].ptr;
1453126258Smlaier#endif
1454126258Smlaier		else if (PF_MISMATCHAW(&r->src.addr,
1455171168Smlaier		    (struct pf_addr *)&h->ip6_src, AF_INET6,
1456231852Sbz		    r->src.neg, kif, M_GETFIB(m)))
1457126258Smlaier			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1458126258Smlaier		else if (PF_MISMATCHAW(&r->dst.addr,
1459171168Smlaier		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
1460231852Sbz		    r->dst.neg, NULL, M_GETFIB(m)))
1461126258Smlaier			r = r->skip[PF_SKIP_DST_ADDR].ptr;
1462126258Smlaier		else
1463126258Smlaier			break;
1464126258Smlaier	}
1465126258Smlaier
1466171168Smlaier	if (r == NULL || r->action == PF_NOSCRUB)
1467126258Smlaier		return (PF_PASS);
1468171168Smlaier	else {
1469171168Smlaier		r->packets[dir == PF_OUT]++;
1470171168Smlaier		r->bytes[dir == PF_OUT] += pd->tot_len;
1471171168Smlaier	}
1472126258Smlaier
1473126258Smlaier	/* Check for illegal packets */
1474126258Smlaier	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
1475126258Smlaier		goto drop;
1476126258Smlaier
1477284569Skp	extoff = 0;
1478126258Smlaier	off = sizeof(struct ip6_hdr);
1479126258Smlaier	proto = h->ip6_nxt;
1480126258Smlaier	terminal = 0;
1481126258Smlaier	do {
1482126258Smlaier		switch (proto) {
1483126258Smlaier		case IPPROTO_FRAGMENT:
1484126258Smlaier			goto fragment;
1485126258Smlaier			break;
1486126258Smlaier		case IPPROTO_AH:
1487126258Smlaier		case IPPROTO_ROUTING:
1488126258Smlaier		case IPPROTO_DSTOPTS:
1489126258Smlaier			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1490126258Smlaier			    NULL, AF_INET6))
1491126258Smlaier				goto shortpkt;
1492284569Skp			extoff = off;
1493126258Smlaier			if (proto == IPPROTO_AH)
1494126258Smlaier				off += (ext.ip6e_len + 2) * 4;
1495126258Smlaier			else
1496126258Smlaier				off += (ext.ip6e_len + 1) * 8;
1497126258Smlaier			proto = ext.ip6e_nxt;
1498126258Smlaier			break;
1499126258Smlaier		case IPPROTO_HOPOPTS:
1500126258Smlaier			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1501126258Smlaier			    NULL, AF_INET6))
1502126258Smlaier				goto shortpkt;
1503284569Skp			extoff = off;
1504126258Smlaier			optend = off + (ext.ip6e_len + 1) * 8;
1505126258Smlaier			ooff = off + sizeof(ext);
1506126258Smlaier			do {
1507126258Smlaier				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
1508126258Smlaier				    sizeof(opt.ip6o_type), NULL, NULL,
1509126258Smlaier				    AF_INET6))
1510126258Smlaier					goto shortpkt;
1511126258Smlaier				if (opt.ip6o_type == IP6OPT_PAD1) {
1512126258Smlaier					ooff++;
1513126258Smlaier					continue;
1514126258Smlaier				}
1515126258Smlaier				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
1516126258Smlaier				    NULL, NULL, AF_INET6))
1517126258Smlaier					goto shortpkt;
1518126258Smlaier				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
1519126258Smlaier					goto drop;
1520126258Smlaier				switch (opt.ip6o_type) {
1521126258Smlaier				case IP6OPT_JUMBO:
1522126258Smlaier					if (h->ip6_plen != 0)
1523126258Smlaier						goto drop;
1524126258Smlaier					if (!pf_pull_hdr(m, ooff, &jumbo,
1525126258Smlaier					    sizeof(jumbo), NULL, NULL,
1526126258Smlaier					    AF_INET6))
1527126258Smlaier						goto shortpkt;
1528126258Smlaier					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
1529126258Smlaier					    sizeof(jumbolen));
1530126258Smlaier					jumbolen = ntohl(jumbolen);
1531126258Smlaier					if (jumbolen <= IPV6_MAXPACKET)
1532126258Smlaier						goto drop;
1533126258Smlaier					if (sizeof(struct ip6_hdr) + jumbolen !=
1534126258Smlaier					    m->m_pkthdr.len)
1535126258Smlaier						goto drop;
1536126258Smlaier					break;
1537126258Smlaier				default:
1538126258Smlaier					break;
1539126258Smlaier				}
1540126258Smlaier				ooff += sizeof(opt) + opt.ip6o_len;
1541126258Smlaier			} while (ooff < optend);
1542126258Smlaier
1543126258Smlaier			off = optend;
1544126258Smlaier			proto = ext.ip6e_nxt;
1545126258Smlaier			break;
1546126258Smlaier		default:
1547126258Smlaier			terminal = 1;
1548126258Smlaier			break;
1549126258Smlaier		}
1550126258Smlaier	} while (!terminal);
1551126258Smlaier
1552126258Smlaier	/* jumbo payload option must be present, or plen > 0 */
1553126258Smlaier	if (ntohs(h->ip6_plen) == 0)
1554126258Smlaier		plen = jumbolen;
1555126258Smlaier	else
1556126258Smlaier		plen = ntohs(h->ip6_plen);
1557126258Smlaier	if (plen == 0)
1558126258Smlaier		goto drop;
1559126258Smlaier	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
1560126258Smlaier		goto shortpkt;
1561126258Smlaier
1562223637Sbz	pf_scrub_ip6(&m, r->min_ttl);
1563126258Smlaier
1564126258Smlaier	return (PF_PASS);
1565126258Smlaier
1566126258Smlaier fragment:
1567284569Skp	/* Jumbo payload packets cannot be fragmented. */
1568284569Skp	plen = ntohs(h->ip6_plen);
1569284569Skp	if (plen == 0 || jumbolen)
1570126258Smlaier		goto drop;
1571284569Skp	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
1572284569Skp		goto shortpkt;
1573126258Smlaier
1574126258Smlaier	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
1575126258Smlaier		goto shortpkt;
1576126258Smlaier
1577284569Skp	/* Offset now points to data portion. */
1578284569Skp	off += sizeof(frag);
1579284569Skp
1580284569Skp	/* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf. */
1581284581Skp	if (pf_reassemble6(m0, h, &frag, off, extoff, reason) != PF_PASS)
1582284569Skp		return (PF_DROP);
1583284569Skp	m = *m0;
1584284569Skp	if (m == NULL)
1585284569Skp		return (PF_DROP);
1586284569Skp
1587284569Skp	pd->flags |= PFDESC_IP_REAS;
1588126258Smlaier	return (PF_PASS);
1589126258Smlaier
1590126258Smlaier shortpkt:
1591126258Smlaier	REASON_SET(reason, PFRES_SHORT);
1592126258Smlaier	if (r != NULL && r->log)
1593240233Sglebius		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
1594240233Sglebius		    1);
1595126258Smlaier	return (PF_DROP);
1596126258Smlaier
1597126258Smlaier drop:
1598126258Smlaier	REASON_SET(reason, PFRES_NORM);
1599126258Smlaier	if (r != NULL && r->log)
1600240233Sglebius		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
1601240233Sglebius		    1);
1602126258Smlaier	return (PF_DROP);
1603126258Smlaier}
1604145836Smlaier#endif /* INET6 */
1605126258Smlaier
1606126258Smlaierint
1607130613Smlaierpf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
1608126258Smlaier    int off, void *h, struct pf_pdesc *pd)
1609126258Smlaier{
1610126258Smlaier	struct pf_rule	*r, *rm = NULL;
1611126258Smlaier	struct tcphdr	*th = pd->hdr.tcp;
1612126258Smlaier	int		 rewrite = 0;
1613126258Smlaier	u_short		 reason;
1614126258Smlaier	u_int8_t	 flags;
1615126258Smlaier	sa_family_t	 af = pd->af;
1616126258Smlaier
1617240233Sglebius	PF_RULES_RASSERT();
1618240233Sglebius
1619126258Smlaier	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1620126258Smlaier	while (r != NULL) {
1621126258Smlaier		r->evaluations++;
1622171168Smlaier		if (pfi_kif_match(r->kif, kif) == r->ifnot)
1623126258Smlaier			r = r->skip[PF_SKIP_IFP].ptr;
1624126258Smlaier		else if (r->direction && r->direction != dir)
1625126258Smlaier			r = r->skip[PF_SKIP_DIR].ptr;
1626126258Smlaier		else if (r->af && r->af != af)
1627126258Smlaier			r = r->skip[PF_SKIP_AF].ptr;
1628126258Smlaier		else if (r->proto && r->proto != pd->proto)
1629126258Smlaier			r = r->skip[PF_SKIP_PROTO].ptr;
1630171168Smlaier		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
1631231852Sbz		    r->src.neg, kif, M_GETFIB(m)))
1632126258Smlaier			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1633126258Smlaier		else if (r->src.port_op && !pf_match_port(r->src.port_op,
1634126258Smlaier			    r->src.port[0], r->src.port[1], th->th_sport))
1635126258Smlaier			r = r->skip[PF_SKIP_SRC_PORT].ptr;
1636171168Smlaier		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
1637231852Sbz		    r->dst.neg, NULL, M_GETFIB(m)))
1638126258Smlaier			r = r->skip[PF_SKIP_DST_ADDR].ptr;
1639126258Smlaier		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
1640126258Smlaier			    r->dst.port[0], r->dst.port[1], th->th_dport))
1641126258Smlaier			r = r->skip[PF_SKIP_DST_PORT].ptr;
1642126258Smlaier		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
1643126258Smlaier			    pf_osfp_fingerprint(pd, m, off, th),
1644126258Smlaier			    r->os_fingerprint))
1645126258Smlaier			r = TAILQ_NEXT(r, entries);
1646126258Smlaier		else {
1647126258Smlaier			rm = r;
1648126258Smlaier			break;
1649126258Smlaier		}
1650126258Smlaier	}
1651126258Smlaier
1652145836Smlaier	if (rm == NULL || rm->action == PF_NOSCRUB)
1653126258Smlaier		return (PF_PASS);
1654171168Smlaier	else {
1655171168Smlaier		r->packets[dir == PF_OUT]++;
1656171168Smlaier		r->bytes[dir == PF_OUT] += pd->tot_len;
1657171168Smlaier	}
1658126258Smlaier
1659126258Smlaier	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
1660126258Smlaier		pd->flags |= PFDESC_TCP_NORM;
1661126258Smlaier
1662126258Smlaier	flags = th->th_flags;
1663126258Smlaier	if (flags & TH_SYN) {
1664126258Smlaier		/* Illegal packet */
1665126258Smlaier		if (flags & TH_RST)
1666126258Smlaier			goto tcp_drop;
1667126258Smlaier
1668126258Smlaier		if (flags & TH_FIN)
1669282688Sgnn			goto tcp_drop;
1670126258Smlaier	} else {
1671126258Smlaier		/* Illegal packet */
1672126258Smlaier		if (!(flags & (TH_ACK|TH_RST)))
1673126258Smlaier			goto tcp_drop;
1674126258Smlaier	}
1675126258Smlaier
1676126258Smlaier	if (!(flags & TH_ACK)) {
1677126258Smlaier		/* These flags are only valid if ACK is set */
1678126258Smlaier		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
1679126258Smlaier			goto tcp_drop;
1680126258Smlaier	}
1681126258Smlaier
1682126258Smlaier	/* Check for illegal header length */
1683126258Smlaier	if (th->th_off < (sizeof(struct tcphdr) >> 2))
1684126258Smlaier		goto tcp_drop;
1685126258Smlaier
1686126258Smlaier	/* If flags changed, or reserved data set, then adjust */
1687126258Smlaier	if (flags != th->th_flags || th->th_x2 != 0) {
1688126258Smlaier		u_int16_t	ov, nv;
1689126258Smlaier
1690126258Smlaier		ov = *(u_int16_t *)(&th->th_ack + 1);
1691126258Smlaier		th->th_flags = flags;
1692126258Smlaier		th->th_x2 = 0;
1693126258Smlaier		nv = *(u_int16_t *)(&th->th_ack + 1);
1694126258Smlaier
1695289703Skp		th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, ov, nv, 0);
1696126258Smlaier		rewrite = 1;
1697126258Smlaier	}
1698126258Smlaier
1699126258Smlaier	/* Remove urgent pointer, if TH_URG is not set */
1700126258Smlaier	if (!(flags & TH_URG) && th->th_urp) {
1701289703Skp		th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, th->th_urp,
1702289703Skp		    0, 0);
1703126258Smlaier		th->th_urp = 0;
1704126258Smlaier		rewrite = 1;
1705126258Smlaier	}
1706126258Smlaier
1707126258Smlaier	/* Process options */
1708223637Sbz	if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af))
1709126258Smlaier		rewrite = 1;
1710126258Smlaier
1711126258Smlaier	/* copy back packet headers if we sanitized */
1712126258Smlaier	if (rewrite)
1713126261Smlaier		m_copyback(m, off, sizeof(*th), (caddr_t)th);
1714126258Smlaier
1715126258Smlaier	return (PF_PASS);
1716126258Smlaier
1717126258Smlaier tcp_drop:
1718126258Smlaier	REASON_SET(&reason, PFRES_NORM);
1719126258Smlaier	if (rm != NULL && r->log)
1720240233Sglebius		PFLOG_PACKET(kif, m, AF_INET, dir, reason, r, NULL, NULL, pd,
1721240233Sglebius		    1);
1722126258Smlaier	return (PF_DROP);
1723126258Smlaier}
1724126258Smlaier
/*
 * Allocate and initialize the scrub state for the source peer of a new
 * TCP state entry.  Records the initial TTL/hop limit and, on a SYN
 * carrying a timestamp option, seeds the timestamp-modulation and PAWS
 * bookkeeping fields.
 *
 * Returns 0 on success, 1 if the scrub zone allocation fails.
 */
int
pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
{
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];	/* max TCP header incl. options (th_off <= 15) */
	u_int8_t *opt;

	KASSERT((src->scrub == NULL),
	    ("pf_normalize_tcp_init: src->scrub != NULL"));

	src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT);
	if (src->scrub == NULL)
		return (1);

	/* Remember the TTL seen on the first packet for later clamping. */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	}


	/*
	 * All normalizations below are only begun if we see the start of
	 * the connections.  They must all set an enabled bit in pfss_flags
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);


	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		/* Only the timestamp option (10 bytes) matters here, so we
		 * can stop as soon as fewer than that remain. */
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					/* Random per-connection modulation
					 * applied to outgoing timestamps. */
					src->scrub->pfss_ts_mod =
					    htonl(arc4random());

					/* note PFSS_PAWS not set yet */
					/* memcpy: option data may be
					 * unaligned within hdr[]. */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				/* Advance past the option; MAX guards
				 * against a bogus zero/one length byte. */
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}
1807126258Smlaier
1808126258Smlaiervoid
1809126258Smlaierpf_normalize_tcp_cleanup(struct pf_state *state)
1810126258Smlaier{
1811126258Smlaier	if (state->src.scrub)
1812240233Sglebius		uma_zfree(V_pf_state_scrub_z, state->src.scrub);
1813223637Sbz	if (state->dst.scrub)
1814240233Sglebius		uma_zfree(V_pf_state_scrub_z, state->dst.scrub);
1815126258Smlaier
1816126258Smlaier	/* Someday... flush the TCP segment reassembly descriptors. */
1817126258Smlaier}
1818126258Smlaier
/*
 * Stateful TCP normalization for an established pf state: clamp the
 * TTL/hop limit to the minimum seen, modulate RFC 1323 timestamps, and
 * enforce PAWS-style timestamp sanity checks to reject stale or
 * injected segments.
 *
 * Returns 0 (pass) or PF_DROP with *reason set.  Sets *writeback when
 * the TCP options were rewritten and the caller must copy the header
 * back into the mbuf.
 */
int
pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_state *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval uptime;
	u_int32_t tsval, tsecr;
	u_int tsval_from_last;
	u_int8_t hdr[60];	/* max TCP header incl. options */
	u_int8_t *opt;
	int copyback = 0;
	int got_ts = 0;

	KASSERT((src->scrub || dst->scrub),
	    ("%s: src->scrub && dst->scrub!", __func__));

	/*
	 * Enforce the minimum TTL seen for this connection.  Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = mtod(m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET6 */
	}

	/* Walk the options only when either peer has timestamp
	 * modulation enabled and the header pull succeeds. */
	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/* Modulate the timestamps.  Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */

				if (got_ts) {
					/* Huh?  Multiple timestamps!? */
					if (V_pf_status.debug >= PF_DEBUG_MISC) {
						DPFPRINTF(("multiple TS??"));
						pf_print_state(state);
						printf("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return (PF_DROP);
				}
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					/* Add our modulation to the sender's
					 * tsval; checksum fixed up in place. */
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						pf_change_proto_a(m, &opt[2],
						    &th->th_sum,
						    htonl(tsval +
						    src->scrub->pfss_ts_mod),
						    0);
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						/* Undo the peer's modulation
						 * on the echoed value. */
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_proto_a(m, &opt[6],
						    &th->th_sum, htonl(tsecr),
						    0);
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				/* MAX guards against a bogus option length. */
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
		if (copyback) {
			/* Copyback the options, caller copys back header */
			*writeback = 1;
			m_copyback(m, off + sizeof(struct tcphdr),
			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
			    sizeof(struct tcphdr));
		}
	}


	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms.  That turns out to
	 * be about 24 days before it wraps.  XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection
	 * when the TS has exhausted half its 32bit space
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_uptime - state->creation > TS_MAX_CONN))  {
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("src idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("dst idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers).  PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4Gb).  We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint.  The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now.  The RFC defines the max
		 *    clock rate as 1ms.  We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    or 30 seconds due to a route change.  And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here.  Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood.  Proof MS has at least one
		 *    good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoints timestamp.  The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window.  The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tescr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine.  The other endpoint's echoed
		 *    values will not decrease.  But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease.  For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS.  XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tescr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days.  So we just disable TS checking after 24
		 * days of idle time.  We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval delta_ts;
		int ts_fudge;


		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp.  This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF];

		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		delta_ts = uptime;
		timevalsub(&delta_ts, &src->scrub->pfss_last);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);

		/* tsval/tsecr were filled in above (got_ts != 0). */
		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/* Bad RFC1323 implementation or an insertion attack.
			 *
			 * - Solaris 2.6 and 2.7 are known to send another ACK
			 *   after the FIN,FIN|ACK,ACK closing that carries
			 *   an old timestamp.
			 */

			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
			DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
			    "idle: %jus %lums\n",
			    tsval, tsecr, tsval_from_last,
			    (uintmax_t)delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000));
			DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
			DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u"
			    "\n", dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}

		/* XXX I'd really like to require tsecr but it's optional */

	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Didn't send a timestamp.  Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker to put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset.  RFC suggests not even looking at TS.
		 *  - on an empty ACK.  The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped.  So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets.  There appear to be devices that do legitimate
		 * TCP connection hijacking.  There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cache, it
		 * will spoof the response but not bother timestamping its
		 * packets.  So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(("Did not receive expected RFC1323 "
				    "timestamp\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}


	/*
	 * We will note if a host sends his data packets with or without
	 * timestamps.  And require all data packets to contain a timestamp
	 * if the first does.  PAWS implicitly requires that all data packets be
	 * timestamped.  But I think there are middle-man devices that hijack
	 * TCP streams immediately after the 3whs and don't timestamp their
	 * packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(("Broken RFC1323 stack did not "
				    "timestamp data packet. Disabled PAWS "
				    "security.\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
		}
	}


	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}
2210145836Smlaier
2211240233Sglebiusstatic int
2212126258Smlaierpf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
2213223637Sbz    int off, sa_family_t af)
2214126258Smlaier{
2215126258Smlaier	u_int16_t	*mss;
2216126258Smlaier	int		 thoff;
2217126258Smlaier	int		 opt, cnt, optlen = 0;
2218126258Smlaier	int		 rewrite = 0;
2219223637Sbz	u_char		 opts[TCP_MAXOLEN];
2220223637Sbz	u_char		*optp = opts;
2221126258Smlaier
2222126258Smlaier	thoff = th->th_off << 2;
2223126258Smlaier	cnt = thoff - sizeof(struct tcphdr);
2224126258Smlaier
2225223637Sbz	if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
2226223637Sbz	    NULL, NULL, af))
2227223637Sbz		return (rewrite);
2228223637Sbz
2229126258Smlaier	for (; cnt > 0; cnt -= optlen, optp += optlen) {
2230126258Smlaier		opt = optp[0];
2231126258Smlaier		if (opt == TCPOPT_EOL)
2232126258Smlaier			break;
2233126258Smlaier		if (opt == TCPOPT_NOP)
2234126258Smlaier			optlen = 1;
2235126258Smlaier		else {
2236126258Smlaier			if (cnt < 2)
2237126258Smlaier				break;
2238126258Smlaier			optlen = optp[1];
2239126258Smlaier			if (optlen < 2 || optlen > cnt)
2240126258Smlaier				break;
2241126258Smlaier		}
2242126258Smlaier		switch (opt) {
2243126258Smlaier		case TCPOPT_MAXSEG:
2244126258Smlaier			mss = (u_int16_t *)(optp + 2);
2245126258Smlaier			if ((ntohs(*mss)) > r->max_mss) {
2246289703Skp				th->th_sum = pf_proto_cksum_fixup(m,
2247289703Skp				    th->th_sum, *mss, htons(r->max_mss), 0);
2248126258Smlaier				*mss = htons(r->max_mss);
2249126258Smlaier				rewrite = 1;
2250126258Smlaier			}
2251126258Smlaier			break;
2252126258Smlaier		default:
2253126258Smlaier			break;
2254126258Smlaier		}
2255126258Smlaier	}
2256126258Smlaier
2257223637Sbz	if (rewrite)
2258223637Sbz		m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts);
2259223637Sbz
2260126258Smlaier	return (rewrite);
2261126258Smlaier}
2262223637Sbz
2263240233Sglebius#ifdef INET
2264240233Sglebiusstatic void
2265223637Sbzpf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos)
2266223637Sbz{
2267223637Sbz	struct mbuf		*m = *m0;
2268223637Sbz	struct ip		*h = mtod(m, struct ip *);
2269223637Sbz
2270223637Sbz	/* Clear IP_DF if no-df was requested */
2271223637Sbz	if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
2272223637Sbz		u_int16_t ip_off = h->ip_off;
2273223637Sbz
2274223637Sbz		h->ip_off &= htons(~IP_DF);
2275223637Sbz		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
2276223637Sbz	}
2277223637Sbz
2278223637Sbz	/* Enforce a minimum ttl, may cause endless packet loops */
2279223637Sbz	if (min_ttl && h->ip_ttl < min_ttl) {
2280223637Sbz		u_int16_t ip_ttl = h->ip_ttl;
2281223637Sbz
2282223637Sbz		h->ip_ttl = min_ttl;
2283223637Sbz		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
2284223637Sbz	}
2285223637Sbz
2286223637Sbz	/* Enforce tos */
2287223637Sbz	if (flags & PFRULE_SET_TOS) {
2288223637Sbz		u_int16_t	ov, nv;
2289223637Sbz
2290223637Sbz		ov = *(u_int16_t *)h;
2291223637Sbz		h->ip_tos = tos;
2292223637Sbz		nv = *(u_int16_t *)h;
2293223637Sbz
2294223637Sbz		h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
2295223637Sbz	}
2296223637Sbz
2297223637Sbz	/* random-id, but not for fragments */
2298223637Sbz	if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
2299223637Sbz		u_int16_t ip_id = h->ip_id;
2300223637Sbz
2301223637Sbz		h->ip_id = ip_randomid();
2302223637Sbz		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
2303223637Sbz	}
2304223637Sbz}
2305240233Sglebius#endif /* INET */
2306223637Sbz
2307223637Sbz#ifdef INET6
2308240233Sglebiusstatic void
2309223637Sbzpf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl)
2310223637Sbz{
2311223637Sbz	struct mbuf		*m = *m0;
2312223637Sbz	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
2313223637Sbz
2314223637Sbz	/* Enforce a minimum ttl, may cause endless packet loops */
2315223637Sbz	if (min_ttl && h->ip6_hlim < min_ttl)
2316223637Sbz		h->ip6_hlim = min_ttl;
2317223637Sbz}
2318223637Sbz#endif
2319