ip_frag.c revision 170268
1/*	$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_frag.c 170268 2007-06-04 02:54:36Z darrenr $	*/
2
3/*
4 * Copyright (C) 1993-2003 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 */
8#if defined(KERNEL) || defined(_KERNEL)
9# undef KERNEL
10# undef _KERNEL
11# define        KERNEL	1
12# define        _KERNEL	1
13#endif
14#include <sys/errno.h>
15#include <sys/types.h>
16#include <sys/param.h>
17#include <sys/time.h>
18#include <sys/file.h>
19#ifdef __hpux
20# include <sys/timeout.h>
21#endif
22#if !defined(_KERNEL)
23# include <stdio.h>
24# include <string.h>
25# include <stdlib.h>
26# define _KERNEL
27# ifdef __OpenBSD__
28struct file;
29# endif
30# include <sys/uio.h>
31# undef _KERNEL
32#endif
33#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
34# include <sys/filio.h>
35# include <sys/fcntl.h>
36#else
37# include <sys/ioctl.h>
38#endif
39#if !defined(linux)
40# include <sys/protosw.h>
41#endif
42#include <sys/socket.h>
43#if defined(_KERNEL)
44# include <sys/systm.h>
45# if !defined(__SVR4) && !defined(__svr4__)
46#  include <sys/mbuf.h>
47# endif
48#endif
49#if !defined(__SVR4) && !defined(__svr4__)
50# if defined(_KERNEL) && !defined(__sgi) && !defined(AIX)
51#  include <sys/kernel.h>
52# endif
53#else
54# include <sys/byteorder.h>
55# ifdef _KERNEL
56#  include <sys/dditypes.h>
57# endif
58# include <sys/stream.h>
59# include <sys/kmem.h>
60#endif
61#include <net/if.h>
62#ifdef sun
63# include <net/af.h>
64#endif
65#include <net/route.h>
66#include <netinet/in.h>
67#include <netinet/in_systm.h>
68#include <netinet/ip.h>
69#if !defined(linux)
70# include <netinet/ip_var.h>
71#endif
72#include <netinet/tcp.h>
73#include <netinet/udp.h>
74#include <netinet/ip_icmp.h>
75#include "netinet/ip_compat.h"
76#include <netinet/tcpip.h>
77#include "netinet/ip_fil.h"
78#include "netinet/ip_nat.h"
79#include "netinet/ip_frag.h"
80#include "netinet/ip_state.h"
81#include "netinet/ip_auth.h"
82#include "netinet/ip_proxy.h"
83#if (__FreeBSD_version >= 300000)
84# include <sys/malloc.h>
85# if defined(_KERNEL)
86#  ifndef IPFILTER_LKM
87#   include <sys/libkern.h>
88#   include <sys/systm.h>
89#  endif
90extern struct callout_handle fr_slowtimer_ch;
91# endif
92#endif
93#if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
94# include <sys/callout.h>
95extern struct callout fr_slowtimer_ch;
96#endif
97#if defined(__OpenBSD__)
98# include <sys/timeout.h>
99extern struct timeout fr_slowtimer_ch;
100#endif
101/* END OF INCLUDES */
102
103#if !defined(lint)
104static const char sccsid[] = "@(#)ip_frag.c	1.11 3/24/96 (C) 1993-2000 Darren Reed";
105static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_frag.c 170268 2007-06-04 02:54:36Z darrenr $";
106/* static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.77.2.5 2006/02/26 08:26:54 darrenr Exp $";*/
107#endif
108
109
110ipfr_t   *ipfr_list = NULL;
111ipfr_t   **ipfr_tail = &ipfr_list;
112
113ipfr_t   *ipfr_natlist = NULL;
114ipfr_t   **ipfr_nattail = &ipfr_natlist;
115
116ipfr_t   *ipfr_ipidlist = NULL;
117ipfr_t   **ipfr_ipidtail = &ipfr_ipidlist;
118
119static ipfr_t	**ipfr_heads;
120static ipfr_t	**ipfr_nattab;
121static ipfr_t	**ipfr_ipidtab;
122
123static ipfrstat_t ipfr_stats;
124static int	ipfr_inuse = 0;
125int		ipfr_size = IPFT_SIZE;
126
127int	fr_ipfrttl = 120;	/* 60 seconds */
128int	fr_frag_lock = 0;
129int	fr_frag_init = 0;
130u_long	fr_ticks = 0;
131
132
133static ipfr_t *ipfr_newfrag __P((fr_info_t *, u_32_t, ipfr_t **));
134static ipfr_t *fr_fraglookup __P((fr_info_t *, ipfr_t **));
135static void fr_fragdelete __P((ipfr_t *, ipfr_t ***));
136static void fr_fragfree __P((ipfr_t *));
137
138
139/* ------------------------------------------------------------------------ */
140/* Function:    fr_fraginit                                                 */
141/* Returns:     int - 0 == success, -1 == error                             */
142/* Parameters:  Nil                                                         */
143/*                                                                          */
144/* Initialise the hash tables for the fragment cache lookups.               */
145/* ------------------------------------------------------------------------ */
146int fr_fraginit()
147{
148	KMALLOCS(ipfr_heads, ipfr_t **, ipfr_size * sizeof(ipfr_t *));
149	if (ipfr_heads == NULL)
150		return -1;
151	bzero((char *)ipfr_heads, ipfr_size * sizeof(ipfr_t *));
152
153	KMALLOCS(ipfr_nattab, ipfr_t **, ipfr_size * sizeof(ipfr_t *));
154	if (ipfr_nattab == NULL)
155		return -1;
156	bzero((char *)ipfr_nattab, ipfr_size * sizeof(ipfr_t *));
157
158	KMALLOCS(ipfr_ipidtab, ipfr_t **, ipfr_size * sizeof(ipfr_t *));
159	if (ipfr_ipidtab == NULL)
160		return -1;
161	bzero((char *)ipfr_ipidtab, ipfr_size * sizeof(ipfr_t *));
162
163	RWLOCK_INIT(&ipf_frag, "ipf fragment rwlock");
164	fr_frag_init = 1;
165
166	return 0;
167}
168
169
170/* ------------------------------------------------------------------------ */
171/* Function:    fr_fragunload                                               */
172/* Returns:     Nil                                                         */
173/* Parameters:  Nil                                                         */
174/*                                                                          */
175/* Free all memory allocated whilst running and from initialisation.        */
176/* ------------------------------------------------------------------------ */
177void fr_fragunload()
178{
179	if (fr_frag_init == 1) {
180		fr_fragclear();
181
182		RW_DESTROY(&ipf_frag);
183		fr_frag_init = 0;
184	}
185
186	if (ipfr_heads != NULL)
187		KFREES(ipfr_heads, ipfr_size * sizeof(ipfr_t *));
188	ipfr_heads = NULL;
189
190	if (ipfr_nattab != NULL)
191		KFREES(ipfr_nattab, ipfr_size * sizeof(ipfr_t *));
192	ipfr_nattab = NULL;
193
194	if (ipfr_ipidtab != NULL)
195		KFREES(ipfr_ipidtab, ipfr_size * sizeof(ipfr_t *));
196	ipfr_ipidtab = NULL;
197}
198
199
200/* ------------------------------------------------------------------------ */
201/* Function:    fr_fragstats                                                */
202/* Returns:     ipfrstat_t* - pointer to struct with current frag stats     */
203/* Parameters:  Nil                                                         */
204/*                                                                          */
205/* Updates ipfr_stats with current information and returns a pointer to it  */
206/* ------------------------------------------------------------------------ */
207ipfrstat_t *fr_fragstats()
208{
209	ipfr_stats.ifs_table = ipfr_heads;
210	ipfr_stats.ifs_nattab = ipfr_nattab;
211	ipfr_stats.ifs_inuse = ipfr_inuse;
212	return &ipfr_stats;
213}
214
215
216/* ------------------------------------------------------------------------ */
217/* Function:    ipfr_newfrag                                                */
218/* Returns:     ipfr_t * - pointer to fragment cache state info or NULL     */
219/* Parameters:  fin(I)   - pointer to packet information                    */
220/*              table(I) - pointer to frag table to add to                  */
221/*                                                                          */
222/* Add a new entry to the fragment cache, registering it as having come     */
223/* through this box, with the result of the filter operation.               */
224/* ------------------------------------------------------------------------ */
225static ipfr_t *ipfr_newfrag(fin, pass, table)
226fr_info_t *fin;
227u_32_t pass;
228ipfr_t *table[];
229{
230	ipfr_t *fra, frag;
231	u_int idx, off;
232	frentry_t *fr;
233	ip_t *ip;
234
235	if (ipfr_inuse >= IPFT_SIZE)
236		return NULL;
237
238	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
239		return NULL;
240
241	ip = fin->fin_ip;
242
243	if (pass & FR_FRSTRICT)
244		if (fin->fin_off != 0)
245			return NULL;
246
247	frag.ipfr_p = ip->ip_p;
248	idx = ip->ip_p;
249	frag.ipfr_id = ip->ip_id;
250	idx += ip->ip_id;
251	frag.ipfr_tos = ip->ip_tos;
252	frag.ipfr_src.s_addr = ip->ip_src.s_addr;
253	idx += ip->ip_src.s_addr;
254	frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
255	idx += ip->ip_dst.s_addr;
256	frag.ipfr_ifp = fin->fin_ifp;
257	idx *= 127;
258	idx %= IPFT_SIZE;
259
260	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
261	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
262	frag.ipfr_auth = fin->fin_fi.fi_auth;
263
264	/*
265	 * first, make sure it isn't already there...
266	 */
267	for (fra = table[idx]; (fra != NULL); fra = fra->ipfr_hnext)
268		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp,
269			  IPFR_CMPSZ)) {
270			ipfr_stats.ifs_exists++;
271			return NULL;
272		}
273
274	/*
275	 * allocate some memory, if possible, if not, just record that we
276	 * failed to do so.
277	 */
278	KMALLOC(fra, ipfr_t *);
279	if (fra == NULL) {
280		ipfr_stats.ifs_nomem++;
281		return NULL;
282	}
283
284	fr = fin->fin_fr;
285	fra->ipfr_rule = fr;
286	if (fr != NULL) {
287		MUTEX_ENTER(&fr->fr_lock);
288		fr->fr_ref++;
289		MUTEX_EXIT(&fr->fr_lock);
290	}
291
292	/*
293	 * Insert the fragment into the fragment table, copy the struct used
294	 * in the search using bcopy rather than reassign each field.
295	 * Set the ttl to the default.
296	 */
297	if ((fra->ipfr_hnext = table[idx]) != NULL)
298		table[idx]->ipfr_hprev = &fra->ipfr_hnext;
299	fra->ipfr_hprev = table + idx;
300	fra->ipfr_data = NULL;
301	table[idx] = fra;
302	bcopy((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, IPFR_CMPSZ);
303	fra->ipfr_ttl = fr_ticks + fr_ipfrttl;
304
305	/*
306	 * Compute the offset of the expected start of the next packet.
307	 */
308	off = ip->ip_off & IP_OFFMASK;
309	if (off == 0)
310		fra->ipfr_seen0 = 1;
311	fra->ipfr_off = off + (fin->fin_dlen >> 3);
312	fra->ipfr_pass = pass;
313	fra->ipfr_ref = 1;
314	ipfr_stats.ifs_new++;
315	ipfr_inuse++;
316	return fra;
317}
318
319
320/* ------------------------------------------------------------------------ */
321/* Function:    fr_newfrag                                                  */
322/* Returns:     int - 0 == success, -1 == error                             */
323/* Parameters:  fin(I)  - pointer to packet information                     */
324/*                                                                          */
325/* Add a new entry to the fragment cache table based on the current packet  */
326/* ------------------------------------------------------------------------ */
327int fr_newfrag(fin, pass)
328u_32_t pass;
329fr_info_t *fin;
330{
331	ipfr_t	*fra;
332
333	if ((fin->fin_v != 4) || (fr_frag_lock != 0))
334		return -1;
335
336	WRITE_ENTER(&ipf_frag);
337	fra = ipfr_newfrag(fin, pass, ipfr_heads);
338	if (fra != NULL) {
339		*ipfr_tail = fra;
340		fra->ipfr_prev = ipfr_tail;
341		ipfr_tail = &fra->ipfr_next;
342		if (ipfr_list == NULL)
343			ipfr_list = fra;
344		fra->ipfr_next = NULL;
345	}
346	RWLOCK_EXIT(&ipf_frag);
347	return fra ? 0 : -1;
348}
349
350
351/* ------------------------------------------------------------------------ */
352/* Function:    fr_nat_newfrag                                              */
353/* Returns:     int - 0 == success, -1 == error                             */
354/* Parameters:  fin(I)  - pointer to packet information                     */
355/*              nat(I)  - pointer to NAT structure                          */
356/*                                                                          */
357/* Create a new NAT fragment cache entry based on the current packet and    */
358/* the NAT structure for this "session".                                    */
359/* ------------------------------------------------------------------------ */
360int fr_nat_newfrag(fin, pass, nat)
361fr_info_t *fin;
362u_32_t pass;
363nat_t *nat;
364{
365	ipfr_t	*fra;
366
367	if ((fin->fin_v != 4) || (fr_frag_lock != 0))
368		return 0;
369
370	WRITE_ENTER(&ipf_natfrag);
371	fra = ipfr_newfrag(fin, pass, ipfr_nattab);
372	if (fra != NULL) {
373		fra->ipfr_data = nat;
374		nat->nat_data = fra;
375		*ipfr_nattail = fra;
376		fra->ipfr_prev = ipfr_nattail;
377		ipfr_nattail = &fra->ipfr_next;
378		fra->ipfr_next = NULL;
379	}
380	RWLOCK_EXIT(&ipf_natfrag);
381	return fra ? 0 : -1;
382}
383
384
385/* ------------------------------------------------------------------------ */
386/* Function:    fr_ipid_newfrag                                             */
387/* Returns:     int - 0 == success, -1 == error                             */
388/* Parameters:  fin(I)  - pointer to packet information                     */
389/*              ipid(I) - new IP ID for this fragmented packet              */
390/*                                                                          */
391/* Create a new fragment cache entry for this packet and store, as a data   */
392/* pointer, the new IP ID value.                                            */
393/* ------------------------------------------------------------------------ */
394int fr_ipid_newfrag(fin, ipid)
395fr_info_t *fin;
396u_32_t ipid;
397{
398	ipfr_t	*fra;
399
400	if ((fin->fin_v != 4) || (fr_frag_lock))
401		return 0;
402
403	WRITE_ENTER(&ipf_ipidfrag);
404	fra = ipfr_newfrag(fin, 0, ipfr_ipidtab);
405	if (fra != NULL) {
406		fra->ipfr_data = (void *)(uintptr_t)ipid;
407		*ipfr_ipidtail = fra;
408		fra->ipfr_prev = ipfr_ipidtail;
409		ipfr_ipidtail = &fra->ipfr_next;
410		fra->ipfr_next = NULL;
411	}
412	RWLOCK_EXIT(&ipf_ipidfrag);
413	return fra ? 0 : -1;
414}
415
416
417/* ------------------------------------------------------------------------ */
418/* Function:    fr_fraglookup                                               */
419/* Returns:     ipfr_t * - pointer to ipfr_t structure if there's a         */
420/*                         matching entry in the frag table, else NULL      */
421/* Parameters:  fin(I)   - pointer to packet information                    */
422/*              table(I) - pointer to fragment cache table to search        */
423/*                                                                          */
424/* Check the fragment cache to see if there is already a record of this     */
425/* packet with its filter result known.                                     */
426/* ------------------------------------------------------------------------ */
427static ipfr_t *fr_fraglookup(fin, table)
428fr_info_t *fin;
429ipfr_t *table[];
430{
431	ipfr_t *f, frag;
432	u_int idx;
433	ip_t *ip;
434
435	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
436		return NULL;
437
438	/*
439	 * For fragments, we record protocol, packet id, TOS and both IP#'s
440	 * (these should all be the same for all fragments of a packet).
441	 *
442	 * build up a hash value to index the table with.
443	 */
444	ip = fin->fin_ip;
445	frag.ipfr_p = ip->ip_p;
446	idx = ip->ip_p;
447	frag.ipfr_id = ip->ip_id;
448	idx += ip->ip_id;
449	frag.ipfr_tos = ip->ip_tos;
450	frag.ipfr_src.s_addr = ip->ip_src.s_addr;
451	idx += ip->ip_src.s_addr;
452	frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
453	idx += ip->ip_dst.s_addr;
454	frag.ipfr_ifp = fin->fin_ifp;
455	idx *= 127;
456	idx %= IPFT_SIZE;
457
458	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
459	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
460	frag.ipfr_auth = fin->fin_fi.fi_auth;
461
462	/*
463	 * check the table, careful to only compare the right amount of data
464	 */
465	for (f = table[idx]; f; f = f->ipfr_hnext)
466		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&f->ipfr_ifp,
467			  IPFR_CMPSZ)) {
468			u_short	off;
469
470			/*
471			 * We don't want to let short packets match because
472			 * they could be compromising the security of other
473			 * rules that want to match on layer 4 fields (and
474			 * can't because they have been fragmented off.)
475			 * Why do this check here?  The counter acts as an
476			 * indicator of this kind of attack, whereas if it was
477			 * elsewhere, it wouldn't know if other matching
478			 * packets had been seen.
479			 */
480			if (fin->fin_flx & FI_SHORT) {
481				ATOMIC_INCL(ipfr_stats.ifs_short);
482				continue;
483			}
484
485			/*
486			 * XXX - We really need to be guarding against the
487			 * retransmission of (src,dst,id,offset-range) here
488			 * because a fragmented packet is never resent with
489			 * the same IP ID# (or shouldn't).
490			 */
491			off = ip->ip_off & IP_OFFMASK;
492			if (f->ipfr_seen0) {
493				if (off == 0) {
494					ATOMIC_INCL(ipfr_stats.ifs_retrans0);
495					continue;
496				}
497			} else if (off == 0)
498				f->ipfr_seen0 = 1;
499
500			if (f != table[idx]) {
501				ipfr_t **fp;
502
503				/*
504				 * Move fragment info. to the top of the list
505				 * to speed up searches.  First, delink...
506				 */
507				fp = f->ipfr_hprev;
508				(*fp) = f->ipfr_hnext;
509				if (f->ipfr_hnext != NULL)
510					f->ipfr_hnext->ipfr_hprev = fp;
511				/*
512				 * Then put back at the top of the chain.
513				 */
514				f->ipfr_hnext = table[idx];
515				table[idx]->ipfr_hprev = &f->ipfr_hnext;
516				f->ipfr_hprev = table + idx;
517				table[idx] = f;
518			}
519
520			/*
521			 * If we've follwed the fragments, and this is the
522			 * last (in order), shrink expiration time.
523			 */
524			if (off == f->ipfr_off) {
525				if (!(ip->ip_off & IP_MF))
526					f->ipfr_ttl = fr_ticks + 1;
527				f->ipfr_off = (fin->fin_dlen >> 3) + off;
528			} else if (f->ipfr_pass & FR_FRSTRICT)
529				continue;
530			ATOMIC_INCL(ipfr_stats.ifs_hits);
531			return f;
532		}
533	return NULL;
534}
535
536
537/* ------------------------------------------------------------------------ */
538/* Function:    fr_nat_knownfrag                                            */
539/* Returns:     nat_t* - pointer to 'parent' NAT structure if frag table    */
540/*                       match found, else NULL                             */
541/* Parameters:  fin(I)  - pointer to packet information                     */
542/*                                                                          */
543/* Functional interface for NAT lookups of the NAT fragment cache           */
544/* ------------------------------------------------------------------------ */
545nat_t *fr_nat_knownfrag(fin)
546fr_info_t *fin;
547{
548	nat_t	*nat;
549	ipfr_t	*ipf;
550
551	if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_natlist)
552		return NULL;
553	READ_ENTER(&ipf_natfrag);
554	ipf = fr_fraglookup(fin, ipfr_nattab);
555	if (ipf != NULL) {
556		nat = ipf->ipfr_data;
557		/*
558		 * This is the last fragment for this packet.
559		 */
560		if ((ipf->ipfr_ttl == fr_ticks + 1) && (nat != NULL)) {
561			nat->nat_data = NULL;
562			ipf->ipfr_data = NULL;
563		}
564	} else
565		nat = NULL;
566	RWLOCK_EXIT(&ipf_natfrag);
567	return nat;
568}
569
570
571/* ------------------------------------------------------------------------ */
572/* Function:    fr_ipid_knownfrag                                           */
573/* Returns:     u_32_t - IPv4 ID for this packet if match found, else       */
574/*                       return 0xfffffff to indicate no match.             */
575/* Parameters:  fin(I) - pointer to packet information                      */
576/*                                                                          */
577/* Functional interface for IP ID lookups of the IP ID fragment cache       */
578/* ------------------------------------------------------------------------ */
579u_32_t fr_ipid_knownfrag(fin)
580fr_info_t *fin;
581{
582	ipfr_t	*ipf;
583	u_32_t	id;
584
585	if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_ipidlist)
586		return 0xffffffff;
587
588	READ_ENTER(&ipf_ipidfrag);
589	ipf = fr_fraglookup(fin, ipfr_ipidtab);
590	if (ipf != NULL)
591		id = (u_32_t)(uintptr_t)ipf->ipfr_data;
592	else
593		id = 0xffffffff;
594	RWLOCK_EXIT(&ipf_ipidfrag);
595	return id;
596}
597
598
599/* ------------------------------------------------------------------------ */
600/* Function:    fr_knownfrag                                                */
601/* Returns:     frentry_t* - pointer to filter rule if a match is found in  */
602/*                           the frag cache table, else NULL.               */
603/* Parameters:  fin(I)   - pointer to packet information                    */
604/*              passp(O) - pointer to where to store rule flags resturned   */
605/*                                                                          */
606/* Functional interface for normal lookups of the fragment cache.  If a     */
607/* match is found, return the rule pointer and flags from the rule, except  */
608/* that if FR_LOGFIRST is set, reset FR_LOG.                                */
609/* ------------------------------------------------------------------------ */
610frentry_t *fr_knownfrag(fin, passp)
611fr_info_t *fin;
612u_32_t *passp;
613{
614	frentry_t *fr = NULL;
615	ipfr_t	*fra;
616	u_32_t pass;
617
618	if ((fin->fin_v != 4) || (fr_frag_lock) || (ipfr_list == NULL))
619		return NULL;
620
621	READ_ENTER(&ipf_frag);
622	fra = fr_fraglookup(fin, ipfr_heads);
623	if (fra != NULL) {
624		fr = fra->ipfr_rule;
625		fin->fin_fr = fr;
626		if (fr != NULL) {
627			pass = fr->fr_flags;
628			if ((pass & FR_LOGFIRST) != 0)
629				pass &= ~(FR_LOGFIRST|FR_LOG);
630			*passp = pass;
631		}
632	}
633	RWLOCK_EXIT(&ipf_frag);
634	return fr;
635}
636
637
638/* ------------------------------------------------------------------------ */
639/* Function:    fr_forget                                                   */
640/* Returns:     Nil                                                         */
641/* Parameters:  ptr(I) - pointer to data structure                          */
642/*                                                                          */
643/* Search through all of the fragment cache entries and wherever a pointer  */
644/* is found to match ptr, reset it to NULL.                                 */
645/* ------------------------------------------------------------------------ */
646void fr_forget(ptr)
647void *ptr;
648{
649	ipfr_t	*fr;
650
651	WRITE_ENTER(&ipf_frag);
652	for (fr = ipfr_list; fr; fr = fr->ipfr_next)
653		if (fr->ipfr_data == ptr)
654			fr->ipfr_data = NULL;
655	RWLOCK_EXIT(&ipf_frag);
656}
657
658
659/* ------------------------------------------------------------------------ */
660/* Function:    fr_forgetnat                                                */
661/* Returns:     Nil                                                         */
662/* Parameters:  ptr(I) - pointer to data structure                          */
663/*                                                                          */
664/* Search through all of the fragment cache entries for NAT and wherever a  */
665/* pointer  is found to match ptr, reset it to NULL.                        */
666/* ------------------------------------------------------------------------ */
667void fr_forgetnat(ptr)
668void *ptr;
669{
670	ipfr_t	*fr;
671
672	WRITE_ENTER(&ipf_natfrag);
673	for (fr = ipfr_natlist; fr; fr = fr->ipfr_next)
674		if (fr->ipfr_data == ptr)
675			fr->ipfr_data = NULL;
676	RWLOCK_EXIT(&ipf_natfrag);
677}
678
679
680/* ------------------------------------------------------------------------ */
681/* Function:    fr_fragdelete                                               */
682/* Returns:     Nil                                                         */
683/* Parameters:  fra(I)   - pointer to fragment structure to delete          */
684/*              tail(IO) - pointer to the pointer to the tail of the frag   */
685/*                         list                                             */
686/*                                                                          */
687/* Remove a fragment cache table entry from the table & list.  Also free    */
688/* the filter rule it is associated with it if it is no longer used as a    */
689/* result of decreasing the reference count.                                */
690/* ------------------------------------------------------------------------ */
691static void fr_fragdelete(fra, tail)
692ipfr_t *fra, ***tail;
693{
694
695	if (fra->ipfr_next)
696		fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
697	*fra->ipfr_prev = fra->ipfr_next;
698	if (*tail == &fra->ipfr_next)
699		*tail = fra->ipfr_prev;
700
701	if (fra->ipfr_hnext)
702		fra->ipfr_hnext->ipfr_hprev = fra->ipfr_hprev;
703	*fra->ipfr_hprev = fra->ipfr_hnext;
704
705	if (fra->ipfr_rule != NULL) {
706		(void) fr_derefrule(&fra->ipfr_rule);
707	}
708
709	if (fra->ipfr_ref <= 0)
710		fr_fragfree(fra);
711}
712
713
714/* ------------------------------------------------------------------------ */
715/* Function:    fr_fragfree                                                 */
716/* Returns:     Nil                                                         */
717/* Parameters:  fra - pointer to frag structure to free                     */
718/*                                                                          */
719/* Take care of the details associated with deleting an entry from the frag */
720/* cache.  Currently this just means bumping stats correctly after freeing  */
721/* ------------------------------------------------------------------------ */
722static void fr_fragfree(fra)
723ipfr_t *fra;
724{
725	KFREE(fra);
726	ipfr_stats.ifs_expire++;
727	ipfr_inuse--;
728}
729
730
731/* ------------------------------------------------------------------------ */
732/* Function:    fr_fragclear                                                */
733/* Returns:     Nil                                                         */
734/* Parameters:  Nil                                                         */
735/*                                                                          */
736/* Free memory in use by fragment state information kept.  Do the normal    */
737/* fragment state stuff first and then the NAT-fragment table.              */
738/* ------------------------------------------------------------------------ */
739void fr_fragclear()
740{
741	ipfr_t	*fra;
742	nat_t	*nat;
743
744	WRITE_ENTER(&ipf_frag);
745	while ((fra = ipfr_list) != NULL) {
746		fra->ipfr_ref--;
747		fr_fragdelete(fra, &ipfr_tail);
748	}
749	ipfr_tail = &ipfr_list;
750	RWLOCK_EXIT(&ipf_frag);
751
752	WRITE_ENTER(&ipf_nat);
753	WRITE_ENTER(&ipf_natfrag);
754	while ((fra = ipfr_natlist) != NULL) {
755		nat = fra->ipfr_data;
756		if (nat != NULL) {
757			if (nat->nat_data == fra)
758				nat->nat_data = NULL;
759		}
760		fra->ipfr_ref--;
761		fr_fragdelete(fra, &ipfr_nattail);
762	}
763	ipfr_nattail = &ipfr_natlist;
764	RWLOCK_EXIT(&ipf_natfrag);
765	RWLOCK_EXIT(&ipf_nat);
766}
767
768
769/* ------------------------------------------------------------------------ */
770/* Function:    fr_fragexpire                                               */
771/* Returns:     Nil                                                         */
772/* Parameters:  Nil                                                         */
773/*                                                                          */
774/* Expire entries in the fragment cache table that have been there too long */
775/* ------------------------------------------------------------------------ */
776void fr_fragexpire()
777{
778	ipfr_t	**fp, *fra;
779	nat_t	*nat;
780	SPL_INT(s);
781
782	if (fr_frag_lock)
783		return;
784
785	SPL_NET(s);
786	WRITE_ENTER(&ipf_frag);
787	/*
788	 * Go through the entire table, looking for entries to expire,
789	 * which is indicated by the ttl being less than or equal to fr_ticks.
790	 */
791	for (fp = &ipfr_list; ((fra = *fp) != NULL); ) {
792		if (fra->ipfr_ttl > fr_ticks)
793			break;
794		fra->ipfr_ref--;
795		fr_fragdelete(fra, &ipfr_tail);
796	}
797	RWLOCK_EXIT(&ipf_frag);
798
799	WRITE_ENTER(&ipf_ipidfrag);
800	for (fp = &ipfr_ipidlist; ((fra = *fp) != NULL); ) {
801		if (fra->ipfr_ttl > fr_ticks)
802			break;
803		fra->ipfr_ref--;
804		fr_fragdelete(fra, &ipfr_ipidtail);
805	}
806	RWLOCK_EXIT(&ipf_ipidfrag);
807
808	/*
809	 * Same again for the NAT table, except that if the structure also
810	 * still points to a NAT structure, and the NAT structure points back
811	 * at the one to be free'd, NULL the reference from the NAT struct.
812	 * NOTE: We need to grab both mutex's early, and in this order so as
813	 * to prevent a deadlock if both try to expire at the same time.
814	 * The extra if() statement here is because it locks out all NAT
815	 * operations - no need to do that if there are no entries in this
816	 * list, right?
817	 */
818	if (ipfr_natlist != NULL) {
819		WRITE_ENTER(&ipf_nat);
820		WRITE_ENTER(&ipf_natfrag);
821		for (fp = &ipfr_natlist; ((fra = *fp) != NULL); ) {
822			if (fra->ipfr_ttl > fr_ticks)
823				break;
824			nat = fra->ipfr_data;
825			if (nat != NULL) {
826				if (nat->nat_data == fra)
827					nat->nat_data = NULL;
828			}
829			fra->ipfr_ref--;
830			fr_fragdelete(fra, &ipfr_nattail);
831		}
832		RWLOCK_EXIT(&ipf_natfrag);
833		RWLOCK_EXIT(&ipf_nat);
834	}
835	SPL_X(s);
836}
837
838
839/* ------------------------------------------------------------------------ */
840/* Function:    fr_slowtimer                                                */
841/* Returns:     Nil                                                         */
842/* Parameters:  Nil                                                         */
843/*                                                                          */
844/* Slowly expire held state for fragments.  Timeouts are set * in           */
845/* expectation of this being called twice per second.                       */
846/* ------------------------------------------------------------------------ */
847#if !defined(_KERNEL) || (!SOLARIS && !defined(__hpux) && !defined(__sgi) && \
848			  !defined(__osf__) && !defined(linux))
849# if defined(_KERNEL) && ((BSD >= 199103) || defined(__sgi))
850void fr_slowtimer __P((void *ptr))
851# else
852int fr_slowtimer()
853# endif
854{
855	READ_ENTER(&ipf_global);
856
857	ipf_expiretokens();
858	fr_fragexpire();
859	fr_timeoutstate();
860	fr_natexpire();
861	fr_authexpire();
862	fr_ticks++;
863	if (fr_running <= 0)
864		goto done;
865# ifdef _KERNEL
866#  if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
867	callout_reset(&fr_slowtimer_ch, hz / 2, fr_slowtimer, NULL);
868#  else
869#   if defined(__OpenBSD__)
870	timeout_add(&fr_slowtimer_ch, hz/2);
871#   else
872#    if (__FreeBSD_version >= 300000)
873	fr_slowtimer_ch = timeout(fr_slowtimer, NULL, hz/2);
874#    else
875#     ifdef linux
876	;
877#     else
878	timeout(fr_slowtimer, NULL, hz/2);
879#     endif
880#    endif /* FreeBSD */
881#   endif /* OpenBSD */
882#  endif /* NetBSD */
883# endif
884done:
885	RWLOCK_EXIT(&ipf_global);
886# if (BSD < 199103) || !defined(_KERNEL)
887	return 0;
888# endif
889}
890#endif /* !SOLARIS && !defined(__hpux) && !defined(__sgi) */
891
892
893/* ------------------------------------------------------------------------ */
894/* Function:    fr_nextfrag                                                 */
895/* Returns:     int      - 0 == success, else error                         */
896/* Parameters:  token(I) - pointer to token information for this caller     */
897/*              itp(I)   - pointer to generic iterator from caller          */
898/*              top(I)   - top of the fragment list                         */
899/*              tail(I)  - tail of the fragment list                        */
900/*              lock(I)  - fragment cache lock                              */
901/*                                                                          */
902/* This function is used to interate through the list of entries in the     */
903/* fragment cache.  It increases the reference count on the one currently   */
904/* being returned so that the caller can come back and resume from it later.*/
905/*                                                                          */
906/* This function is used for both the NAT fragment cache as well as the ipf */
907/* fragment cache - hence the reason for passing in top, tail and lock.     */
908/* ------------------------------------------------------------------------ */
909int fr_nextfrag(token, itp, top, tail
910#ifdef USE_MUTEXES
911, lock
912#endif
913)
914ipftoken_t *token;
915ipfgeniter_t *itp;
916ipfr_t **top, ***tail;
917#ifdef USE_MUTEXES
918ipfrwlock_t *lock;
919#endif
920{
921	ipfr_t *frag, *next, zero;
922	int error = 0;
923
924	frag = token->ipt_data;
925	if (frag == (ipfr_t *)-1) {
926		ipf_freetoken(token);
927		return ESRCH;
928	}
929
930	READ_ENTER(lock);
931	if (frag == NULL)
932		next = *top;
933	else
934		next = frag->ipfr_next;
935
936	if (next != NULL) {
937		ATOMIC_INC(next->ipfr_ref);
938		token->ipt_data = next;
939	} else {
940		bzero(&zero, sizeof(zero));
941		next = &zero;
942		token->ipt_data = (void *)-1;
943	}
944	RWLOCK_EXIT(lock);
945
946	if (frag != NULL) {
947		WRITE_ENTER(lock);
948		frag->ipfr_ref--;
949		if (frag->ipfr_ref <= 0)
950			fr_fragfree(frag);
951		RWLOCK_EXIT(lock);
952	}
953
954	error = COPYOUT(next, itp->igi_data, sizeof(*next));
955	if (error != 0)
956		error = EFAULT;
957
958	return error;
959}
960
961
962/* ------------------------------------------------------------------------ */
963/* Function:    fr_fragderef                                                */
964/* Returns:     Nil                                                         */
965/* Parameters:  frp(IO) - pointer to fragment structure to deference        */
966/*              lock(I) - lock associated with the fragment                 */
967/*                                                                          */
968/* This function dereferences a fragment structure (ipfr_t).  The pointer   */
969/* passed in will always be reset back to NULL, even if the structure is    */
970/* not freed, to enforce the notion that the caller is no longer entitled   */
971/* to use the pointer it is dropping the reference to.                      */
972/* ------------------------------------------------------------------------ */
973void fr_fragderef(frp
974#ifdef USE_MUTEXES
975, lock
976#endif
977)
978ipfr_t **frp;
979#ifdef USE_MUTEXES
980ipfrwlock_t *lock;
981#endif
982{
983	ipfr_t *fra;
984
985	fra = *frp;
986	*frp = NULL;
987
988	WRITE_ENTER(lock);
989	fra->ipfr_ref--;
990	if (fra->ipfr_ref <= 0)
991		fr_fragfree(fra);
992	RWLOCK_EXIT(lock);
993}
994