1/*
2 * Copyright (C) 1993-2003 by Darren Reed.
3 *
4 * See the IPFILTER.LICENCE file for details on licencing.
5 *
6 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
7 * Use is subject to license terms.
8 */
9
10#if defined(KERNEL) || defined(_KERNEL)
11# undef KERNEL
12# undef _KERNEL
13# define        KERNEL	1
14# define        _KERNEL	1
15#endif
16#include <sys/errno.h>
17#include <sys/types.h>
18#include <sys/param.h>
19#include <sys/time.h>
20#include <sys/file.h>
21#ifdef __hpux
22# include <sys/timeout.h>
23#endif
24#if !defined(_KERNEL)
25# include <stdio.h>
26# include <string.h>
27# include <stdlib.h>
28# define _KERNEL
29# ifdef __OpenBSD__
30struct file;
31# endif
32# include <sys/uio.h>
33# undef _KERNEL
34#endif
35#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
36# include <sys/filio.h>
37# include <sys/fcntl.h>
38#else
39# include <sys/ioctl.h>
40#endif
41#if !defined(linux)
42# include <sys/protosw.h>
43#endif
44#include <sys/socket.h>
45#if defined(_KERNEL)
46# include <sys/systm.h>
47# if !defined(__SVR4) && !defined(__svr4__)
48#  include <sys/mbuf.h>
49# endif
50#endif
51#if !defined(__SVR4) && !defined(__svr4__)
52# if defined(_KERNEL) && !defined(__sgi) && !defined(AIX)
53#  include <sys/kernel.h>
54# endif
55#else
56# include <sys/byteorder.h>
57# ifdef _KERNEL
58#  include <sys/dditypes.h>
59# endif
60# include <sys/stream.h>
61# include <sys/kmem.h>
62#endif
63#include <net/if.h>
64#ifdef sun
65# include <net/af.h>
66#endif
67#include <net/route.h>
68#include <netinet/in.h>
69#include <netinet/in_systm.h>
70#include <netinet/ip.h>
71#if !defined(linux)
72# include <netinet/ip_var.h>
73#endif
74#include <netinet/tcp.h>
75#include <netinet/udp.h>
76#include <netinet/ip_icmp.h>
77#include "netinet/ip_compat.h"
78#include <netinet/tcpip.h>
79#include "netinet/ip_fil.h"
80#include "netinet/ip_nat.h"
81#include "netinet/ip_frag.h"
82#include "netinet/ip_state.h"
83#include "netinet/ip_auth.h"
84#include "netinet/ipf_stack.h"
85#if (__FreeBSD_version >= 300000)
86# include <sys/malloc.h>
87# if defined(_KERNEL)
88#  ifndef IPFILTER_LKM
89#   include <sys/libkern.h>
90#   include <sys/systm.h>
91#  endif
92extern struct callout_handle fr_slowtimer_ch;
93# endif
94#endif
95#if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
96# include <sys/callout.h>
97extern struct callout fr_slowtimer_ch;
98#endif
99#if defined(__OpenBSD__)
100# include <sys/timeout.h>
101extern struct timeout fr_slowtimer_ch;
102#endif
103/* END OF INCLUDES */
104
/*
 * Version identification strings embedded in the object; compiled out when
 * running lint.
 */
#if !defined(lint)
static const char sccsid[] = "@(#)ip_frag.c	1.11 3/24/96 (C) 1993-2000 Darren Reed";
static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.77.2.5 2005/08/11 14:33:10 darrenr Exp $";
#endif

/*
 * Forward declarations for the helpers that are private to this file.
 */
static INLINE int ipfr_index __P((fr_info_t *, ipfr_t *));
static ipfr_t *ipfr_newfrag __P((fr_info_t *, u_32_t, ipfr_t **));
static ipfr_t *fr_fraglookup __P((fr_info_t *, ipfr_t **));
static void fr_fragdelete __P((ipfr_t *, ipfr_t ***, ipf_stack_t *));
114
115/* ------------------------------------------------------------------------ */
116/* Function:    fr_fraginit                                                 */
117/* Returns:     int - 0 == success, -1 == error                             */
118/* Parameters:  Nil                                                         */
119/*                                                                          */
120/* Initialise the hash tables for the fragment cache lookups.               */
121/* ------------------------------------------------------------------------ */
122int fr_fraginit(ifs)
123ipf_stack_t *ifs;
124{
125	ifs->ifs_ipfr_tail = &ifs->ifs_ipfr_list;
126	ifs->ifs_ipfr_nattail = &ifs->ifs_ipfr_natlist;
127	ifs->ifs_ipfr_ipidtail = &ifs->ifs_ipfr_ipidlist;
128	/* the IP frag related variables are set in ipftuneable_setdefs() to
129	 * their default values
130	 */
131
132	KMALLOCS(ifs->ifs_ipfr_heads, ipfr_t **,
133	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
134	if (ifs->ifs_ipfr_heads == NULL)
135		return -1;
136	bzero((char *)ifs->ifs_ipfr_heads,
137	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
138
139	KMALLOCS(ifs->ifs_ipfr_nattab, ipfr_t **,
140	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
141	if (ifs->ifs_ipfr_nattab == NULL)
142		return -1;
143	bzero((char *)ifs->ifs_ipfr_nattab,
144	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
145
146	KMALLOCS(ifs->ifs_ipfr_ipidtab, ipfr_t **,
147	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
148	if (ifs->ifs_ipfr_ipidtab == NULL)
149		return -1;
150	bzero((char *)ifs->ifs_ipfr_ipidtab,
151	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
152
153	RWLOCK_INIT(&ifs->ifs_ipf_frag, "ipf fragment rwlock");
154
155	/* Initialise frblock with "block in all" */
156	bzero((char *)&ifs->ifs_frblock, sizeof(ifs->ifs_frblock));
157	ifs->ifs_frblock.fr_flags = FR_BLOCK|FR_INQUE;	/* block in */
158	ifs->ifs_frblock.fr_ref = 1;
159
160	ifs->ifs_fr_frag_init = 1;
161
162	return 0;
163}
164
165
166/* ------------------------------------------------------------------------ */
167/* Function:    fr_fragunload                                               */
168/* Returns:     Nil                                                         */
169/* Parameters:  Nil                                                         */
170/*                                                                          */
171/* Free all memory allocated whilst running and from initialisation.        */
172/* ------------------------------------------------------------------------ */
173void fr_fragunload(ifs)
174ipf_stack_t *ifs;
175{
176	if (ifs->ifs_fr_frag_init == 1) {
177		fr_fragclear(ifs);
178
179		RW_DESTROY(&ifs->ifs_ipf_frag);
180		ifs->ifs_fr_frag_init = 0;
181	}
182
183	if (ifs->ifs_ipfr_heads != NULL) {
184		KFREES(ifs->ifs_ipfr_heads,
185		    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
186	}
187	ifs->ifs_ipfr_heads = NULL;
188
189	if (ifs->ifs_ipfr_nattab != NULL) {
190		KFREES(ifs->ifs_ipfr_nattab,
191		    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
192	}
193	ifs->ifs_ipfr_nattab = NULL;
194
195	if (ifs->ifs_ipfr_ipidtab != NULL) {
196		KFREES(ifs->ifs_ipfr_ipidtab,
197		    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
198	}
199	ifs->ifs_ipfr_ipidtab = NULL;
200}
201
202
203/* ------------------------------------------------------------------------ */
204/* Function:    fr_fragstats                                                */
205/* Returns:     ipfrstat_t* - pointer to struct with current frag stats     */
206/* Parameters:  Nil                                                         */
207/*                                                                          */
208/* Updates ipfr_stats with current information and returns a pointer to it  */
209/* ------------------------------------------------------------------------ */
210ipfrstat_t *fr_fragstats(ifs)
211ipf_stack_t *ifs;
212{
213	ifs->ifs_ipfr_stats.ifs_table = ifs->ifs_ipfr_heads;
214	ifs->ifs_ipfr_stats.ifs_nattab = ifs->ifs_ipfr_nattab;
215	ifs->ifs_ipfr_stats.ifs_inuse = ifs->ifs_ipfr_inuse;
216	return &ifs->ifs_ipfr_stats;
217}
218
219
220/* ------------------------------------------------------------------------ */
221/* Function:    ipfr_index                                                  */
222/* Returns:     int     - index in fragment table for given packet          */
223/* Parameters:  fin(I)  - pointer to packet information                     */
224/*              frag(O) - pointer to ipfr_t structure to fill               */
225/*                                                                          */
226/* Compute the index in the fragment table while filling the per packet     */
227/* part of the fragment state.                                              */
228/* ------------------------------------------------------------------------ */
229static INLINE int ipfr_index(fin, frag)
230fr_info_t *fin;
231ipfr_t *frag;
232{
233	u_int idx;
234
235	/*
236	 * For fragments, we record protocol, packet id, TOS and both IP#'s
237	 * (these should all be the same for all fragments of a packet).
238	 *
239	 * build up a hash value to index the table with.
240	 */
241
242#ifdef	USE_INET6
243	if (fin->fin_v == 6) {
244		ip6_t *ip6 = (ip6_t *)fin->fin_ip;
245
246		frag->ipfr_p = fin->fin_fi.fi_p;
247		frag->ipfr_id = fin->fin_id;
248		frag->ipfr_tos = ip6->ip6_flow & IPV6_FLOWINFO_MASK;
249		frag->ipfr_src.in6 = ip6->ip6_src;
250		frag->ipfr_dst.in6 = ip6->ip6_dst;
251	} else
252#endif
253	{
254		ip_t *ip = fin->fin_ip;
255
256		frag->ipfr_p = ip->ip_p;
257		frag->ipfr_id = ip->ip_id;
258		frag->ipfr_tos = ip->ip_tos;
259		frag->ipfr_src.in4.s_addr = ip->ip_src.s_addr;
260		frag->ipfr_src.i6[1] = 0;
261		frag->ipfr_src.i6[2] = 0;
262		frag->ipfr_src.i6[3] = 0;
263		frag->ipfr_dst.in4.s_addr = ip->ip_dst.s_addr;
264		frag->ipfr_dst.i6[1] = 0;
265		frag->ipfr_dst.i6[2] = 0;
266		frag->ipfr_dst.i6[3] = 0;
267	}
268	frag->ipfr_ifp = fin->fin_ifp;
269	frag->ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
270	frag->ipfr_secmsk = fin->fin_fi.fi_secmsk;
271	frag->ipfr_auth = fin->fin_fi.fi_auth;
272
273	idx = frag->ipfr_p;
274	idx += frag->ipfr_id;
275	idx += frag->ipfr_src.i6[0];
276	idx += frag->ipfr_src.i6[1];
277	idx += frag->ipfr_src.i6[2];
278	idx += frag->ipfr_src.i6[3];
279	idx += frag->ipfr_dst.i6[0];
280	idx += frag->ipfr_dst.i6[1];
281	idx += frag->ipfr_dst.i6[2];
282	idx += frag->ipfr_dst.i6[3];
283	idx *= 127;
284	idx %= IPFT_SIZE;
285
286	return idx;
287}
288
289
290/* ------------------------------------------------------------------------ */
291/* Function:    ipfr_newfrag                                                */
292/* Returns:     ipfr_t * - pointer to fragment cache state info or NULL     */
293/* Parameters:  fin(I)   - pointer to packet information                    */
294/*              table(I) - pointer to frag table to add to                  */
295/*                                                                          */
296/* Add a new entry to the fragment cache, registering it as having come     */
297/* through this box, with the result of the filter operation.               */
298/* ------------------------------------------------------------------------ */
299static ipfr_t *ipfr_newfrag(fin, pass, table)
300fr_info_t *fin;
301u_32_t pass;
302ipfr_t *table[];
303{
304	ipfr_t *fra, frag;
305	u_int idx, off;
306	ipf_stack_t *ifs = fin->fin_ifs;
307
308	if (ifs->ifs_ipfr_inuse >= ifs->ifs_ipfr_size)
309		return NULL;
310
311	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
312		return NULL;
313
314	if (pass & FR_FRSTRICT)
315		if (fin->fin_off != 0)
316			return NULL;
317
318	idx = ipfr_index(fin, &frag);
319
320	/*
321	 * first, make sure it isn't already there...
322	 */
323	for (fra = table[idx]; (fra != NULL); fra = fra->ipfr_hnext)
324		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp,
325			  IPFR_CMPSZ)) {
326			ifs->ifs_ipfr_stats.ifs_exists++;
327			return NULL;
328		}
329
330	/*
331	 * allocate some memory, if possible, if not, just record that we
332	 * failed to do so.
333	 */
334	KMALLOC(fra, ipfr_t *);
335	if (fra == NULL) {
336		ifs->ifs_ipfr_stats.ifs_nomem++;
337		return NULL;
338	}
339
340	fra->ipfr_rule = fin->fin_fr;
341	if (fra->ipfr_rule != NULL) {
342
343		frentry_t *fr;
344
345		fr = fin->fin_fr;
346		MUTEX_ENTER(&fr->fr_lock);
347		fr->fr_ref++;
348		MUTEX_EXIT(&fr->fr_lock);
349	}
350
351	/*
352	 * Insert the fragment into the fragment table, copy the struct used
353	 * in the search using bcopy rather than reassign each field.
354	 * Set the ttl to the default.
355	 */
356	if ((fra->ipfr_hnext = table[idx]) != NULL)
357		table[idx]->ipfr_hprev = &fra->ipfr_hnext;
358	fra->ipfr_hprev = table + idx;
359	fra->ipfr_data = NULL;
360	table[idx] = fra;
361	bcopy((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, IPFR_CMPSZ);
362	fra->ipfr_ttl = ifs->ifs_fr_ticks + ifs->ifs_fr_ipfrttl;
363
364	/*
365	 * Compute the offset of the expected start of the next packet.
366	 */
367	off = fin->fin_off >> 3;
368	if (off == 0) {
369		fra->ipfr_seen0 = 1;
370	} else {
371		fra->ipfr_seen0 = 0;
372	}
373	fra->ipfr_off = off + fin->fin_dlen;
374	fra->ipfr_pass = pass;
375	fra->ipfr_ref = 1;
376	ifs->ifs_ipfr_stats.ifs_new++;
377	ifs->ifs_ipfr_inuse++;
378	return fra;
379}
380
381
382/* ------------------------------------------------------------------------ */
383/* Function:    fr_newfrag                                                  */
384/* Returns:     int - 0 == success, -1 == error                             */
385/* Parameters:  fin(I)  - pointer to packet information                     */
386/*                                                                          */
387/* Add a new entry to the fragment cache table based on the current packet  */
388/* ------------------------------------------------------------------------ */
389int fr_newfrag(fin, pass)
390u_32_t pass;
391fr_info_t *fin;
392{
393	ipfr_t	*fra;
394	ipf_stack_t *ifs = fin->fin_ifs;
395
396	if (ifs->ifs_fr_frag_lock != 0)
397		return -1;
398
399	WRITE_ENTER(&ifs->ifs_ipf_frag);
400	fra = ipfr_newfrag(fin, pass, ifs->ifs_ipfr_heads);
401	if (fra != NULL) {
402		*ifs->ifs_ipfr_tail = fra;
403		fra->ipfr_prev = ifs->ifs_ipfr_tail;
404		ifs->ifs_ipfr_tail = &fra->ipfr_next;
405		if (ifs->ifs_ipfr_list == NULL)
406			ifs->ifs_ipfr_list = fra;
407		fra->ipfr_next = NULL;
408	}
409	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
410	return fra ? 0 : -1;
411}
412
413
414/* ------------------------------------------------------------------------ */
415/* Function:    fr_nat_newfrag                                              */
416/* Returns:     int - 0 == success, -1 == error                             */
417/* Parameters:  fin(I)  - pointer to packet information                     */
418/*              nat(I)  - pointer to NAT structure                          */
419/*                                                                          */
420/* Create a new NAT fragment cache entry based on the current packet and    */
421/* the NAT structure for this "session".                                    */
422/* ------------------------------------------------------------------------ */
423int fr_nat_newfrag(fin, pass, nat)
424fr_info_t *fin;
425u_32_t pass;
426nat_t *nat;
427{
428	ipfr_t	*fra;
429	ipf_stack_t *ifs = fin->fin_ifs;
430
431	if (ifs->ifs_fr_frag_lock != 0)
432		return 0;
433
434	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
435	fra = ipfr_newfrag(fin, pass, ifs->ifs_ipfr_nattab);
436	if (fra != NULL) {
437		fra->ipfr_data = nat;
438		nat->nat_data = fra;
439		*ifs->ifs_ipfr_nattail = fra;
440		fra->ipfr_prev = ifs->ifs_ipfr_nattail;
441		ifs->ifs_ipfr_nattail = &fra->ipfr_next;
442		fra->ipfr_next = NULL;
443	}
444	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
445	return fra ? 0 : -1;
446}
447
448
449/* ------------------------------------------------------------------------ */
450/* Function:    fr_ipid_newfrag                                             */
451/* Returns:     int - 0 == success, -1 == error                             */
452/* Parameters:  fin(I)  - pointer to packet information                     */
453/*              ipid(I) - new IP ID for this fragmented packet              */
454/*                                                                          */
455/* Create a new fragment cache entry for this packet and store, as a data   */
456/* pointer, the new IP ID value.                                            */
457/* ------------------------------------------------------------------------ */
458int fr_ipid_newfrag(fin, ipid)
459fr_info_t *fin;
460u_32_t ipid;
461{
462	ipfr_t	*fra;
463	ipf_stack_t *ifs = fin->fin_ifs;
464
465	if (ifs->ifs_fr_frag_lock)
466		return 0;
467
468	WRITE_ENTER(&ifs->ifs_ipf_ipidfrag);
469	fra = ipfr_newfrag(fin, 0, ifs->ifs_ipfr_ipidtab);
470	if (fra != NULL) {
471		fra->ipfr_data = (void *)(uintptr_t)ipid;
472		*ifs->ifs_ipfr_ipidtail = fra;
473		fra->ipfr_prev = ifs->ifs_ipfr_ipidtail;
474		ifs->ifs_ipfr_ipidtail = &fra->ipfr_next;
475		fra->ipfr_next = NULL;
476	}
477	RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag);
478	return fra ? 0 : -1;
479}
480
481
482/* ------------------------------------------------------------------------ */
483/* Function:    fr_fraglookup                                               */
484/* Returns:     ipfr_t * - pointer to ipfr_t structure if there's a         */
485/*                         matching entry in the frag table, else NULL      */
486/* Parameters:  fin(I)   - pointer to packet information                    */
487/*              table(I) - pointer to fragment cache table to search        */
488/*                                                                          */
489/* Check the fragment cache to see if there is already a record of this     */
490/* packet with its filter result known.                                     */
491/* ------------------------------------------------------------------------ */
492static ipfr_t *fr_fraglookup(fin, table)
493fr_info_t *fin;
494ipfr_t *table[];
495{
496	ipfr_t *f, frag;
497	u_int idx;
498	ipf_stack_t *ifs = fin->fin_ifs;
499
500	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
501		return NULL;
502
503	/*
504	 * For fragments, we record protocol, packet id, TOS and both IP#'s
505	 * (these should all be the same for all fragments of a packet).
506	 *
507	 * build up a hash value to index the table with.
508	 */
509	idx = ipfr_index(fin, &frag);
510
511	/*
512	 * check the table, careful to only compare the right amount of data
513	 */
514	for (f = table[idx]; f; f = f->ipfr_hnext)
515		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&f->ipfr_ifp,
516			  IPFR_CMPSZ)) {
517			u_short	off;
518
519			/*
520			 * We don't want to let short packets match because
521			 * they could be compromising the security of other
522			 * rules that want to match on layer 4 fields (and
523			 * can't because they have been fragmented off.)
524			 * Why do this check here?  The counter acts as an
525			 * indicator of this kind of attack, whereas if it was
526			 * elsewhere, it wouldn't know if other matching
527			 * packets had been seen.
528			 */
529			if (fin->fin_flx & FI_SHORT) {
530				ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_short);
531				continue;
532			}
533
534			/*
535			 * XXX - We really need to be guarding against the
536			 * retransmission of (src,dst,id,offset-range) here
537			 * because a fragmented packet is never resent with
538			 * the same IP ID# (or shouldn't).
539			 */
540			off = fin->fin_off >> 3;
541			if (f->ipfr_seen0) {
542				if (off == 0) {
543					ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_retrans0);
544					continue;
545				}
546			} else if (off == 0) {
547				f->ipfr_seen0 = 1;
548			}
549
550			if (f != table[idx]) {
551				ipfr_t **fp;
552
553				/*
554				 * Move fragment info. to the top of the list
555				 * to speed up searches.  First, delink...
556				 */
557				fp = f->ipfr_hprev;
558				(*fp) = f->ipfr_hnext;
559				if (f->ipfr_hnext != NULL)
560					f->ipfr_hnext->ipfr_hprev = fp;
561				/*
562				 * Then put back at the top of the chain.
563				 */
564				f->ipfr_hnext = table[idx];
565				table[idx]->ipfr_hprev = &f->ipfr_hnext;
566				f->ipfr_hprev = table + idx;
567				table[idx] = f;
568			}
569
570			/*
571			 * If we've follwed the fragments, and this is the
572			 * last (in order), shrink expiration time.
573			 */
574			if (off == f->ipfr_off) {
575				if (!(fin->fin_flx & FI_MOREFRAG))
576					f->ipfr_ttl = ifs->ifs_fr_ticks + 1;
577				f->ipfr_off = fin->fin_dlen + off;
578			} else if (f->ipfr_pass & FR_FRSTRICT)
579				continue;
580			ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_hits);
581			return f;
582		}
583	return NULL;
584}
585
586
587/* ------------------------------------------------------------------------ */
588/* Function:    fr_nat_knownfrag                                            */
589/* Returns:     nat_t* - pointer to 'parent' NAT structure if frag table    */
590/*                       match found, else NULL                             */
591/* Parameters:  fin(I)  - pointer to packet information                     */
592/*                                                                          */
593/* Functional interface for NAT lookups of the NAT fragment cache           */
594/* ------------------------------------------------------------------------ */
595nat_t *fr_nat_knownfrag(fin)
596fr_info_t *fin;
597{
598	nat_t	*nat;
599	ipfr_t	*ipf;
600	ipf_stack_t *ifs = fin->fin_ifs;
601
602	if (ifs->ifs_fr_frag_lock || !ifs->ifs_ipfr_natlist)
603		return NULL;
604	READ_ENTER(&ifs->ifs_ipf_natfrag);
605	ipf = fr_fraglookup(fin, ifs->ifs_ipfr_nattab);
606	if (ipf != NULL) {
607		nat = ipf->ipfr_data;
608		/*
609		 * This is the last fragment for this packet.
610		 */
611		if ((ipf->ipfr_ttl == ifs->ifs_fr_ticks + 1) && (nat != NULL)) {
612			nat->nat_data = NULL;
613			ipf->ipfr_data = NULL;
614		}
615	} else
616		nat = NULL;
617	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
618	return nat;
619}
620
621
622/* ------------------------------------------------------------------------ */
623/* Function:    fr_ipid_knownfrag                                           */
624/* Returns:     u_32_t - IPv4 ID for this packet if match found, else       */
625/*                       return 0xfffffff to indicate no match.             */
626/* Parameters:  fin(I) - pointer to packet information                      */
627/*                                                                          */
628/* Functional interface for IP ID lookups of the IP ID fragment cache       */
629/* ------------------------------------------------------------------------ */
630u_32_t fr_ipid_knownfrag(fin)
631fr_info_t *fin;
632{
633	ipfr_t	*ipf;
634	u_32_t	id;
635	ipf_stack_t *ifs = fin->fin_ifs;
636
637	if (ifs->ifs_fr_frag_lock || !ifs->ifs_ipfr_ipidlist)
638		return 0xffffffff;
639
640	READ_ENTER(&ifs->ifs_ipf_ipidfrag);
641	ipf = fr_fraglookup(fin, ifs->ifs_ipfr_ipidtab);
642	if (ipf != NULL)
643		id = (u_32_t)(uintptr_t)ipf->ipfr_data;
644	else
645		id = 0xffffffff;
646	RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag);
647	return id;
648}
649
650
651/* ------------------------------------------------------------------------ */
652/* Function:    fr_knownfrag                                                */
653/* Returns:     frentry_t* - pointer to filter rule if a match is found in  */
654/*                           the frag cache table, else NULL.               */
655/* Parameters:  fin(I)   - pointer to packet information                    */
656/*              passp(O) - pointer to where to store rule flags resturned   */
657/*                                                                          */
658/* Functional interface for normal lookups of the fragment cache.  If a     */
659/* match is found, return the rule pointer and flags from the rule, except  */
660/* that if FR_LOGFIRST is set, reset FR_LOG.                                */
661/* ------------------------------------------------------------------------ */
662frentry_t *fr_knownfrag(fin, passp)
663fr_info_t *fin;
664u_32_t *passp;
665{
666	frentry_t *fr = NULL;
667	ipfr_t	*fra;
668	u_32_t pass, oflx;
669	ipf_stack_t *ifs = fin->fin_ifs;
670
671	if (ifs->ifs_fr_frag_lock || (ifs->ifs_ipfr_list == NULL))
672		return NULL;
673
674	READ_ENTER(&ifs->ifs_ipf_frag);
675	oflx = fin->fin_flx;
676	fra = fr_fraglookup(fin, ifs->ifs_ipfr_heads);
677	if (fra != NULL) {
678		fr = fra->ipfr_rule;
679		fin->fin_fr = fr;
680		if (fr != NULL) {
681			pass = fr->fr_flags;
682			if ((pass & FR_LOGFIRST) != 0)
683				pass &= ~(FR_LOGFIRST|FR_LOG);
684			*passp = pass;
685		}
686	}
687	if (!(oflx & FI_BAD) && (fin->fin_flx & FI_BAD)) {
688		*passp &= ~FR_CMDMASK;
689		*passp |= FR_BLOCK;
690		fr = &ifs->ifs_frblock;
691	}
692	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
693	return fr;
694}
695
696
697/* ------------------------------------------------------------------------ */
698/* Function:    fr_forget                                                   */
699/* Returns:     Nil                                                         */
700/* Parameters:  ptr(I) - pointer to data structure                          */
701/*                                                                          */
702/* Search through all of the fragment cache entries and wherever a pointer  */
703/* is found to match ptr, reset it to NULL.                                 */
704/* ------------------------------------------------------------------------ */
705void fr_forget(ptr, ifs)
706void *ptr;
707ipf_stack_t *ifs;
708{
709	ipfr_t	*fr;
710
711	WRITE_ENTER(&ifs->ifs_ipf_frag);
712	for (fr = ifs->ifs_ipfr_list; fr; fr = fr->ipfr_next)
713		if (fr->ipfr_data == ptr)
714			fr->ipfr_data = NULL;
715	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
716}
717
718
719/* ------------------------------------------------------------------------ */
720/* Function:    fr_forgetnat                                                */
721/* Returns:     Nil                                                         */
722/* Parameters:  ptr(I) - pointer to data structure                          */
723/*                                                                          */
724/* Search through all of the fragment cache entries for NAT and wherever a  */
725/* pointer  is found to match ptr, reset it to NULL.                        */
726/* ------------------------------------------------------------------------ */
727void fr_forgetnat(ptr, ifs)
728void *ptr;
729ipf_stack_t *ifs;
730{
731	ipfr_t	*fr;
732
733	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
734	for (fr = ifs->ifs_ipfr_natlist; fr; fr = fr->ipfr_next)
735		if (fr->ipfr_data == ptr)
736			fr->ipfr_data = NULL;
737	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
738}
739
740
741/* ------------------------------------------------------------------------ */
742/* Function:    fr_fragdelete                                               */
743/* Returns:     Nil                                                         */
744/* Parameters:  fra(I)   - pointer to fragment structure to delete          */
745/*              tail(IO) - pointer to the pointer to the tail of the frag   */
746/*                         list                                             */
747/*                                                                          */
748/* Remove a fragment cache table entry from the table & list.  Also free    */
749/* the filter rule it is associated with it if it is no longer used as a    */
750/* result of decreasing the reference count.                                */
751/* ------------------------------------------------------------------------ */
752static void fr_fragdelete(fra, tail, ifs)
753ipfr_t *fra, ***tail;
754ipf_stack_t *ifs;
755{
756	frentry_t *fr;
757
758	fr = fra->ipfr_rule;
759	if (fr != NULL)
760	    (void)fr_derefrule(&fr, ifs);
761
762	if (fra->ipfr_next)
763		fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
764	*fra->ipfr_prev = fra->ipfr_next;
765	if (*tail == &fra->ipfr_next)
766		*tail = fra->ipfr_prev;
767
768	if (fra->ipfr_hnext)
769		fra->ipfr_hnext->ipfr_hprev = fra->ipfr_hprev;
770	*fra->ipfr_hprev = fra->ipfr_hnext;
771
772	if (fra->ipfr_ref <= 0)
773		KFREE(fra);
774}
775
776
777/* ------------------------------------------------------------------------ */
778/* Function:    fr_fragclear                                                */
779/* Returns:     Nil                                                         */
780/* Parameters:  Nil                                                         */
781/*                                                                          */
782/* Free memory in use by fragment state information kept.  Do the normal    */
783/* fragment state stuff first and then the NAT-fragment table.              */
784/* ------------------------------------------------------------------------ */
785void fr_fragclear(ifs)
786ipf_stack_t *ifs;
787{
788	ipfr_t	*fra;
789	nat_t	*nat;
790
791	WRITE_ENTER(&ifs->ifs_ipf_frag);
792	while ((fra = ifs->ifs_ipfr_list) != NULL) {
793		fra->ipfr_ref--;
794		fr_fragdelete(fra, &ifs->ifs_ipfr_tail, ifs);
795	}
796	ifs->ifs_ipfr_tail = &ifs->ifs_ipfr_list;
797	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
798
799	WRITE_ENTER(&ifs->ifs_ipf_nat);
800	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
801	while ((fra = ifs->ifs_ipfr_natlist) != NULL) {
802		nat = fra->ipfr_data;
803		if (nat != NULL) {
804			if (nat->nat_data == fra)
805				nat->nat_data = NULL;
806		}
807		fra->ipfr_ref--;
808		fr_fragdelete(fra, &ifs->ifs_ipfr_nattail, ifs);
809	}
810	ifs->ifs_ipfr_nattail = &ifs->ifs_ipfr_natlist;
811	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
812	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
813}
814
815
816/* ------------------------------------------------------------------------ */
817/* Function:    fr_fragexpire                                               */
818/* Returns:     Nil                                                         */
819/* Parameters:  Nil                                                         */
820/*                                                                          */
821/* Expire entries in the fragment cache table that have been there too long */
822/* ------------------------------------------------------------------------ */
823void fr_fragexpire(ifs)
824ipf_stack_t *ifs;
825{
826	ipfr_t	**fp, *fra;
827	nat_t	*nat;
828	SPL_INT(s);
829
830	if (ifs->ifs_fr_frag_lock)
831		return;
832
833	SPL_NET(s);
834	WRITE_ENTER(&ifs->ifs_ipf_frag);
835	/*
836	 * Go through the entire table, looking for entries to expire,
837	 * which is indicated by the ttl being less than or equal to
838	 * ifs_fr_ticks.
839	 */
840	for (fp = &ifs->ifs_ipfr_list; ((fra = *fp) != NULL); ) {
841		if (fra->ipfr_ttl > ifs->ifs_fr_ticks)
842			break;
843		fra->ipfr_ref--;
844		fr_fragdelete(fra, &ifs->ifs_ipfr_tail, ifs);
845		ifs->ifs_ipfr_stats.ifs_expire++;
846		ifs->ifs_ipfr_inuse--;
847	}
848	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
849
850	WRITE_ENTER(&ifs->ifs_ipf_ipidfrag);
851	for (fp = &ifs->ifs_ipfr_ipidlist; ((fra = *fp) != NULL); ) {
852		if (fra->ipfr_ttl > ifs->ifs_fr_ticks)
853			break;
854		fra->ipfr_ref--;
855		fr_fragdelete(fra, &ifs->ifs_ipfr_ipidtail, ifs);
856		ifs->ifs_ipfr_stats.ifs_expire++;
857		ifs->ifs_ipfr_inuse--;
858	}
859	RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag);
860
861	/*
862	 * Same again for the NAT table, except that if the structure also
863	 * still points to a NAT structure, and the NAT structure points back
864	 * at the one to be free'd, NULL the reference from the NAT struct.
865	 * NOTE: We need to grab both mutex's early, and in this order so as
866	 * to prevent a deadlock if both try to expire at the same time.
867	 */
868	WRITE_ENTER(&ifs->ifs_ipf_nat);
869	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
870	for (fp = &ifs->ifs_ipfr_natlist; ((fra = *fp) != NULL); ) {
871		if (fra->ipfr_ttl > ifs->ifs_fr_ticks)
872			break;
873		nat = fra->ipfr_data;
874		if (nat != NULL) {
875			if (nat->nat_data == fra)
876				nat->nat_data = NULL;
877		}
878		fra->ipfr_ref--;
879		fr_fragdelete(fra, &ifs->ifs_ipfr_nattail, ifs);
880		ifs->ifs_ipfr_stats.ifs_expire++;
881		ifs->ifs_ipfr_inuse--;
882	}
883	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
884	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
885	SPL_X(s);
886}
887
888
889/* ------------------------------------------------------------------------ */
890/* Function:    fr_slowtimer                                                */
891/* Returns:     Nil                                                         */
892/* Parameters:  Nil                                                         */
893/*                                                                          */
894/* Slowly expire held state for fragments.  Timeouts are set * in           */
895/* expectation of this being called twice per second.                       */
896/* ------------------------------------------------------------------------ */
897#if !defined(_KERNEL) || (!SOLARIS && !defined(__hpux) && !defined(__sgi) && \
898			  !defined(__osf__) && !defined(linux))
899# if defined(_KERNEL) && ((BSD >= 199103) || defined(__sgi))
900void fr_slowtimer __P((void *arg))
901# else
902int fr_slowtimer(void *arg)
903# endif
904{
905	ipf_stack_t *ifs = arg;
906
907	READ_ENTER(&ifs->ifs_ipf_global);
908
909	fr_fragexpire(ifs);
910	fr_timeoutstate(ifs);
911	fr_natexpire(ifs);
912	fr_authexpire(ifs);
913	ifs->ifs_fr_ticks++;
914	if (ifs->ifs_fr_running <= 0)
915		goto done;
916# ifdef _KERNEL
917#  if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
918	callout_reset(&fr_slowtimer_ch, hz / 2, fr_slowtimer, NULL);
919#  else
920#   if defined(__OpenBSD__)
921	timeout_add(&fr_slowtimer_ch, hz/2);
922#   else
923#    if (__FreeBSD_version >= 300000)
924	fr_slowtimer_ch = timeout(fr_slowtimer, NULL, hz/2);
925#    else
926#     ifdef linux
927	;
928#     else
929	timeout(fr_slowtimer, NULL, hz/2);
930#     endif
931#    endif /* FreeBSD */
932#   endif /* OpenBSD */
933#  endif /* NetBSD */
934# endif
935done:
936	RWLOCK_EXIT(&ifs->ifs_ipf_global);
937# if (BSD < 199103) || !defined(_KERNEL)
938	return 0;
939# endif
940}
941#endif /* !SOLARIS && !defined(__hpux) && !defined(__sgi) */
942
943/*ARGSUSED*/
944int fr_nextfrag(token, itp, top, tail, lock, ifs)
945ipftoken_t *token;
946ipfgeniter_t *itp;
947ipfr_t **top, ***tail;
948ipfrwlock_t *lock;
949ipf_stack_t *ifs;
950{
951	ipfr_t *frag, *next, zero;
952	int error = 0;
953
954	READ_ENTER(lock);
955
956	/*
957	 * Retrieve "previous" entry from token and find the next entry.
958	 */
959	frag = token->ipt_data;
960	if (frag == NULL)
961		next = *top;
962	else
963		next = frag->ipfr_next;
964
965	/*
966	 * If we found an entry, add reference to it and update token.
967	 * Otherwise, zero out data to be returned and NULL out token.
968	 */
969	if (next != NULL) {
970		ATOMIC_INC(next->ipfr_ref);
971		token->ipt_data = next;
972	} else {
973		bzero(&zero, sizeof(zero));
974		next = &zero;
975		token->ipt_data = NULL;
976	}
977
978	/*
979	 * Now that we have ref, it's save to give up lock.
980	 */
981	RWLOCK_EXIT(lock);
982
983	/*
984	 * Copy out data and clean up references and token as needed.
985	 */
986	error = COPYOUT(next, itp->igi_data, sizeof(*next));
987	if (error != 0)
988		error = EFAULT;
989	if (token->ipt_data == NULL) {
990		ipf_freetoken(token, ifs);
991	} else {
992		if (frag != NULL)
993			fr_fragderef(&frag, lock, ifs);
994		if (next->ipfr_next == NULL)
995			ipf_freetoken(token, ifs);
996	}
997	return error;
998}
999
1000
1001void fr_fragderef(frp, lock, ifs)
1002ipfr_t **frp;
1003ipfrwlock_t *lock;
1004ipf_stack_t *ifs;
1005{
1006	ipfr_t *fra;
1007
1008	fra = *frp;
1009	*frp = NULL;
1010
1011	WRITE_ENTER(lock);
1012	fra->ipfr_ref--;
1013	if (fra->ipfr_ref <= 0) {
1014		KFREE(fra);
1015		ifs->ifs_ipfr_stats.ifs_expire++;
1016		ifs->ifs_ipfr_inuse--;
1017	}
1018	RWLOCK_EXIT(lock);
1019}
1020