1/*
2 * Copyright (C) 1993-2001 by Darren Reed.
3 *
4 * See the IPFILTER.LICENCE file for details on licencing.
5 */
6#if defined(KERNEL) && !defined(_KERNEL)
7# define _KERNEL
8#endif
9
10#ifdef __sgi
11# include <sys/ptimers.h>
12#endif
13#include <sys/errno.h>
14#include <sys/types.h>
15#include <sys/param.h>
16#include <sys/time.h>
17#include <sys/file.h>
18#if !defined(_KERNEL) && !defined(KERNEL)
19# include <stdio.h>
20# include <string.h>
21# include <stdlib.h>
22#endif
23#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
24# include <sys/filio.h>
25# include <sys/fcntl.h>
26#else
27# include <sys/ioctl.h>
28#endif
26#include <sys/uio.h>
29#ifndef linux
30# include <sys/protosw.h>
31#endif
32#include <sys/socket.h>
33#if defined(_KERNEL) && !defined(linux)
34# include <sys/systm.h>
35#endif
36#if !defined(__SVR4) && !defined(__svr4__)
37# if defined(_KERNEL) && !defined(__sgi)
38# include <sys/kernel.h>
39# endif
40# ifndef linux
41# include <sys/mbuf.h>
42# endif
43#else
44# include <sys/byteorder.h>
45# ifdef _KERNEL
46# include <sys/dditypes.h>
47# endif
48# include <sys/stream.h>
49# include <sys/kmem.h>
50#endif
51#include <net/if.h>
52#ifdef sun
53# include <net/af.h>
54#endif
55#include <net/route.h>
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#include <netinet/ip.h>
59#ifndef linux
60# include <netinet/ip_var.h>
61#endif
62#include <netinet/tcp.h>
63#include <netinet/udp.h>
64#include <netinet/ip_icmp.h>
65#include "netinet/ip_compat.h"
66#include <netinet/tcpip.h>
67#include "netinet/ip_fil.h"
66#include "netinet/ip_proxy.h"
68#include "netinet/ip_nat.h"
69#include "netinet/ip_frag.h"
70#include "netinet/ip_state.h"
71#include "netinet/ip_auth.h"
72#if (__FreeBSD_version >= 300000)
73# include <sys/malloc.h>
74# if (defined(KERNEL) || defined(_KERNEL))
75# ifndef IPFILTER_LKM
76# include <sys/libkern.h>
77# include <sys/systm.h>
78# endif
79extern struct callout_handle ipfr_slowtimer_ch;
80# endif
81#endif
82#if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
83# include <sys/callout.h>
84extern struct callout ipfr_slowtimer_ch;
85#endif
86#if defined(__OpenBSD__)
87# include <sys/timeout.h>
88extern struct timeout ipfr_slowtimer_ch;
89#endif
90
#if !defined(lint)
static const char sccsid[] = "@(#)ip_frag.c	1.11 3/24/96 (C) 1993-2000 Darren Reed";
/* BUG FIX: a stale copy of the previous revision's rcsid survived a merge,
 * producing a duplicate definition of `rcsid'; keep only the current one. */
static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_frag.c 92685 2002-03-19 11:44:16Z darrenr $";
#endif
95
96
/*
 * Fragment cache hash tables: ipfr_heads holds entries created for the
 * packet filter, ipfr_nattab holds entries cross-linked with NAT sessions.
 */
static ipfr_t *ipfr_heads[IPFT_SIZE];
static ipfr_t *ipfr_nattab[IPFT_SIZE];
static ipfrstat_t ipfr_stats;	/* counters returned by ipfr_fragstats() */
static int ipfr_inuse = 0;	/* total live entries across both tables */

int fr_ipfrttl = 120;	/* 60 seconds */
int fr_frag_lock = 0;	/* non-zero freezes the fragment cache (no add/lookup/expire) */

#ifdef _KERNEL
/* handle for the half-second ipfr_slowtimer timeout (Solaris) */
# if SOLARIS2 >= 7
extern timeout_id_t ipfr_timer_id;
# else
extern int ipfr_timer_id;
# endif
#endif
#if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
/* locks protecting the frag/NAT tables; defined in the main filter code */
extern KRWLOCK_T ipf_frag, ipf_natfrag, ipf_nat, ipf_mutex;
# if SOLARIS
extern KRWLOCK_T ipf_solaris;
# else
KRWLOCK_T ipf_solaris;
# endif
extern kmutex_t ipf_rw;
#endif


static ipfr_t *ipfr_new __P((ip_t *, fr_info_t *, u_int, ipfr_t **));
static ipfr_t *ipfr_lookup __P((ip_t *, fr_info_t *, ipfr_t **));
static void ipfr_delete __P((ipfr_t *));
127
128ipfrstat_t *ipfr_fragstats()
129{
130 ipfr_stats.ifs_table = ipfr_heads;
131 ipfr_stats.ifs_nattab = ipfr_nattab;
132 ipfr_stats.ifs_inuse = ipfr_inuse;
133 return &ipfr_stats;
134}
135
136
137/*
138 * add a new entry to the fragment cache, registering it as having come
139 * through this box, with the result of the filter operation.
140 */
141static ipfr_t *ipfr_new(ip, fin, pass, table)
142ip_t *ip;
143fr_info_t *fin;
144u_int pass;
145ipfr_t *table[];
146{
147 ipfr_t **fp, *fra, frag;
148 u_int idx, off;
149
150 if (ipfr_inuse >= IPFT_SIZE)
151 return NULL;
152
153 if (!(fin->fin_fl & FI_FRAG))
154 return NULL;
155
156 frag.ipfr_p = ip->ip_p;
157 idx = ip->ip_p;
158 frag.ipfr_id = ip->ip_id;
159 idx += ip->ip_id;
160 frag.ipfr_tos = ip->ip_tos;
161 frag.ipfr_src.s_addr = ip->ip_src.s_addr;
162 idx += ip->ip_src.s_addr;
163 frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
164 idx += ip->ip_dst.s_addr;
165 frag.ipfr_ifp = fin->fin_ifp;
166 idx *= 127;
167 idx %= IPFT_SIZE;
168
169 frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
170 frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
171 frag.ipfr_auth = fin->fin_fi.fi_auth;
172
173 /*
174 * first, make sure it isn't already there...
175 */
176 for (fp = &table[idx]; (fra = *fp); fp = &fra->ipfr_next)
177 if (!bcmp((char *)&frag.ipfr_src, (char *)&fra->ipfr_src,
178 IPFR_CMPSZ)) {
179 ATOMIC_INCL(ipfr_stats.ifs_exists);
180 return NULL;
181 }
182
183 /*
184 * allocate some memory, if possible, if not, just record that we
185 * failed to do so.
186 */
187 KMALLOC(fra, ipfr_t *);
188 if (fra == NULL) {
189 ATOMIC_INCL(ipfr_stats.ifs_nomem);
190 return NULL;
191 }
192
193 if ((fra->ipfr_rule = fin->fin_fr) != NULL) {
194 ATOMIC_INC32(fin->fin_fr->fr_ref);
195 }
196
197
198 /*
199 * Instert the fragment into the fragment table, copy the struct used
200 * in the search using bcopy rather than reassign each field.
201 * Set the ttl to the default and mask out logging from "pass"
202 */
203 if ((fra->ipfr_next = table[idx]))
204 table[idx]->ipfr_prev = fra;
205 fra->ipfr_prev = NULL;
206 fra->ipfr_data = NULL;
207 table[idx] = fra;
208 bcopy((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, IPFR_CMPSZ);
209 fra->ipfr_ttl = fr_ipfrttl;
210 /*
211 * Compute the offset of the expected start of the next packet.
212 */
213 off = ip->ip_off & IP_OFFMASK;
214 if (!off)
215 fra->ipfr_seen0 = 1;
216 fra->ipfr_off = off + (fin->fin_dlen >> 3);
217 ATOMIC_INCL(ipfr_stats.ifs_new);
218 ATOMIC_INC32(ipfr_inuse);
219 return fra;
220}
221
222
223int ipfr_newfrag(ip, fin, pass)
224ip_t *ip;
225fr_info_t *fin;
226u_int pass;
227{
228 ipfr_t *ipf;
229
230 if ((ip->ip_v != 4) || (fr_frag_lock))
231 return -1;
232 WRITE_ENTER(&ipf_frag);
233 ipf = ipfr_new(ip, fin, pass, ipfr_heads);
234 RWLOCK_EXIT(&ipf_frag);
235 if (ipf == NULL) {
236 ATOMIC_INCL(frstats[fin->fin_out].fr_bnfr);
237 return -1;
238 }
239 ATOMIC_INCL(frstats[fin->fin_out].fr_nfr);
240 return 0;
241}
242
243
244int ipfr_nat_newfrag(ip, fin, pass, nat)
245ip_t *ip;
246fr_info_t *fin;
247u_int pass;
248nat_t *nat;
249{
250 ipfr_t *ipf;
251 int off;
252
253 if ((ip->ip_v != 4) || (fr_frag_lock))
254 return -1;
255
256 off = fin->fin_off;
257 off <<= 3;
258 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
259 return NULL;
260
261 WRITE_ENTER(&ipf_natfrag);
262 ipf = ipfr_new(ip, fin, pass, ipfr_nattab);
263 if (ipf != NULL) {
264 ipf->ipfr_data = nat;
265 nat->nat_data = ipf;
266 }
267 RWLOCK_EXIT(&ipf_natfrag);
268 return ipf ? 0 : -1;
269}
270
271
272/*
273 * check the fragment cache to see if there is already a record of this packet
274 * with its filter result known.
275 */
276static ipfr_t *ipfr_lookup(ip, fin, table)
277ip_t *ip;
278fr_info_t *fin;
279ipfr_t *table[];
280{
281 ipfr_t *f, frag;
282 u_int idx;
283
284 /*
285 * For fragments, we record protocol, packet id, TOS and both IP#'s
286 * (these should all be the same for all fragments of a packet).
287 *
288 * build up a hash value to index the table with.
289 */
290 frag.ipfr_p = ip->ip_p;
291 idx = ip->ip_p;
292 frag.ipfr_id = ip->ip_id;
293 idx += ip->ip_id;
294 frag.ipfr_tos = ip->ip_tos;
295 frag.ipfr_src.s_addr = ip->ip_src.s_addr;
296 idx += ip->ip_src.s_addr;
297 frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
298 idx += ip->ip_dst.s_addr;
299 frag.ipfr_ifp = fin->fin_ifp;
300 idx *= 127;
301 idx %= IPFT_SIZE;
302
303 frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
304 frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
305 frag.ipfr_auth = fin->fin_fi.fi_auth;
306
307 /*
308 * check the table, careful to only compare the right amount of data
309 */
310 for (f = table[idx]; f; f = f->ipfr_next)
311 if (!bcmp((char *)&frag.ipfr_src, (char *)&f->ipfr_src,
312 IPFR_CMPSZ)) {
313 u_short atoff, off;
314
315 off = fin->fin_off;
316
317 /*
318 * XXX - We really need to be guarding against the
319 * retransmission of (src,dst,id,offset-range) here
320 * because a fragmented packet is never resent with
321 * the same IP ID#.
322 */
323 if (f->ipfr_seen0) {
324 if (!off || (fin->fin_fl & FI_SHORT))
325 continue;
326 } else if (!off)
327 f->ipfr_seen0 = 1;
328
329 if (f != table[idx]) {
330 /*
331 * move fragment info. to the top of the list
332 * to speed up searches.
333 */
334 if ((f->ipfr_prev->ipfr_next = f->ipfr_next))
335 f->ipfr_next->ipfr_prev = f->ipfr_prev;
336 f->ipfr_next = table[idx];
337 table[idx]->ipfr_prev = f;
338 f->ipfr_prev = NULL;
339 table[idx] = f;
340 }
341 atoff = off + (fin->fin_dlen >> 3);
342 /*
343 * If we've follwed the fragments, and this is the
344 * last (in order), shrink expiration time.
345 */
346 if (off == f->ipfr_off) {
347 if (!(ip->ip_off & IP_MF))
348 f->ipfr_ttl = 1;
349 else
350 f->ipfr_off = atoff;
351 }
352 ATOMIC_INCL(ipfr_stats.ifs_hits);
353 return f;
354 }
355 return NULL;
356}
357
358
359/*
360 * functional interface for NAT lookups of the NAT fragment cache
361 */
362nat_t *ipfr_nat_knownfrag(ip, fin)
363ip_t *ip;
364fr_info_t *fin;
365{
366 ipfr_t *ipf;
367 nat_t *nat;
368 int off;
369
370 if ((fin->fin_v != 4) || (fr_frag_lock))
371 return NULL;
372
373 off = fin->fin_off;
374 off <<= 3;
375 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
376 return NULL;
377
378 READ_ENTER(&ipf_natfrag);
379 ipf = ipfr_lookup(ip, fin, ipfr_nattab);
380 if (ipf != NULL) {
381 nat = ipf->ipfr_data;
382 /*
383 * This is the last fragment for this packet.
384 */
385 if ((ipf->ipfr_ttl == 1) && (nat != NULL)) {
386 nat->nat_data = NULL;
387 ipf->ipfr_data = NULL;
388 }
389 } else
390 nat = NULL;
391 RWLOCK_EXIT(&ipf_natfrag);
392 return nat;
393}
394
395
396/*
397 * functional interface for normal lookups of the fragment cache
398 */
399frentry_t *ipfr_knownfrag(ip, fin)
400ip_t *ip;
401fr_info_t *fin;
402{
403 frentry_t *fr;
404 ipfr_t *fra;
405 int off;
406
407 if ((fin->fin_v != 4) || (fr_frag_lock))
408 return NULL;
409
410 off = fin->fin_off;
411 off <<= 3;
412 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
413 return NULL;
414
415 READ_ENTER(&ipf_frag);
416 fra = ipfr_lookup(ip, fin, ipfr_heads);
417 if (fra != NULL)
418 fr = fra->ipfr_rule;
419 else
420 fr = NULL;
421 RWLOCK_EXIT(&ipf_frag);
422 return fr;
423}
424
425
426/*
427 * forget any references to this external object.
428 */
429void ipfr_forget(nat)
430void *nat;
431{
432 ipfr_t *fr;
433 int idx;
434
435 WRITE_ENTER(&ipf_natfrag);
436 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
437 for (fr = ipfr_heads[idx]; fr; fr = fr->ipfr_next)
438 if (fr->ipfr_data == nat)
439 fr->ipfr_data = NULL;
440
441 RWLOCK_EXIT(&ipf_natfrag);
442}
443
444
445static void ipfr_delete(fra)
446ipfr_t *fra;
447{
448 frentry_t *fr;
449
450 fr = fra->ipfr_rule;
451 if (fr != NULL) {
452 ATOMIC_DEC32(fr->fr_ref);
453 if (fr->fr_ref == 0)
454 KFREE(fr);
455 }
456 if (fra->ipfr_prev)
457 fra->ipfr_prev->ipfr_next = fra->ipfr_next;
458 if (fra->ipfr_next)
459 fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
460 KFREE(fra);
461}
462
463
464/*
465 * Free memory in use by fragment state info. kept.
466 */
467void ipfr_unload()
468{
469 ipfr_t **fp, *fra;
470 nat_t *nat;
471 int idx;
472
473 WRITE_ENTER(&ipf_frag);
474 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
475 for (fp = &ipfr_heads[idx]; (fra = *fp); ) {
476 *fp = fra->ipfr_next;
477 ipfr_delete(fra);
478 }
479 RWLOCK_EXIT(&ipf_frag);
480
481 WRITE_ENTER(&ipf_nat);
482 WRITE_ENTER(&ipf_natfrag);
483 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
484 for (fp = &ipfr_nattab[idx]; (fra = *fp); ) {
485 *fp = fra->ipfr_next;
486 nat = fra->ipfr_data;
487 if (nat != NULL) {
488 if (nat->nat_data == fra)
489 nat->nat_data = NULL;
490 }
491 ipfr_delete(fra);
492 }
493 RWLOCK_EXIT(&ipf_natfrag);
494 RWLOCK_EXIT(&ipf_nat);
495}
496
497
#ifdef _KERNEL
/*
 * Walk both fragment tables, decrementing each entry's ttl and deleting
 * entries that reach zero.  Expected to be called twice per second (from
 * ipfr_slowtimer below).
 *
 * BUG FIX: this whole kernel-only tail was mangled by a bad merge - a
 * duplicated "#ifdef __sgi", a duplicated (and misspelled "__OpenBSD_")
 * OpenBSD timeout block, a stray closing brace, and an unterminated
 * "#ifdef _KERNEL" left the preprocessor conditionals unbalanced.  The
 * stale lines are removed and the conditionals rebalanced; the surviving
 * code paths are unchanged.
 */
void ipfr_fragexpire()
{
	ipfr_t **fp, *fra;
	nat_t *nat;
	int idx;
# if !SOLARIS
	int s;	/* SPL cookie for SPL_NET/SPL_X on BSD */
# endif

	if (fr_frag_lock)
		return;

	SPL_NET(s);
	WRITE_ENTER(&ipf_frag);

	/*
	 * Go through the entire table, looking for entries to expire,
	 * decreasing the ttl by one for each entry.  If it reaches 0,
	 * remove it from the chain and free it.
	 */
	for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
		for (fp = &ipfr_heads[idx]; (fra = *fp); ) {
			--fra->ipfr_ttl;
			if (fra->ipfr_ttl == 0) {
				*fp = fra->ipfr_next;
				ipfr_delete(fra);
				ATOMIC_INCL(ipfr_stats.ifs_expire);
				ATOMIC_DEC32(ipfr_inuse);
			} else
				fp = &fra->ipfr_next;
		}
	RWLOCK_EXIT(&ipf_frag);

	/*
	 * Same again for the NAT table, except that if the structure also
	 * still points to a NAT structure, and the NAT structure points back
	 * at the one to be free'd, NULL the reference from the NAT struct.
	 * NOTE: We need to grab both mutex's early, and in this order so as
	 * to prevent a deadlock if both try to expire at the same time.
	 */
	WRITE_ENTER(&ipf_nat);
	WRITE_ENTER(&ipf_natfrag);
	for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
		for (fp = &ipfr_nattab[idx]; (fra = *fp); ) {
			--fra->ipfr_ttl;
			if (fra->ipfr_ttl == 0) {
				ATOMIC_INCL(ipfr_stats.ifs_expire);
				ATOMIC_DEC32(ipfr_inuse);
				nat = fra->ipfr_data;
				if (nat != NULL) {
					if (nat->nat_data == fra)
						nat->nat_data = NULL;
				}
				*fp = fra->ipfr_next;
				ipfr_delete(fra);
			} else
				fp = &fra->ipfr_next;
		}
	RWLOCK_EXIT(&ipf_natfrag);
	RWLOCK_EXIT(&ipf_nat);
	SPL_X(s);
}


/*
 * Slowly expire held state for fragments.  Timeouts are set in expectation
 * of this being called twice per second.
 */
# if (BSD >= 199306) || SOLARIS || defined(__sgi)
#  if defined(SOLARIS2) && (SOLARIS2 < 7)
void ipfr_slowtimer()
#  else
void ipfr_slowtimer __P((void *ptr))
#  endif
# else
int ipfr_slowtimer()
# endif
{
# if SOLARIS
	extern int fr_running;

	/* don't run (or rearm) while ipfilter is not fully initialised */
	if (fr_running <= 0)
		return;
# endif

	READ_ENTER(&ipf_solaris);
# ifdef __sgi
	ipfilter_sgi_intfsync();
# endif

	ipfr_fragexpire();
	fr_timeoutstate();
	ip_natexpire();
	fr_authexpire();

	/* re-arm the half-second timer, platform by platform */
# if SOLARIS
	ipfr_timer_id = timeout(ipfr_slowtimer, NULL, drv_usectohz(500000));
	RWLOCK_EXIT(&ipf_solaris);
# else
#  if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
	callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL);
#  else
#   if (__FreeBSD_version >= 300000)
	ipfr_slowtimer_ch = timeout(ipfr_slowtimer, NULL, hz/2);
#   else
#    if defined(__OpenBSD__)
	timeout_add(&ipfr_slowtimer_ch, hz/2);
#    else
	timeout(ipfr_slowtimer, NULL, hz/2);
#    endif
#   endif
#   if (BSD < 199306) && !defined(__sgi)
	return 0;
#   endif /* FreeBSD */
#  endif /* NetBSD */
# endif /* SOLARIS */
}
#endif /* _KERNEL */