/* (extraction artifact: web diff-viewer header "old r89336 / new r92685" removed) */
1/*
2 * Copyright (C) 1993-2001 by Darren Reed.
3 *
4 * See the IPFILTER.LICENCE file for details on licencing.
5 */
6#if defined(KERNEL) && !defined(_KERNEL)
7# define _KERNEL
8#endif
9
10#include <sys/errno.h>
11#include <sys/types.h>
12#include <sys/param.h>
13#include <sys/time.h>
14#include <sys/file.h>
15#if !defined(_KERNEL) && !defined(KERNEL)
16# include <stdio.h>
17# include <string.h>
18# include <stdlib.h>
19#endif
20#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
21# include <sys/filio.h>
22# include <sys/fcntl.h>
23#else
24# include <sys/ioctl.h>
25#endif
26#include <sys/uio.h>
27#ifndef linux
28# include <sys/protosw.h>
29#endif
30#include <sys/socket.h>
31#if defined(_KERNEL) && !defined(linux)
32# include <sys/systm.h>
33#endif
34#if !defined(__SVR4) && !defined(__svr4__)
35# if defined(_KERNEL) && !defined(__sgi)
36# include <sys/kernel.h>
37# endif
38# ifndef linux
39# include <sys/mbuf.h>
40# endif
41#else
42# include <sys/byteorder.h>
43# ifdef _KERNEL
44# include <sys/dditypes.h>
45# endif
46# include <sys/stream.h>
47# include <sys/kmem.h>
48#endif
49#include <net/if.h>
50#ifdef sun
51# include <net/af.h>
52#endif
53#include <net/route.h>
54#include <netinet/in.h>
55#include <netinet/in_systm.h>
56#include <netinet/ip.h>
57#ifndef linux
58# include <netinet/ip_var.h>
59#endif
60#include <netinet/tcp.h>
61#include <netinet/udp.h>
62#include <netinet/ip_icmp.h>
63#include "netinet/ip_compat.h"
64#include <netinet/tcpip.h>
65#include "netinet/ip_fil.h"
66#include "netinet/ip_proxy.h"
67#include "netinet/ip_nat.h"
68#include "netinet/ip_frag.h"
69#include "netinet/ip_state.h"
70#include "netinet/ip_auth.h"
71#if (__FreeBSD_version >= 300000)
72# include <sys/malloc.h>
73# if (defined(KERNEL) || defined(_KERNEL))
74# ifndef IPFILTER_LKM
75# include <sys/libkern.h>
76# include <sys/systm.h>
77# endif
78extern struct callout_handle ipfr_slowtimer_ch;
79# endif
80#endif
81#if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
82# include <sys/callout.h>
83extern struct callout ipfr_slowtimer_ch;
84#endif
85#if defined(__OpenBSD__)
86# include <sys/timeout.h>
87extern struct timeout ipfr_slowtimer_ch;
88#endif
89
90#if !defined(lint)
91static const char sccsid[] = "@(#)ip_frag.c 1.11 3/24/96 (C) 1993-2000 Darren Reed";
92static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_frag.c 89336 2002-01-14 09:07:15Z alfred $";
93#endif
94
95
96static ipfr_t *ipfr_heads[IPFT_SIZE];
97static ipfr_t *ipfr_nattab[IPFT_SIZE];
98static ipfrstat_t ipfr_stats;
99static int ipfr_inuse = 0;
100
101int fr_ipfrttl = 120; /* 60 seconds */
102int fr_frag_lock = 0;
103
104#ifdef _KERNEL
105# if SOLARIS2 >= 7
106extern timeout_id_t ipfr_timer_id;
107# else
108extern int ipfr_timer_id;
109# endif
110#endif
111#if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
112extern KRWLOCK_T ipf_frag, ipf_natfrag, ipf_nat, ipf_mutex;
113# if SOLARIS
114extern KRWLOCK_T ipf_solaris;
115# else
116KRWLOCK_T ipf_solaris;
117# endif
118extern kmutex_t ipf_rw;
119#endif
120
121
122static ipfr_t *ipfr_new __P((ip_t *, fr_info_t *, u_int, ipfr_t **));
123static ipfr_t *ipfr_lookup __P((ip_t *, fr_info_t *, ipfr_t **));
124static void ipfr_delete __P((ipfr_t *));
125
126
127ipfrstat_t *ipfr_fragstats()
128{
129 ipfr_stats.ifs_table = ipfr_heads;
130 ipfr_stats.ifs_nattab = ipfr_nattab;
131 ipfr_stats.ifs_inuse = ipfr_inuse;
132 return &ipfr_stats;
133}
134
135
136/*
137 * add a new entry to the fragment cache, registering it as having come
138 * through this box, with the result of the filter operation.
139 */
/*
 * add a new entry to the fragment cache, registering it as having come
 * through this box, with the result of the filter operation.
 *
 * Returns the new entry, or NULL when: the cache is full, the packet is
 * not a fragment, a matching entry already exists, or allocation fails.
 * Caller must hold the write lock protecting "table".
 */
static ipfr_t *ipfr_new(ip, fin, pass, table)
ip_t *ip;
fr_info_t *fin;
u_int pass;
ipfr_t *table[];
{
	ipfr_t **fp, *fra, frag;
	u_int idx, off;

	/* Hard cap on the number of cached fragment states. */
	if (ipfr_inuse >= IPFT_SIZE)
		return NULL;

	if (!(fin->fin_fl & FI_FRAG))
		return NULL;

	/*
	 * Fill in the comparison key (proto, id, TOS, addresses, interface)
	 * and fold the same fields into a hash index for the table.
	 */
	frag.ipfr_p = ip->ip_p;
	idx = ip->ip_p;
	frag.ipfr_id = ip->ip_id;
	idx += ip->ip_id;
	frag.ipfr_tos = ip->ip_tos;
	frag.ipfr_src.s_addr = ip->ip_src.s_addr;
	idx += ip->ip_src.s_addr;
	frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
	idx += ip->ip_dst.s_addr;
	frag.ipfr_ifp = fin->fin_ifp;
	idx *= 127;
	idx %= IPFT_SIZE;

	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
	frag.ipfr_auth = fin->fin_fi.fi_auth;

	/*
	 * first, make sure it isn't already there...
	 */
	for (fp = &table[idx]; (fra = *fp); fp = &fra->ipfr_next)
		if (!bcmp((char *)&frag.ipfr_src, (char *)&fra->ipfr_src,
			  IPFR_CMPSZ)) {
			ATOMIC_INCL(ipfr_stats.ifs_exists);
			return NULL;
		}

	/*
	 * allocate some memory, if possible, if not, just record that we
	 * failed to do so.
	 */
	KMALLOC(fra, ipfr_t *);
	if (fra == NULL) {
		ATOMIC_INCL(ipfr_stats.ifs_nomem);
		return NULL;
	}

	/* Hold a reference on the rule so it outlives the cache entry. */
	if ((fra->ipfr_rule = fin->fin_fr) != NULL) {
		ATOMIC_INC32(fin->fin_fr->fr_ref);
	}


	/*
	 * Insert the fragment into the fragment table, copy the struct used
	 * in the search using bcopy rather than reassign each field.
	 * Set the ttl to the default and mask out logging from "pass"
	 */
	if ((fra->ipfr_next = table[idx]))
		table[idx]->ipfr_prev = fra;
	fra->ipfr_prev = NULL;
	fra->ipfr_data = NULL;
	table[idx] = fra;
	bcopy((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, IPFR_CMPSZ);
	fra->ipfr_ttl = fr_ipfrttl;
	/*
	 * Compute the offset of the expected start of the next packet.
	 */
	off = ip->ip_off & IP_OFFMASK;
	if (!off)
		fra->ipfr_seen0 = 1;	/* the offset-0 fragment has been seen */
	fra->ipfr_off = off + (fin->fin_dlen >> 3);
	ATOMIC_INCL(ipfr_stats.ifs_new);
	ATOMIC_INC32(ipfr_inuse);
	return fra;
}
220
221
222int ipfr_newfrag(ip, fin, pass)
223ip_t *ip;
224fr_info_t *fin;
225u_int pass;
226{
227 ipfr_t *ipf;
228
229 if ((ip->ip_v != 4) || (fr_frag_lock))
230 return -1;
231 WRITE_ENTER(&ipf_frag);
232 ipf = ipfr_new(ip, fin, pass, ipfr_heads);
233 RWLOCK_EXIT(&ipf_frag);
234 if (ipf == NULL) {
235 ATOMIC_INCL(frstats[fin->fin_out].fr_bnfr);
236 return -1;
237 }
238 ATOMIC_INCL(frstats[fin->fin_out].fr_nfr);
239 return 0;
240}
241
242
243int ipfr_nat_newfrag(ip, fin, pass, nat)
244ip_t *ip;
245fr_info_t *fin;
246u_int pass;
247nat_t *nat;
248{
249 ipfr_t *ipf;
250 int off;
251
252 if ((ip->ip_v != 4) || (fr_frag_lock))
253 return -1;
254
255 off = fin->fin_off;
256 off <<= 3;
257 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
258 return NULL;
259
260 WRITE_ENTER(&ipf_natfrag);
261 ipf = ipfr_new(ip, fin, pass, ipfr_nattab);
262 if (ipf != NULL) {
263 ipf->ipfr_data = nat;
264 nat->nat_data = ipf;
265 }
266 RWLOCK_EXIT(&ipf_natfrag);
267 return ipf ? 0 : -1;
268}
269
270
271/*
272 * check the fragment cache to see if there is already a record of this packet
273 * with its filter result known.
274 */
/*
 * check the fragment cache to see if there is already a record of this packet
 * with its filter result known.
 *
 * On a hit, the entry is moved to the front of its hash chain and its
 * expected next-fragment offset (or ttl, for the final fragment) is
 * updated.  NOTE(review): the hit path modifies the chain even though
 * some callers only hold a read lock on "table" - verify lock semantics.
 */
static ipfr_t *ipfr_lookup(ip, fin, table)
ip_t *ip;
fr_info_t *fin;
ipfr_t *table[];
{
	ipfr_t *f, frag;
	u_int idx;

	/*
	 * For fragments, we record protocol, packet id, TOS and both IP#'s
	 * (these should all be the same for all fragments of a packet).
	 *
	 * build up a hash value to index the table with.
	 */
	frag.ipfr_p = ip->ip_p;
	idx = ip->ip_p;
	frag.ipfr_id = ip->ip_id;
	idx += ip->ip_id;
	frag.ipfr_tos = ip->ip_tos;
	frag.ipfr_src.s_addr = ip->ip_src.s_addr;
	idx += ip->ip_src.s_addr;
	frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
	idx += ip->ip_dst.s_addr;
	frag.ipfr_ifp = fin->fin_ifp;
	idx *= 127;
	idx %= IPFT_SIZE;

	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
	frag.ipfr_auth = fin->fin_fi.fi_auth;

	/*
	 * check the table, careful to only compare the right amount of data
	 */
	for (f = table[idx]; f; f = f->ipfr_next)
		if (!bcmp((char *)&frag.ipfr_src, (char *)&f->ipfr_src,
			  IPFR_CMPSZ)) {
			u_short	atoff, off;

			off = fin->fin_off;

			/*
			 * XXX - We really need to be guarding against the
			 * retransmission of (src,dst,id,offset-range) here
			 * because a fragmented packet is never resent with
			 * the same IP ID#.
			 */
			if (f->ipfr_seen0) {
				/* duplicate/short first fragment: no match */
				if (!off || (fin->fin_fl & FI_SHORT))
					continue;
			} else if (!off)
				f->ipfr_seen0 = 1;

			if (f != table[idx]) {
				/*
				 * move fragment info. to the top of the list
				 * to speed up searches.
				 */
				if ((f->ipfr_prev->ipfr_next = f->ipfr_next))
					f->ipfr_next->ipfr_prev = f->ipfr_prev;
				f->ipfr_next = table[idx];
				table[idx]->ipfr_prev = f;
				f->ipfr_prev = NULL;
				table[idx] = f;
			}
			/* offset (in 8-byte units) where the next fragment
			 * should begin */
			atoff = off + (fin->fin_dlen >> 3);
			/*
			 * If we've followed the fragments, and this is the
			 * last (in order), shrink expiration time.
			 */
			if (off == f->ipfr_off) {
				if (!(ip->ip_off & IP_MF))
					f->ipfr_ttl = 1;
				else
					f->ipfr_off = atoff;
			}
			ATOMIC_INCL(ipfr_stats.ifs_hits);
			return f;
		}
	return NULL;
}
356
357
358/*
359 * functional interface for NAT lookups of the NAT fragment cache
360 */
361nat_t *ipfr_nat_knownfrag(ip, fin)
362ip_t *ip;
363fr_info_t *fin;
364{
365 ipfr_t *ipf;
366 nat_t *nat;
367 int off;
368
369 if ((fin->fin_v != 4) || (fr_frag_lock))
370 return NULL;
371
372 off = fin->fin_off;
373 off <<= 3;
374 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
375 return NULL;
376
377 READ_ENTER(&ipf_natfrag);
378 ipf = ipfr_lookup(ip, fin, ipfr_nattab);
379 if (ipf != NULL) {
380 nat = ipf->ipfr_data;
381 /*
382 * This is the last fragment for this packet.
383 */
384 if ((ipf->ipfr_ttl == 1) && (nat != NULL)) {
385 nat->nat_data = NULL;
386 ipf->ipfr_data = NULL;
387 }
388 } else
389 nat = NULL;
390 RWLOCK_EXIT(&ipf_natfrag);
391 return nat;
392}
393
394
395/*
396 * functional interface for normal lookups of the fragment cache
397 */
398frentry_t *ipfr_knownfrag(ip, fin)
399ip_t *ip;
400fr_info_t *fin;
401{
402 frentry_t *fr;
403 ipfr_t *fra;
404 int off;
405
406 if ((fin->fin_v != 4) || (fr_frag_lock))
407 return NULL;
408
409 off = fin->fin_off;
410 off <<= 3;
411 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
412 return NULL;
413
414 READ_ENTER(&ipf_frag);
415 fra = ipfr_lookup(ip, fin, ipfr_heads);
416 if (fra != NULL)
417 fr = fra->ipfr_rule;
418 else
419 fr = NULL;
420 RWLOCK_EXIT(&ipf_frag);
421 return fr;
422}
423
424
425/*
426 * forget any references to this external object.
427 */
428void ipfr_forget(nat)
429void *nat;
430{
431 ipfr_t *fr;
432 int idx;
433
434 WRITE_ENTER(&ipf_natfrag);
435 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
436 for (fr = ipfr_heads[idx]; fr; fr = fr->ipfr_next)
437 if (fr->ipfr_data == nat)
438 fr->ipfr_data = NULL;
439
440 RWLOCK_EXIT(&ipf_natfrag);
441}
442
443
444static void ipfr_delete(fra)
445ipfr_t *fra;
446{
447 frentry_t *fr;
448
449 fr = fra->ipfr_rule;
450 if (fr != NULL) {
451 ATOMIC_DEC32(fr->fr_ref);
452 if (fr->fr_ref == 0)
453 KFREE(fr);
454 }
455 if (fra->ipfr_prev)
456 fra->ipfr_prev->ipfr_next = fra->ipfr_next;
457 if (fra->ipfr_next)
458 fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
459 KFREE(fra);
460}
461
462
463/*
464 * Free memory in use by fragment state info. kept.
465 */
466void ipfr_unload()
467{
468 ipfr_t **fp, *fra;
469 nat_t *nat;
470 int idx;
471
472 WRITE_ENTER(&ipf_frag);
473 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
474 for (fp = &ipfr_heads[idx]; (fra = *fp); ) {
475 *fp = fra->ipfr_next;
476 ipfr_delete(fra);
477 }
478 RWLOCK_EXIT(&ipf_frag);
479
480 WRITE_ENTER(&ipf_nat);
481 WRITE_ENTER(&ipf_natfrag);
482 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
483 for (fp = &ipfr_nattab[idx]; (fra = *fp); ) {
484 *fp = fra->ipfr_next;
485 nat = fra->ipfr_data;
486 if (nat != NULL) {
487 if (nat->nat_data == fra)
488 nat->nat_data = NULL;
489 }
490 ipfr_delete(fra);
491 }
492 RWLOCK_EXIT(&ipf_natfrag);
493 RWLOCK_EXIT(&ipf_nat);
494}
495
496
497#ifdef _KERNEL
/*
 * Periodic expiry pass over both fragment tables.  Each entry's ttl is
 * decremented by one per call; entries reaching zero are unlinked and
 * freed.  Driven from ipfr_slowtimer(), i.e. roughly twice per second.
 */
void ipfr_fragexpire()
{
	ipfr_t **fp, *fra;
	nat_t *nat;
	int idx;
#if defined(_KERNEL)
# if !SOLARIS
	int s;	/* saved SPL on non-Solaris kernels */
# endif
#endif

	/* Expiry is suspended while the fragment cache is locked. */
	if (fr_frag_lock)
		return;

	SPL_NET(s);
	WRITE_ENTER(&ipf_frag);

	/*
	 * Go through the entire table, looking for entries to expire,
	 * decreasing the ttl by one for each entry.  If it reaches 0,
	 * remove it from the chain and free it.
	 */
	for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
		for (fp = &ipfr_heads[idx]; (fra = *fp); ) {
			--fra->ipfr_ttl;
			if (fra->ipfr_ttl == 0) {
				*fp = fra->ipfr_next;
				ipfr_delete(fra);
				ATOMIC_INCL(ipfr_stats.ifs_expire);
				ATOMIC_DEC32(ipfr_inuse);
			} else
				fp = &fra->ipfr_next;
		}
	RWLOCK_EXIT(&ipf_frag);

	/*
	 * Same again for the NAT table, except that if the structure also
	 * still points to a NAT structure, and the NAT structure points back
	 * at the one to be free'd, NULL the reference from the NAT struct.
	 * NOTE: We need to grab both mutex's early, and in this order so as
	 * to prevent a deadlock if both try to expire at the same time.
	 */
	WRITE_ENTER(&ipf_nat);
	WRITE_ENTER(&ipf_natfrag);
	for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
		for (fp = &ipfr_nattab[idx]; (fra = *fp); ) {
			--fra->ipfr_ttl;
			if (fra->ipfr_ttl == 0) {
				ATOMIC_INCL(ipfr_stats.ifs_expire);
				ATOMIC_DEC32(ipfr_inuse);
				nat = fra->ipfr_data;
				if (nat != NULL) {
					if (nat->nat_data == fra)
						nat->nat_data = NULL;
				}
				*fp = fra->ipfr_next;
				ipfr_delete(fra);
			} else
				fp = &fra->ipfr_next;
		}
	RWLOCK_EXIT(&ipf_natfrag);
	RWLOCK_EXIT(&ipf_nat);
	SPL_X(s);
}
562
563
564/*
565 * Slowly expire held state for fragments. Timeouts are set * in expectation
566 * of this being called twice per second.
567 */
568# if (BSD >= 199306) || SOLARIS || defined(__sgi)
569# if defined(SOLARIS2) && (SOLARIS2 < 7)
570void ipfr_slowtimer()
571# else
572void ipfr_slowtimer __P((void *ptr))
573# endif
574# else
575int ipfr_slowtimer()
576# endif
577{
578#if defined(_KERNEL) && SOLARIS
579 extern int fr_running;
580
581 if (fr_running <= 0)
582 return;
583#endif
584
585 READ_ENTER(&ipf_solaris);
586#ifdef __sgi
587 ipfilter_sgi_intfsync();
588#endif
589
590 ipfr_fragexpire();
591 fr_timeoutstate();
592 ip_natexpire();
593 fr_authexpire();
594# if SOLARIS
595 ipfr_timer_id = timeout(ipfr_slowtimer, NULL, drv_usectohz(500000));
596 RWLOCK_EXIT(&ipf_solaris);
597# else
598# if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
599 callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL);
600# else
601# if (__FreeBSD_version >= 300000)
602 ipfr_slowtimer_ch = timeout(ipfr_slowtimer, NULL, hz/2);
603# else
604# if defined(__OpenBSD_)
605 timeout_add(&ipfr_slowtimer_ch, hz/2, ipfr_slowtimer, NULL);
606# else
607 timeout(ipfr_slowtimer, NULL, hz/2);
608# endif
609# endif
610# if (BSD < 199306) && !defined(__sgi)
611 return 0;
612# endif /* FreeBSD */
613# endif /* NetBSD */
614# endif /* SOLARIS */
615}
616#endif /* defined(_KERNEL) */