/*
 * Copyright (C) 1993-2001 by Darren Reed.
 *
 * See the IPFILTER.LICENCE file for details on licencing.
 */
#if defined(KERNEL) && !defined(_KERNEL)
# define _KERNEL
#endif

#ifdef __sgi
# include <sys/ptimers.h>
#endif
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/file.h>
#if !defined(_KERNEL) && !defined(KERNEL)
# include <stdio.h>
# include <string.h>
# include <stdlib.h>
#endif
#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
# include <sys/filio.h>
# include <sys/fcntl.h>
#else
# include <sys/ioctl.h>
#endif
#include <sys/uio.h>
#ifndef linux
# include <sys/protosw.h>
#endif
#include <sys/socket.h>
#if defined(_KERNEL) && !defined(linux)
# include <sys/systm.h>
#endif
#if !defined(__SVR4) && !defined(__svr4__)
# if defined(_KERNEL) && !defined(__sgi)
#  include <sys/kernel.h>
# endif
# ifndef linux
#  include <sys/mbuf.h>
# endif
#else
# include <sys/byteorder.h>
# ifdef _KERNEL
#  include <sys/dditypes.h>
# endif
# include <sys/stream.h>
# include <sys/kmem.h>
#endif
#include <net/if.h>
#ifdef sun
# include <net/af.h>
#endif
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifndef linux
# include <netinet/ip_var.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
#include "netinet/ip_compat.h"
#include <netinet/tcpip.h>
#include "netinet/ip_fil.h"
#include "netinet/ip_proxy.h"
#include "netinet/ip_nat.h"
#include "netinet/ip_frag.h"
#include "netinet/ip_state.h"
#include "netinet/ip_auth.h"
#if (__FreeBSD_version >= 300000)
# include <sys/malloc.h>
# if (defined(KERNEL) || defined(_KERNEL))
#  ifndef IPFILTER_LKM
#   include <sys/libkern.h>
#   include <sys/systm.h>
#  endif
extern struct callout_handle ipfr_slowtimer_ch;
# endif
#endif
#if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
# include <sys/callout.h>
extern struct callout ipfr_slowtimer_ch;
#endif
#if defined(__OpenBSD__)
# include <sys/timeout.h>
extern struct timeout ipfr_slowtimer_ch;
#endif

#if !defined(lint)
static const char sccsid[] = "@(#)ip_frag.c 1.11 3/24/96 (C) 1993-2000 Darren Reed";
static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_frag.c 92685 2002-03-19 11:44:16Z darrenr $";
94#endif 95 96 97static ipfr_t *ipfr_heads[IPFT_SIZE]; 98static ipfr_t *ipfr_nattab[IPFT_SIZE]; 99static ipfrstat_t ipfr_stats; 100static int ipfr_inuse = 0; 101 102int fr_ipfrttl = 120; /* 60 seconds */ 103int fr_frag_lock = 0; 104 105#ifdef _KERNEL 106# if SOLARIS2 >= 7 107extern timeout_id_t ipfr_timer_id; 108# else 109extern int ipfr_timer_id; 110# endif 111#endif 112#if (SOLARIS || defined(__sgi)) && defined(_KERNEL) 113extern KRWLOCK_T ipf_frag, ipf_natfrag, ipf_nat, ipf_mutex; 114# if SOLARIS 115extern KRWLOCK_T ipf_solaris; 116# else 117KRWLOCK_T ipf_solaris; 118# endif 119extern kmutex_t ipf_rw; 120#endif 121 122 123static ipfr_t *ipfr_new __P((ip_t *, fr_info_t *, u_int, ipfr_t **)); 124static ipfr_t *ipfr_lookup __P((ip_t *, fr_info_t *, ipfr_t **)); 125static void ipfr_delete __P((ipfr_t *)); 126 127 128ipfrstat_t *ipfr_fragstats() 129{ 130 ipfr_stats.ifs_table = ipfr_heads; 131 ipfr_stats.ifs_nattab = ipfr_nattab; 132 ipfr_stats.ifs_inuse = ipfr_inuse; 133 return &ipfr_stats; 134} 135 136 137/* 138 * add a new entry to the fragment cache, registering it as having come 139 * through this box, with the result of the filter operation. 
140 */ 141static ipfr_t *ipfr_new(ip, fin, pass, table) 142ip_t *ip; 143fr_info_t *fin; 144u_int pass; 145ipfr_t *table[]; 146{ 147 ipfr_t **fp, *fra, frag; 148 u_int idx, off; 149 150 if (ipfr_inuse >= IPFT_SIZE) 151 return NULL; 152 153 if (!(fin->fin_fl & FI_FRAG)) 154 return NULL; 155 156 frag.ipfr_p = ip->ip_p; 157 idx = ip->ip_p; 158 frag.ipfr_id = ip->ip_id; 159 idx += ip->ip_id; 160 frag.ipfr_tos = ip->ip_tos; 161 frag.ipfr_src.s_addr = ip->ip_src.s_addr; 162 idx += ip->ip_src.s_addr; 163 frag.ipfr_dst.s_addr = ip->ip_dst.s_addr; 164 idx += ip->ip_dst.s_addr; 165 frag.ipfr_ifp = fin->fin_ifp; 166 idx *= 127; 167 idx %= IPFT_SIZE; 168 169 frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY; 170 frag.ipfr_secmsk = fin->fin_fi.fi_secmsk; 171 frag.ipfr_auth = fin->fin_fi.fi_auth; 172 173 /* 174 * first, make sure it isn't already there... 175 */ 176 for (fp = &table[idx]; (fra = *fp); fp = &fra->ipfr_next) 177 if (!bcmp((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, 178 IPFR_CMPSZ)) { 179 ATOMIC_INCL(ipfr_stats.ifs_exists); 180 return NULL; 181 } 182 183 /* 184 * allocate some memory, if possible, if not, just record that we 185 * failed to do so. 186 */ 187 KMALLOC(fra, ipfr_t *); 188 if (fra == NULL) { 189 ATOMIC_INCL(ipfr_stats.ifs_nomem); 190 return NULL; 191 } 192 193 if ((fra->ipfr_rule = fin->fin_fr) != NULL) { 194 ATOMIC_INC32(fin->fin_fr->fr_ref); 195 } 196 197 198 /* 199 * Instert the fragment into the fragment table, copy the struct used 200 * in the search using bcopy rather than reassign each field. 201 * Set the ttl to the default and mask out logging from "pass" 202 */ 203 if ((fra->ipfr_next = table[idx])) 204 table[idx]->ipfr_prev = fra; 205 fra->ipfr_prev = NULL; 206 fra->ipfr_data = NULL; 207 table[idx] = fra; 208 bcopy((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, IPFR_CMPSZ); 209 fra->ipfr_ttl = fr_ipfrttl; 210 /* 211 * Compute the offset of the expected start of the next packet. 
212 */ 213 off = ip->ip_off & IP_OFFMASK; 214 if (!off) 215 fra->ipfr_seen0 = 1; 216 fra->ipfr_off = off + (fin->fin_dlen >> 3); 217 ATOMIC_INCL(ipfr_stats.ifs_new); 218 ATOMIC_INC32(ipfr_inuse); 219 return fra; 220} 221 222 223int ipfr_newfrag(ip, fin, pass) 224ip_t *ip; 225fr_info_t *fin; 226u_int pass; 227{ 228 ipfr_t *ipf; 229 230 if ((ip->ip_v != 4) || (fr_frag_lock)) 231 return -1; 232 WRITE_ENTER(&ipf_frag); 233 ipf = ipfr_new(ip, fin, pass, ipfr_heads); 234 RWLOCK_EXIT(&ipf_frag); 235 if (ipf == NULL) { 236 ATOMIC_INCL(frstats[fin->fin_out].fr_bnfr); 237 return -1; 238 } 239 ATOMIC_INCL(frstats[fin->fin_out].fr_nfr); 240 return 0; 241} 242 243 244int ipfr_nat_newfrag(ip, fin, pass, nat) 245ip_t *ip; 246fr_info_t *fin; 247u_int pass; 248nat_t *nat; 249{ 250 ipfr_t *ipf; 251 int off; 252 253 if ((ip->ip_v != 4) || (fr_frag_lock)) 254 return -1; 255 256 off = fin->fin_off; 257 off <<= 3; 258 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0)) 259 return NULL; 260 261 WRITE_ENTER(&ipf_natfrag); 262 ipf = ipfr_new(ip, fin, pass, ipfr_nattab); 263 if (ipf != NULL) { 264 ipf->ipfr_data = nat; 265 nat->nat_data = ipf; 266 } 267 RWLOCK_EXIT(&ipf_natfrag); 268 return ipf ? 0 : -1; 269} 270 271 272/* 273 * check the fragment cache to see if there is already a record of this packet 274 * with its filter result known. 275 */ 276static ipfr_t *ipfr_lookup(ip, fin, table) 277ip_t *ip; 278fr_info_t *fin; 279ipfr_t *table[]; 280{ 281 ipfr_t *f, frag; 282 u_int idx; 283 284 /* 285 * For fragments, we record protocol, packet id, TOS and both IP#'s 286 * (these should all be the same for all fragments of a packet). 287 * 288 * build up a hash value to index the table with. 
289 */ 290 frag.ipfr_p = ip->ip_p; 291 idx = ip->ip_p; 292 frag.ipfr_id = ip->ip_id; 293 idx += ip->ip_id; 294 frag.ipfr_tos = ip->ip_tos; 295 frag.ipfr_src.s_addr = ip->ip_src.s_addr; 296 idx += ip->ip_src.s_addr; 297 frag.ipfr_dst.s_addr = ip->ip_dst.s_addr; 298 idx += ip->ip_dst.s_addr; 299 frag.ipfr_ifp = fin->fin_ifp; 300 idx *= 127; 301 idx %= IPFT_SIZE; 302 303 frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY; 304 frag.ipfr_secmsk = fin->fin_fi.fi_secmsk; 305 frag.ipfr_auth = fin->fin_fi.fi_auth; 306 307 /* 308 * check the table, careful to only compare the right amount of data 309 */ 310 for (f = table[idx]; f; f = f->ipfr_next) 311 if (!bcmp((char *)&frag.ipfr_src, (char *)&f->ipfr_src, 312 IPFR_CMPSZ)) { 313 u_short atoff, off; 314 315 off = fin->fin_off; 316 317 /* 318 * XXX - We really need to be guarding against the 319 * retransmission of (src,dst,id,offset-range) here 320 * because a fragmented packet is never resent with 321 * the same IP ID#. 322 */ 323 if (f->ipfr_seen0) { 324 if (!off || (fin->fin_fl & FI_SHORT)) 325 continue; 326 } else if (!off) 327 f->ipfr_seen0 = 1; 328 329 if (f != table[idx]) { 330 /* 331 * move fragment info. to the top of the list 332 * to speed up searches. 333 */ 334 if ((f->ipfr_prev->ipfr_next = f->ipfr_next)) 335 f->ipfr_next->ipfr_prev = f->ipfr_prev; 336 f->ipfr_next = table[idx]; 337 table[idx]->ipfr_prev = f; 338 f->ipfr_prev = NULL; 339 table[idx] = f; 340 } 341 atoff = off + (fin->fin_dlen >> 3); 342 /* 343 * If we've follwed the fragments, and this is the 344 * last (in order), shrink expiration time. 
345 */ 346 if (off == f->ipfr_off) { 347 if (!(ip->ip_off & IP_MF)) 348 f->ipfr_ttl = 1; 349 else 350 f->ipfr_off = atoff; 351 } 352 ATOMIC_INCL(ipfr_stats.ifs_hits); 353 return f; 354 } 355 return NULL; 356} 357 358 359/* 360 * functional interface for NAT lookups of the NAT fragment cache 361 */ 362nat_t *ipfr_nat_knownfrag(ip, fin) 363ip_t *ip; 364fr_info_t *fin; 365{ 366 ipfr_t *ipf; 367 nat_t *nat; 368 int off; 369 370 if ((fin->fin_v != 4) || (fr_frag_lock)) 371 return NULL; 372 373 off = fin->fin_off; 374 off <<= 3; 375 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0)) 376 return NULL; 377 378 READ_ENTER(&ipf_natfrag); 379 ipf = ipfr_lookup(ip, fin, ipfr_nattab); 380 if (ipf != NULL) { 381 nat = ipf->ipfr_data; 382 /* 383 * This is the last fragment for this packet. 384 */ 385 if ((ipf->ipfr_ttl == 1) && (nat != NULL)) { 386 nat->nat_data = NULL; 387 ipf->ipfr_data = NULL; 388 } 389 } else 390 nat = NULL; 391 RWLOCK_EXIT(&ipf_natfrag); 392 return nat; 393} 394 395 396/* 397 * functional interface for normal lookups of the fragment cache 398 */ 399frentry_t *ipfr_knownfrag(ip, fin) 400ip_t *ip; 401fr_info_t *fin; 402{ 403 frentry_t *fr; 404 ipfr_t *fra; 405 int off; 406 407 if ((fin->fin_v != 4) || (fr_frag_lock)) 408 return NULL; 409 410 off = fin->fin_off; 411 off <<= 3; 412 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0)) 413 return NULL; 414 415 READ_ENTER(&ipf_frag); 416 fra = ipfr_lookup(ip, fin, ipfr_heads); 417 if (fra != NULL) 418 fr = fra->ipfr_rule; 419 else 420 fr = NULL; 421 RWLOCK_EXIT(&ipf_frag); 422 return fr; 423} 424 425 426/* 427 * forget any references to this external object. 
428 */ 429void ipfr_forget(nat) 430void *nat; 431{ 432 ipfr_t *fr; 433 int idx; 434 435 WRITE_ENTER(&ipf_natfrag); 436 for (idx = IPFT_SIZE - 1; idx >= 0; idx--) 437 for (fr = ipfr_heads[idx]; fr; fr = fr->ipfr_next) 438 if (fr->ipfr_data == nat) 439 fr->ipfr_data = NULL; 440 441 RWLOCK_EXIT(&ipf_natfrag); 442} 443 444 445static void ipfr_delete(fra) 446ipfr_t *fra; 447{ 448 frentry_t *fr; 449 450 fr = fra->ipfr_rule; 451 if (fr != NULL) { 452 ATOMIC_DEC32(fr->fr_ref); 453 if (fr->fr_ref == 0) 454 KFREE(fr); 455 } 456 if (fra->ipfr_prev) 457 fra->ipfr_prev->ipfr_next = fra->ipfr_next; 458 if (fra->ipfr_next) 459 fra->ipfr_next->ipfr_prev = fra->ipfr_prev; 460 KFREE(fra); 461} 462 463 464/* 465 * Free memory in use by fragment state info. kept. 466 */ 467void ipfr_unload() 468{ 469 ipfr_t **fp, *fra; 470 nat_t *nat; 471 int idx; 472 473 WRITE_ENTER(&ipf_frag); 474 for (idx = IPFT_SIZE - 1; idx >= 0; idx--) 475 for (fp = &ipfr_heads[idx]; (fra = *fp); ) { 476 *fp = fra->ipfr_next; 477 ipfr_delete(fra); 478 } 479 RWLOCK_EXIT(&ipf_frag); 480 481 WRITE_ENTER(&ipf_nat); 482 WRITE_ENTER(&ipf_natfrag); 483 for (idx = IPFT_SIZE - 1; idx >= 0; idx--) 484 for (fp = &ipfr_nattab[idx]; (fra = *fp); ) { 485 *fp = fra->ipfr_next; 486 nat = fra->ipfr_data; 487 if (nat != NULL) { 488 if (nat->nat_data == fra) 489 nat->nat_data = NULL; 490 } 491 ipfr_delete(fra); 492 } 493 RWLOCK_EXIT(&ipf_natfrag); 494 RWLOCK_EXIT(&ipf_nat); 495} 496 497
|
497#ifdef _KERNEL
|
498void ipfr_fragexpire() 499{ 500 ipfr_t **fp, *fra; 501 nat_t *nat; 502 int idx; 503#if defined(_KERNEL) 504# if !SOLARIS 505 int s; 506# endif 507#endif 508 509 if (fr_frag_lock) 510 return; 511 512 SPL_NET(s); 513 WRITE_ENTER(&ipf_frag); 514 515 /* 516 * Go through the entire table, looking for entries to expire, 517 * decreasing the ttl by one for each entry. If it reaches 0, 518 * remove it from the chain and free it. 519 */ 520 for (idx = IPFT_SIZE - 1; idx >= 0; idx--) 521 for (fp = &ipfr_heads[idx]; (fra = *fp); ) { 522 --fra->ipfr_ttl; 523 if (fra->ipfr_ttl == 0) { 524 *fp = fra->ipfr_next; 525 ipfr_delete(fra); 526 ATOMIC_INCL(ipfr_stats.ifs_expire); 527 ATOMIC_DEC32(ipfr_inuse); 528 } else 529 fp = &fra->ipfr_next; 530 } 531 RWLOCK_EXIT(&ipf_frag); 532 533 /* 534 * Same again for the NAT table, except that if the structure also 535 * still points to a NAT structure, and the NAT structure points back 536 * at the one to be free'd, NULL the reference from the NAT struct. 537 * NOTE: We need to grab both mutex's early, and in this order so as 538 * to prevent a deadlock if both try to expire at the same time. 539 */ 540 WRITE_ENTER(&ipf_nat); 541 WRITE_ENTER(&ipf_natfrag); 542 for (idx = IPFT_SIZE - 1; idx >= 0; idx--) 543 for (fp = &ipfr_nattab[idx]; (fra = *fp); ) { 544 --fra->ipfr_ttl; 545 if (fra->ipfr_ttl == 0) { 546 ATOMIC_INCL(ipfr_stats.ifs_expire); 547 ATOMIC_DEC32(ipfr_inuse); 548 nat = fra->ipfr_data; 549 if (nat != NULL) { 550 if (nat->nat_data == fra) 551 nat->nat_data = NULL; 552 } 553 *fp = fra->ipfr_next; 554 ipfr_delete(fra); 555 } else 556 fp = &fra->ipfr_next; 557 } 558 RWLOCK_EXIT(&ipf_natfrag); 559 RWLOCK_EXIT(&ipf_nat); 560 SPL_X(s); 561} 562 563 564/* 565 * Slowly expire held state for fragments. Timeouts are set * in expectation 566 * of this being called twice per second. 567 */
|
568#ifdef _KERNEL |
569# if (BSD >= 199306) || SOLARIS || defined(__sgi) 570# if defined(SOLARIS2) && (SOLARIS2 < 7) 571void ipfr_slowtimer() 572# else 573void ipfr_slowtimer __P((void *ptr)) 574# endif 575# else 576int ipfr_slowtimer() 577# endif
|
578#else 579void ipfr_slowtimer() 580#endif |
581{ 582#if defined(_KERNEL) && SOLARIS 583 extern int fr_running; 584 585 if (fr_running <= 0) 586 return; 587#endif 588 589 READ_ENTER(&ipf_solaris);
|
586#ifdef __sgi
|
590#if defined(__sgi) && defined(_KERNEL) |
591 ipfilter_sgi_intfsync(); 592#endif 593 594 ipfr_fragexpire(); 595 fr_timeoutstate(); 596 ip_natexpire(); 597 fr_authexpire();
|
598#if defined(_KERNEL) |
599# if SOLARIS 600 ipfr_timer_id = timeout(ipfr_slowtimer, NULL, drv_usectohz(500000)); 601 RWLOCK_EXIT(&ipf_solaris); 602# else 603# if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000) 604 callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL); 605# else 606# if (__FreeBSD_version >= 300000) 607 ipfr_slowtimer_ch = timeout(ipfr_slowtimer, NULL, hz/2); 608# else
|
604# if defined(__OpenBSD_)
605 timeout_add(&ipfr_slowtimer_ch, hz/2, ipfr_slowtimer, NULL);
|
609# if defined(__OpenBSD__) 610 timeout_add(&ipfr_slowtimer_ch, hz/2); |
611# else 612 timeout(ipfr_slowtimer, NULL, hz/2); 613# endif 614# endif 615# if (BSD < 199306) && !defined(__sgi) 616 return 0; 617# endif /* FreeBSD */ 618# endif /* NetBSD */ 619# endif /* SOLARIS */
|
615}
|
620#endif /* defined(_KERNEL) */
|
621} |
|