ip_state.c revision 110916
153642Sguido/* 292685Sdarrenr * Copyright (C) 1995-2002 by Darren Reed. 353642Sguido * 480482Sdarrenr * See the IPFILTER.LICENCE file for details on licencing. 553642Sguido */ 653642Sguido 7110916Sdarrenr#if defined(__sgi) && (IRIX > 602) 892685Sdarrenr# include <sys/ptimers.h> 992685Sdarrenr#endif 1053642Sguido#include <sys/errno.h> 1153642Sguido#include <sys/types.h> 1253642Sguido#include <sys/param.h> 1353642Sguido#include <sys/file.h> 1453642Sguido#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 1553642Sguido defined(_KERNEL) 1653642Sguido# include "opt_ipfilter_log.h" 1753642Sguido#endif 1860854Sdarrenr#if defined(_KERNEL) && defined(__FreeBSD_version) && \ 1960854Sdarrenr (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) 2060854Sdarrenr#include "opt_inet6.h" 2160854Sdarrenr#endif 2253642Sguido#if !defined(_KERNEL) && !defined(KERNEL) && !defined(__KERNEL__) 2353642Sguido# include <stdio.h> 2453642Sguido# include <stdlib.h> 2553642Sguido# include <string.h> 2653642Sguido#else 2753642Sguido# ifdef linux 2853642Sguido# include <linux/kernel.h> 2953642Sguido# include <linux/module.h> 3053642Sguido# endif 3153642Sguido#endif 3260854Sdarrenr#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000) 3353642Sguido# include <sys/filio.h> 3453642Sguido# include <sys/fcntl.h> 3553642Sguido# if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) 3653642Sguido# include "opt_ipfilter.h" 3753642Sguido# endif 3853642Sguido#else 3953642Sguido# include <sys/ioctl.h> 4053642Sguido#endif 4153642Sguido#include <sys/time.h> 4253642Sguido#ifndef linux 4353642Sguido# include <sys/protosw.h> 4453642Sguido#endif 4553642Sguido#include <sys/socket.h> 4657096Sguido#if (defined(_KERNEL) || defined(KERNEL)) && !defined(linux) 4753642Sguido# include <sys/systm.h> 4853642Sguido#endif 4953642Sguido#if !defined(__SVR4) && !defined(__svr4__) 5053642Sguido# ifndef linux 5153642Sguido# include <sys/mbuf.h> 5253642Sguido# endif 5353642Sguido#else 5453642Sguido# include <sys/filio.h> 5553642Sguido# include <sys/byteorder.h> 5653642Sguido# ifdef _KERNEL 5753642Sguido# include <sys/dditypes.h> 5853642Sguido# endif 5953642Sguido# include <sys/stream.h> 6053642Sguido# include <sys/kmem.h> 6153642Sguido#endif 6253642Sguido 6353642Sguido#include <net/if.h> 6453642Sguido#ifdef sun 6553642Sguido# include <net/af.h> 6653642Sguido#endif 6753642Sguido#include <net/route.h> 6853642Sguido#include <netinet/in.h> 6953642Sguido#include <netinet/in_systm.h> 7053642Sguido#include <netinet/ip.h> 7153642Sguido#include <netinet/tcp.h> 7253642Sguido#ifndef linux 7353642Sguido# include <netinet/ip_var.h> 7453642Sguido# include <netinet/tcp_fsm.h> 7553642Sguido#endif 7653642Sguido#include <netinet/udp.h> 7753642Sguido#include <netinet/ip_icmp.h> 7853642Sguido#include "netinet/ip_compat.h" 7953642Sguido#include <netinet/tcpip.h> 8053642Sguido#include "netinet/ip_fil.h" 8153642Sguido#include "netinet/ip_nat.h" 8253642Sguido#include "netinet/ip_frag.h" 8353642Sguido#include "netinet/ip_state.h" 8460854Sdarrenr#ifdef USE_INET6 8560854Sdarrenr#include <netinet/icmp6.h> 8660854Sdarrenr#endif 8753642Sguido#if (__FreeBSD_version >= 300000) 8853642Sguido# include <sys/malloc.h> 8953642Sguido# if (defined(_KERNEL) || defined(KERNEL)) && !defined(IPFILTER_LKM) 9053642Sguido# include <sys/libkern.h> 9153642Sguido# include <sys/systm.h> 9253642Sguido# endif 9353642Sguido#endif 9453642Sguido 9580482Sdarrenr#if !defined(lint) 9680482Sdarrenrstatic const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; 9780482Sdarrenr/* static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.30.2.38 2001/07/23 13:49:46 darrenr Exp $"; */ 9880482Sdarrenrstatic const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_state.c 110916 2003-02-15 06:25:25Z darrenr $"; 9980482Sdarrenr#endif 10080482Sdarrenr 10153642Sguido#ifndef MIN 10253642Sguido# define MIN(a,b) (((a)<(b))?(a):(b)) 10353642Sguido#endif 10453642Sguido 10553642Sguido#define TCP_CLOSE (TH_FIN|TH_RST) 10653642Sguido 10760854Sdarrenrstatic ipstate_t **ips_table = NULL; 10860854Sdarrenrstatic int ips_num = 0; 10967614Sdarrenrstatic int ips_wild = 0; 11060854Sdarrenrstatic ips_stat_t ips_stats; 11153642Sguido#if (SOLARIS || defined(__sgi)) && defined(_KERNEL) 11253642Sguidoextern KRWLOCK_T ipf_state, ipf_mutex; 11353642Sguidoextern kmutex_t ipf_rw; 11453642Sguido#endif 11553642Sguido 11660854Sdarrenr#ifdef USE_INET6 11760854Sdarrenrstatic frentry_t *fr_checkicmp6matchingstate __P((ip6_t *, fr_info_t *)); 11860854Sdarrenr#endif 11960854Sdarrenrstatic int fr_matchsrcdst __P((ipstate_t *, union i6addr, union i6addr, 12053642Sguido fr_info_t *, tcphdr_t *)); 12153642Sguidostatic frentry_t *fr_checkicmpmatchingstate __P((ip_t *, fr_info_t *)); 122110916Sdarrenrstatic int fr_matchicmpqueryreply __P((int, ipstate_t *, icmphdr_t *, int)); 123110916Sdarrenrstatic int fr_state_flush __P((int, int)); 12453642Sguidostatic ips_stat_t *fr_statetstats __P((void)); 12553642Sguidostatic void fr_delstate __P((ipstate_t *)); 12660854Sdarrenrstatic int fr_state_remove __P((caddr_t)); 12767614Sdarrenrstatic void fr_ipsmove __P((ipstate_t **, ipstate_t *, u_int)); 12895418Sdarrenrstatic int fr_tcpoptions __P((tcphdr_t *)); 12960854Sdarrenrint fr_stputent __P((caddr_t)); 13060854Sdarrenrint fr_stgetent __P((caddr_t)); 13160854Sdarrenrvoid fr_stinsert __P((ipstate_t *)); 13253642Sguido 13353642Sguido 13453642Sguido#define FIVE_DAYS (2 * 5 * 86400) /* 5 days: half closed session */ 13553642Sguido 13653642Sguido#define TCP_MSL 240 /* 2 minutes */ 13753642Sguidou_long fr_tcpidletimeout = FIVE_DAYS, 13853642Sguido fr_tcpclosewait = 2 * TCP_MSL, 13953642Sguido fr_tcplastack = 2 * TCP_MSL, 14053642Sguido fr_tcptimeout = 2 * TCP_MSL, 14167614Sdarrenr fr_tcpclosed = 120, 14267614Sdarrenr fr_tcphalfclosed = 2 * 2 * 3600, /* 2 hours */ 14353642Sguido fr_udptimeout = 240, 14480482Sdarrenr fr_udpacktimeout = 24, 14580482Sdarrenr fr_icmptimeout = 120, 14680482Sdarrenr fr_icmpacktimeout = 12; 14753642Sguidoint fr_statemax = IPSTATE_MAX, 14853642Sguido fr_statesize = IPSTATE_SIZE; 14960854Sdarrenrint fr_state_doflush = 0, 15060854Sdarrenr fr_state_lock = 0; 15192685Sdarrenripstate_t *ips_list = NULL; 15253642Sguido 15360854Sdarrenrstatic int icmpreplytype4[ICMP_MAXTYPE + 1]; 15492685Sdarrenr#ifdef USE_INET6 15592685Sdarrenrstatic int icmpreplytype6[ICMP6_MAXTYPE + 1]; 15692685Sdarrenr#endif 15753642Sguido 15853642Sguidoint fr_stateinit() 15953642Sguido{ 16060854Sdarrenr int i; 16160854Sdarrenr 16253642Sguido KMALLOCS(ips_table, ipstate_t **, fr_statesize * sizeof(ipstate_t *)); 16353642Sguido if (ips_table != NULL) 16453642Sguido bzero((char *)ips_table, fr_statesize * sizeof(ipstate_t *)); 16553642Sguido else 16653642Sguido return -1; 16760854Sdarrenr 16860854Sdarrenr /* fill icmp reply type table */ 16960854Sdarrenr for (i = 0; i <= ICMP_MAXTYPE; i++) 17060854Sdarrenr icmpreplytype4[i] = -1; 17160854Sdarrenr icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY; 17260854Sdarrenr icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY; 17360854Sdarrenr icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY; 17460854Sdarrenr icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY; 17592685Sdarrenr#ifdef USE_INET6 17692685Sdarrenr /* fill icmp reply type table */ 17792685Sdarrenr for (i = 0; i <= ICMP6_MAXTYPE; i++) 17892685Sdarrenr icmpreplytype6[i] = -1; 17992685Sdarrenr icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY; 18092685Sdarrenr icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT; 18192685Sdarrenr icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY; 18292685Sdarrenr icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT; 18392685Sdarrenr icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; 18492685Sdarrenr#endif 18560854Sdarrenr 18653642Sguido return 0; 18753642Sguido} 18853642Sguido 18953642Sguido 19053642Sguidostatic ips_stat_t *fr_statetstats() 19153642Sguido{ 19253642Sguido ips_stats.iss_active = ips_num; 19353642Sguido ips_stats.iss_table = ips_table; 19460854Sdarrenr ips_stats.iss_list = ips_list; 19553642Sguido return &ips_stats; 19653642Sguido} 19753642Sguido 19853642Sguido 19953642Sguido/* 20053642Sguido * flush state tables. two actions currently defined: 20153642Sguido * which == 0 : flush all state table entries 20253642Sguido * which == 1 : flush TCP connections which have started to close but are 20364580Sdarrenr * stuck for some reason. 20492685Sdarrenr * which == 2 : flush TCP connections which have been idle for a long time, 20592685Sdarrenr * starting at > 4 days idle and working back in successive half- 20692685Sdarrenr * days to at most 12 hours old. 20753642Sguido */ 208110916Sdarrenrstatic int fr_state_flush(which, proto) 209110916Sdarrenrint which, proto; 21053642Sguido{ 21192685Sdarrenr ipstate_t *is, **isp; 21253642Sguido#if defined(_KERNEL) && !SOLARIS 21353642Sguido int s; 21453642Sguido#endif 21592685Sdarrenr int delete, removed = 0, try; 21653642Sguido 21753642Sguido SPL_NET(s); 21860854Sdarrenr for (isp = &ips_list; (is = *isp); ) { 21960854Sdarrenr delete = 0; 22053642Sguido 221110916Sdarrenr if ((proto != 0) && (is->is_v != proto)) 222110916Sdarrenr continue; 223110916Sdarrenr 22460854Sdarrenr switch (which) 22560854Sdarrenr { 22660854Sdarrenr case 0 : 22760854Sdarrenr delete = 1; 22860854Sdarrenr break; 22960854Sdarrenr case 1 : 23092685Sdarrenr case 2 : 23160854Sdarrenr if (is->is_p != IPPROTO_TCP) 23260854Sdarrenr break; 23360854Sdarrenr if ((is->is_state[0] != TCPS_ESTABLISHED) || 23460854Sdarrenr (is->is_state[1] != TCPS_ESTABLISHED)) 23553642Sguido delete = 1; 23660854Sdarrenr break; 23760854Sdarrenr } 23853642Sguido 23960854Sdarrenr if (delete) { 24060854Sdarrenr if (is->is_p == IPPROTO_TCP) 24160854Sdarrenr ips_stats.iss_fin++; 24260854Sdarrenr else 24360854Sdarrenr ips_stats.iss_expire++; 24453642Sguido#ifdef IPFILTER_LOG 24560854Sdarrenr ipstate_log(is, ISL_FLUSH); 24653642Sguido#endif 24760854Sdarrenr fr_delstate(is); 24860854Sdarrenr removed++; 24960854Sdarrenr } else 25060854Sdarrenr isp = &is->is_next; 25160854Sdarrenr } 25292685Sdarrenr 25392685Sdarrenr /* 25492685Sdarrenr * Asked to remove inactive entries, try again if first attempt 25592685Sdarrenr * failed. In this case, 86400 is half a day because the counter is 25692685Sdarrenr * activated every half second. 25792685Sdarrenr */ 25892685Sdarrenr if ((which == 2) && (removed == 0)) { 25992685Sdarrenr try = 86400; /* half a day */ 26092685Sdarrenr for (; (try < FIVE_DAYS) && (removed == 0); try += 86400) { 26192685Sdarrenr for (isp = &ips_list; (is = *isp); ) { 26292685Sdarrenr delete = 0; 26392685Sdarrenr if ((is->is_p == IPPROTO_TCP) && 26492685Sdarrenr ((is->is_state[0] == TCPS_ESTABLISHED) || 26592685Sdarrenr (is->is_state[1] == TCPS_ESTABLISHED)) && 26692685Sdarrenr (is->is_age < try)) { 26792685Sdarrenr ips_stats.iss_fin++; 26892685Sdarrenr delete = 1; 26992685Sdarrenr } else if ((is->is_p != IPPROTO_TCP) && 27092685Sdarrenr (is->is_pkts > 1)) { 27192685Sdarrenr ips_stats.iss_expire++; 27292685Sdarrenr delete = 1; 27392685Sdarrenr } 27492685Sdarrenr if (delete) { 27592685Sdarrenr#ifdef IPFILTER_LOG 27692685Sdarrenr ipstate_log(is, ISL_FLUSH); 27792685Sdarrenr#endif 27892685Sdarrenr fr_delstate(is); 27992685Sdarrenr removed++; 28092685Sdarrenr } else 28192685Sdarrenr isp = &is->is_next; 28292685Sdarrenr } 28392685Sdarrenr } 28492685Sdarrenr } 28592685Sdarrenr 28653642Sguido SPL_X(s); 28753642Sguido return removed; 28853642Sguido} 28953642Sguido 29053642Sguido 29160854Sdarrenrstatic int fr_state_remove(data) 29260854Sdarrenrcaddr_t data; 29360854Sdarrenr{ 29460854Sdarrenr ipstate_t *sp, st; 29560854Sdarrenr int error; 29660854Sdarrenr 29760854Sdarrenr sp = &st; 29860854Sdarrenr error = IRCOPYPTR(data, (caddr_t)&st, sizeof(st)); 29960854Sdarrenr if (error) 30060854Sdarrenr return EFAULT; 30160854Sdarrenr 30280482Sdarrenr WRITE_ENTER(&ipf_state); 30360854Sdarrenr for (sp = ips_list; sp; sp = sp->is_next) 30460854Sdarrenr if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && 30567614Sdarrenr !bcmp((char *)&sp->is_src, (char *)&st.is_src, 30667614Sdarrenr sizeof(st.is_src)) && 30795418Sdarrenr !bcmp((char *)&sp->is_dst, (char *)&st.is_dst, 30867614Sdarrenr sizeof(st.is_dst)) && 30967614Sdarrenr !bcmp((char *)&sp->is_ps, (char *)&st.is_ps, 31067614Sdarrenr sizeof(st.is_ps))) { 31160854Sdarrenr#ifdef IPFILTER_LOG 31260854Sdarrenr ipstate_log(sp, ISL_REMOVE); 31360854Sdarrenr#endif 31460854Sdarrenr fr_delstate(sp); 31560854Sdarrenr RWLOCK_EXIT(&ipf_state); 31660854Sdarrenr return 0; 31760854Sdarrenr } 31880482Sdarrenr RWLOCK_EXIT(&ipf_state); 31960854Sdarrenr return ESRCH; 32060854Sdarrenr} 32160854Sdarrenr 32260854Sdarrenr 32353642Sguidoint fr_state_ioctl(data, cmd, mode) 32453642Sguidocaddr_t data; 32553642Sguido#if defined(__NetBSD__) || defined(__OpenBSD__) 32653642Sguidou_long cmd; 32753642Sguido#else 32853642Sguidoint cmd; 32953642Sguido#endif 33053642Sguidoint mode; 33153642Sguido{ 33260854Sdarrenr int arg, ret, error = 0; 33353642Sguido 33453642Sguido switch (cmd) 33553642Sguido { 33660854Sdarrenr case SIOCDELST : 33760854Sdarrenr error = fr_state_remove(data); 33860854Sdarrenr break; 33953642Sguido case SIOCIPFFL : 34060854Sdarrenr error = IRCOPY(data, (caddr_t)&arg, sizeof(arg)); 34160854Sdarrenr if (error) 34260854Sdarrenr break; 34353642Sguido if (arg == 0 || arg == 1) { 34460854Sdarrenr WRITE_ENTER(&ipf_state); 345110916Sdarrenr ret = fr_state_flush(arg, 4); 34660854Sdarrenr RWLOCK_EXIT(&ipf_state); 34760854Sdarrenr error = IWCOPY((caddr_t)&ret, data, sizeof(ret)); 34853642Sguido } else 34953642Sguido error = EINVAL; 35053642Sguido break; 351110916Sdarrenr#ifdef USE_INET6 352110916Sdarrenr case SIOCIPFL6 : 353110916Sdarrenr error = IRCOPY(data, (caddr_t)&arg, sizeof(arg)); 354110916Sdarrenr if (error) 355110916Sdarrenr break; 356110916Sdarrenr if (arg == 0 || arg == 1) { 357110916Sdarrenr WRITE_ENTER(&ipf_state); 358110916Sdarrenr ret = fr_state_flush(arg, 6); 359110916Sdarrenr RWLOCK_EXIT(&ipf_state); 360110916Sdarrenr error = IWCOPY((caddr_t)&ret, data, sizeof(ret)); 361110916Sdarrenr } else 362110916Sdarrenr error = EINVAL; 363110916Sdarrenr break; 364110916Sdarrenr#endif 36555929Sguido#ifdef IPFILTER_LOG 36655929Sguido case SIOCIPFFB : 36755929Sguido if (!(mode & FWRITE)) 36855929Sguido error = EPERM; 36960854Sdarrenr else { 37060854Sdarrenr int tmp; 37160854Sdarrenr 37260854Sdarrenr tmp = ipflog_clear(IPL_LOGSTATE); 37360854Sdarrenr IWCOPY((char *)&tmp, data, sizeof(tmp)); 37460854Sdarrenr } 37555929Sguido break; 37655929Sguido#endif 37760854Sdarrenr case SIOCGETFS : 37860854Sdarrenr error = IWCOPYPTR((caddr_t)fr_statetstats(), data, 37960854Sdarrenr sizeof(ips_stat_t)); 38053642Sguido break; 38153642Sguido case FIONREAD : 38253642Sguido#ifdef IPFILTER_LOG 38372006Sdarrenr arg = (int)iplused[IPL_LOGSTATE]; 38472006Sdarrenr error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg)); 38553642Sguido#endif 38653642Sguido break; 38760854Sdarrenr case SIOCSTLCK : 38860854Sdarrenr error = fr_lock(data, &fr_state_lock); 38960854Sdarrenr break; 39060854Sdarrenr case SIOCSTPUT : 39160854Sdarrenr if (!fr_state_lock) { 39260854Sdarrenr error = EACCES; 39360854Sdarrenr break; 39460854Sdarrenr } 39560854Sdarrenr error = fr_stputent(data); 39660854Sdarrenr break; 39760854Sdarrenr case SIOCSTGET : 39860854Sdarrenr if (!fr_state_lock) { 39960854Sdarrenr error = EACCES; 40060854Sdarrenr break; 40160854Sdarrenr } 40260854Sdarrenr error = fr_stgetent(data); 40360854Sdarrenr break; 40453642Sguido default : 40553642Sguido error = EINVAL; 40653642Sguido break; 40753642Sguido } 40853642Sguido return error; 40953642Sguido} 41053642Sguido 41153642Sguido 41292685Sdarrenr/* 41392685Sdarrenr * Copy out state information from the kernel to a user space process. 41492685Sdarrenr */ 41560854Sdarrenrint fr_stgetent(data) 41660854Sdarrenrcaddr_t data; 41760854Sdarrenr{ 41860854Sdarrenr register ipstate_t *is, *isn; 41992685Sdarrenr ipstate_save_t ips; 42060854Sdarrenr int error; 42160854Sdarrenr 42292685Sdarrenr error = IRCOPYPTR(data, (caddr_t)&ips, sizeof(ips)); 42360854Sdarrenr if (error) 42492685Sdarrenr return error; 42560854Sdarrenr 42660854Sdarrenr isn = ips.ips_next; 42760854Sdarrenr if (!isn) { 42860854Sdarrenr isn = ips_list; 42960854Sdarrenr if (isn == NULL) { 43060854Sdarrenr if (ips.ips_next == NULL) 43160854Sdarrenr return ENOENT; 43260854Sdarrenr return 0; 43360854Sdarrenr } 43460854Sdarrenr } else { 43560854Sdarrenr /* 43660854Sdarrenr * Make sure the pointer we're copying from exists in the 43760854Sdarrenr * current list of entries. Security precaution to prevent 43860854Sdarrenr * copying of random kernel data. 43960854Sdarrenr */ 44060854Sdarrenr for (is = ips_list; is; is = is->is_next) 44160854Sdarrenr if (is == isn) 44260854Sdarrenr break; 44360854Sdarrenr if (!is) 44460854Sdarrenr return ESRCH; 44560854Sdarrenr } 44660854Sdarrenr ips.ips_next = isn->is_next; 44760854Sdarrenr bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); 44860854Sdarrenr if (isn->is_rule) 44960854Sdarrenr bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, 45060854Sdarrenr sizeof(ips.ips_fr)); 45192685Sdarrenr error = IWCOPYPTR((caddr_t)&ips, data, sizeof(ips)); 45260854Sdarrenr if (error) 45364580Sdarrenr error = EFAULT; 45464580Sdarrenr return error; 45560854Sdarrenr} 45660854Sdarrenr 45760854Sdarrenr 45860854Sdarrenrint fr_stputent(data) 45960854Sdarrenrcaddr_t data; 46060854Sdarrenr{ 46160854Sdarrenr register ipstate_t *is, *isn; 46292685Sdarrenr ipstate_save_t ips; 46392685Sdarrenr int error, out, i; 46460854Sdarrenr frentry_t *fr; 46592685Sdarrenr char *name; 46660854Sdarrenr 46792685Sdarrenr error = IRCOPYPTR(data, (caddr_t)&ips, sizeof(ips)); 46860854Sdarrenr if (error) 46992685Sdarrenr return error; 47060854Sdarrenr 47160854Sdarrenr KMALLOC(isn, ipstate_t *); 47260854Sdarrenr if (isn == NULL) 47360854Sdarrenr return ENOMEM; 47460854Sdarrenr 47560854Sdarrenr bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); 47660854Sdarrenr fr = isn->is_rule; 47760854Sdarrenr if (fr != NULL) { 47860854Sdarrenr if (isn->is_flags & FI_NEWFR) { 47960854Sdarrenr KMALLOC(fr, frentry_t *); 48060854Sdarrenr if (fr == NULL) { 48160854Sdarrenr KFREE(isn); 48260854Sdarrenr return ENOMEM; 48360854Sdarrenr } 48460854Sdarrenr bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); 48563523Sdarrenr out = fr->fr_flags & FR_OUTQUE ? 1 : 0; 48660854Sdarrenr isn->is_rule = fr; 48760854Sdarrenr ips.ips_is.is_rule = fr; 48892685Sdarrenr 48992685Sdarrenr /* 49092685Sdarrenr * Look up all the interface names in the rule. 49192685Sdarrenr */ 49292685Sdarrenr for (i = 0; i < 4; i++) { 49392685Sdarrenr name = fr->fr_ifnames[i]; 49492685Sdarrenr if ((name[1] == '\0') && 49592685Sdarrenr ((name[0] == '-') || (name[0] == '*'))) { 49692685Sdarrenr fr->fr_ifas[i] = NULL; 49792685Sdarrenr } else if (*name != '\0') { 49892685Sdarrenr fr->fr_ifas[i] = GETUNIT(name, 49992685Sdarrenr fr->fr_v); 50092685Sdarrenr if (fr->fr_ifas[i] == NULL) 50192685Sdarrenr fr->fr_ifas[i] = (void *)-1; 50292685Sdarrenr else { 50392685Sdarrenr strncpy(isn->is_ifname[i], 50492685Sdarrenr IFNAME(fr->fr_ifas[i]), 50592685Sdarrenr IFNAMSIZ); 50692685Sdarrenr } 50763523Sdarrenr } 50892685Sdarrenr isn->is_ifp[out] = fr->fr_ifas[i]; 50992685Sdarrenr } 51092685Sdarrenr 51163523Sdarrenr /* 51263523Sdarrenr * send a copy back to userland of what we ended up 51363523Sdarrenr * to allow for verification. 51463523Sdarrenr */ 51592685Sdarrenr error = IWCOPYPTR((caddr_t)&ips, data, sizeof(ips)); 51660854Sdarrenr if (error) { 51760854Sdarrenr KFREE(isn); 51860854Sdarrenr KFREE(fr); 51960854Sdarrenr return EFAULT; 52060854Sdarrenr } 52160854Sdarrenr } else { 52260854Sdarrenr for (is = ips_list; is; is = is->is_next) 52360854Sdarrenr if (is->is_rule == fr) 52460854Sdarrenr break; 52560854Sdarrenr if (!is) { 52660854Sdarrenr KFREE(isn); 52760854Sdarrenr return ESRCH; 52860854Sdarrenr } 52960854Sdarrenr } 53060854Sdarrenr } 53160854Sdarrenr fr_stinsert(isn); 53260854Sdarrenr return 0; 53360854Sdarrenr} 53460854Sdarrenr 53560854Sdarrenr 53692685Sdarrenr/* 53792685Sdarrenr * Insert a state table entry manually. 53892685Sdarrenr */ 53960854Sdarrenrvoid fr_stinsert(is) 54060854Sdarrenrregister ipstate_t *is; 54160854Sdarrenr{ 54260854Sdarrenr register u_int hv = is->is_hv; 54392685Sdarrenr char *name; 54492685Sdarrenr int i; 54560854Sdarrenr 54660854Sdarrenr MUTEX_INIT(&is->is_lock, "ipf state entry", NULL); 54760854Sdarrenr 54892685Sdarrenr /* 54992685Sdarrenr * Look up all the interface names in the state entry. 55092685Sdarrenr */ 55192685Sdarrenr for (i = 0; i < 4; i++) { 55292685Sdarrenr name = is->is_ifname[i]; 55392685Sdarrenr if ((name[1] == '\0') && 55492685Sdarrenr ((name[0] == '-') || (name[0] == '*'))) { 55592685Sdarrenr is->is_ifp[0] = NULL; 55692685Sdarrenr } else if (*name != '\0') { 55792685Sdarrenr is->is_ifp[i] = GETUNIT(name, is->is_v); 55892685Sdarrenr if (is->is_ifp[i] == NULL) 55992685Sdarrenr is->is_ifp[i] = (void *)-1; 56092685Sdarrenr } 56160854Sdarrenr } 56260854Sdarrenr 56392685Sdarrenr 56460854Sdarrenr /* 56560854Sdarrenr * add into list table. 56660854Sdarrenr */ 56760854Sdarrenr if (ips_list) 56860854Sdarrenr ips_list->is_pnext = &is->is_next; 56960854Sdarrenr is->is_pnext = &ips_list; 57060854Sdarrenr is->is_next = ips_list; 57160854Sdarrenr ips_list = is; 57260854Sdarrenr if (ips_table[hv]) 57360854Sdarrenr ips_table[hv]->is_phnext = &is->is_hnext; 57460854Sdarrenr else 57560854Sdarrenr ips_stats.iss_inuse++; 57660854Sdarrenr is->is_phnext = ips_table + hv; 57760854Sdarrenr is->is_hnext = ips_table[hv]; 57860854Sdarrenr ips_table[hv] = is; 57964580Sdarrenr ips_num++; 58060854Sdarrenr} 58160854Sdarrenr 58260854Sdarrenr 58353642Sguido/* 58453642Sguido * Create a new ipstate structure and hang it off the hash table. 58553642Sguido */ 58692685Sdarrenripstate_t *fr_addstate(ip, fin, stsave, flags) 58753642Sguidoip_t *ip; 58853642Sguidofr_info_t *fin; 58992685Sdarrenripstate_t **stsave; 59053642Sguidou_int flags; 59153642Sguido{ 59260854Sdarrenr register tcphdr_t *tcp = NULL; 59353642Sguido register ipstate_t *is; 59453642Sguido register u_int hv; 59592685Sdarrenr struct icmp *ic; 59653642Sguido ipstate_t ips; 59798004Sdarrenr int out, ws; 59853642Sguido u_int pass; 59992685Sdarrenr void *ifp; 60053642Sguido 60195418Sdarrenr if (fr_state_lock || (fin->fin_off != 0) || (fin->fin_fl & FI_SHORT) || 60295418Sdarrenr (fin->fin_misc & FM_BADSTATE)) 60353642Sguido return NULL; 60453642Sguido if (ips_num == fr_statemax) { 60553642Sguido ips_stats.iss_max++; 60653642Sguido fr_state_doflush = 1; 60753642Sguido return NULL; 60853642Sguido } 60960854Sdarrenr out = fin->fin_out; 61053642Sguido is = &ips; 61153642Sguido bzero((char *)is, sizeof(*is)); 61253642Sguido ips.is_age = 1; 61353642Sguido /* 61453642Sguido * Copy and calculate... 61553642Sguido */ 61660854Sdarrenr hv = (is->is_p = fin->fin_fi.fi_p); 61760854Sdarrenr is->is_src = fin->fin_fi.fi_src; 61860854Sdarrenr hv += is->is_saddr; 61960854Sdarrenr is->is_dst = fin->fin_fi.fi_dst; 62060854Sdarrenr hv += is->is_daddr; 62160854Sdarrenr#ifdef USE_INET6 62260854Sdarrenr if (fin->fin_v == 6) { 62392685Sdarrenr if ((is->is_p == IPPROTO_ICMPV6) && 62492685Sdarrenr IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { 62592685Sdarrenr /* 62692685Sdarrenr * So you can do keep state with neighbour discovery. 62792685Sdarrenr */ 62892685Sdarrenr flags |= FI_W_DADDR; 62992685Sdarrenr hv -= is->is_daddr; 63092685Sdarrenr } else { 63192685Sdarrenr hv += is->is_dst.i6[1]; 63292685Sdarrenr hv += is->is_dst.i6[2]; 63392685Sdarrenr hv += is->is_dst.i6[3]; 63460854Sdarrenr } 63592685Sdarrenr hv += is->is_src.i6[1]; 63692685Sdarrenr hv += is->is_src.i6[2]; 63792685Sdarrenr hv += is->is_src.i6[3]; 63860854Sdarrenr } 63960854Sdarrenr#endif 64053642Sguido 64160854Sdarrenr switch (is->is_p) 64253642Sguido { 64395418Sdarrenr int off; 64495418Sdarrenr 64560854Sdarrenr#ifdef USE_INET6 64660854Sdarrenr case IPPROTO_ICMPV6 : 64792685Sdarrenr ic = (struct icmp *)fin->fin_dp; 64892685Sdarrenr if ((ic->icmp_type & ICMP6_INFOMSG_MASK) == 0) 64992685Sdarrenr return NULL; 65053642Sguido 65153642Sguido switch (ic->icmp_type) 65253642Sguido { 65360854Sdarrenr case ICMP6_ECHO_REQUEST : 65492685Sdarrenr is->is_icmp.ics_type = ic->icmp_type; 65553642Sguido hv += (is->is_icmp.ics_id = ic->icmp_id); 65653642Sguido hv += (is->is_icmp.ics_seq = ic->icmp_seq); 65753642Sguido break; 65860854Sdarrenr case ICMP6_MEMBERSHIP_QUERY : 65960854Sdarrenr case ND_ROUTER_SOLICIT : 66060854Sdarrenr case ND_NEIGHBOR_SOLICIT : 66192685Sdarrenr case ICMP6_NI_QUERY : 66292685Sdarrenr is->is_icmp.ics_type = ic->icmp_type; 66360854Sdarrenr break; 66492685Sdarrenr default : 66592685Sdarrenr return NULL; 66692685Sdarrenr } 66792685Sdarrenr ATOMIC_INCL(ips_stats.iss_icmp); 66892685Sdarrenr is->is_age = fr_icmptimeout; 66992685Sdarrenr break; 67060854Sdarrenr#endif 67192685Sdarrenr case IPPROTO_ICMP : 67292685Sdarrenr ic = (struct icmp *)fin->fin_dp; 67392685Sdarrenr 67492685Sdarrenr switch (ic->icmp_type) 67592685Sdarrenr { 67660854Sdarrenr case ICMP_ECHO : 67753642Sguido case ICMP_TSTAMP : 67853642Sguido case ICMP_IREQ : 67953642Sguido case ICMP_MASKREQ : 68060854Sdarrenr is->is_icmp.ics_type = ic->icmp_type; 68160854Sdarrenr hv += (is->is_icmp.ics_id = ic->icmp_id); 68260854Sdarrenr hv += (is->is_icmp.ics_seq = ic->icmp_seq); 68353642Sguido break; 68453642Sguido default : 68553642Sguido return NULL; 68653642Sguido } 68760854Sdarrenr ATOMIC_INCL(ips_stats.iss_icmp); 68853642Sguido is->is_age = fr_icmptimeout; 68953642Sguido break; 69053642Sguido case IPPROTO_TCP : 69160854Sdarrenr tcp = (tcphdr_t *)fin->fin_dp; 69253642Sguido 69360854Sdarrenr if (tcp->th_flags & TH_RST) 69460854Sdarrenr return NULL; 69553642Sguido /* 69653642Sguido * The endian of the ports doesn't matter, but the ack and 69753642Sguido * sequence numbers do as we do mathematics on them later. 69853642Sguido */ 69992685Sdarrenr is->is_sport = htons(fin->fin_data[0]); 70092685Sdarrenr is->is_dport = htons(fin->fin_data[1]); 70153642Sguido if ((flags & (FI_W_DPORT|FI_W_SPORT)) == 0) { 70292685Sdarrenr hv += is->is_sport; 70392685Sdarrenr hv += is->is_dport; 70453642Sguido } 70595563Sdarrenr if ((flags & FI_IGNOREPKT) == 0) { 70695563Sdarrenr is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - 70795563Sdarrenr (off = (tcp->th_off << 2)) + 70895563Sdarrenr ((tcp->th_flags & TH_SYN) ? 1 : 0) + 70995563Sdarrenr ((tcp->th_flags & TH_FIN) ? 1 : 0); 71095563Sdarrenr is->is_maxsend = is->is_send; 71195563Sdarrenr 71295563Sdarrenr if ((tcp->th_flags & TH_SYN) && 71398004Sdarrenr ((tcp->th_off << 2) >= (sizeof(*tcp) + 4))) { 71498004Sdarrenr ws = fr_tcpoptions(tcp); 71598004Sdarrenr if (ws >= 0) 71698004Sdarrenr is->is_swscale = ws; 71798004Sdarrenr } 71895563Sdarrenr } 71995563Sdarrenr 72060854Sdarrenr is->is_maxdwin = 1; 72153642Sguido is->is_maxswin = ntohs(tcp->th_win); 72253642Sguido if (is->is_maxswin == 0) 72353642Sguido is->is_maxswin = 1; 72495418Sdarrenr 72595418Sdarrenr if ((tcp->th_flags & TH_OPENING) == TH_SYN) 72695418Sdarrenr is->is_fsm = 1; 72795418Sdarrenr 72853642Sguido /* 72953642Sguido * If we're creating state for a starting connection, start the 73053642Sguido * timer on it as we'll never see an error if it fails to 73153642Sguido * connect. 73253642Sguido */ 73360854Sdarrenr ATOMIC_INCL(ips_stats.iss_tcp); 73453642Sguido break; 73592685Sdarrenr 73653642Sguido case IPPROTO_UDP : 73760854Sdarrenr tcp = (tcphdr_t *)fin->fin_dp; 73853642Sguido 73992685Sdarrenr is->is_sport = htons(fin->fin_data[0]); 74092685Sdarrenr is->is_dport = htons(fin->fin_data[1]); 74153642Sguido if ((flags & (FI_W_DPORT|FI_W_SPORT)) == 0) { 74292685Sdarrenr hv += is->is_sport; 74392685Sdarrenr hv += is->is_dport; 74453642Sguido } 74560854Sdarrenr ATOMIC_INCL(ips_stats.iss_udp); 74653642Sguido is->is_age = fr_udptimeout; 74753642Sguido break; 74853642Sguido default : 74992685Sdarrenr is->is_age = fr_udptimeout; 75092685Sdarrenr break; 75153642Sguido } 75253642Sguido 75353642Sguido KMALLOC(is, ipstate_t *); 75453642Sguido if (is == NULL) { 75560854Sdarrenr ATOMIC_INCL(ips_stats.iss_nomem); 75653642Sguido return NULL; 75753642Sguido } 75853642Sguido bcopy((char *)&ips, (char *)is, sizeof(*is)); 75953642Sguido hv %= fr_statesize; 76060854Sdarrenr is->is_hv = hv; 76153642Sguido is->is_rule = fin->fin_fr; 76253642Sguido if (is->is_rule != NULL) { 763102520Sdarrenr is->is_group = is->is_rule->fr_group; 76460854Sdarrenr ATOMIC_INC32(is->is_rule->fr_ref); 76553642Sguido pass = is->is_rule->fr_flags; 76692685Sdarrenr is->is_frage[0] = is->is_rule->fr_age[0]; 76792685Sdarrenr is->is_frage[1] = is->is_rule->fr_age[1]; 76892685Sdarrenr if (is->is_frage[0] != 0) 76992685Sdarrenr is->is_age = is->is_frage[0]; 77092685Sdarrenr 77192685Sdarrenr is->is_ifp[(out << 1) + 1] = is->is_rule->fr_ifas[1]; 77292685Sdarrenr is->is_ifp[(1 - out) << 1] = is->is_rule->fr_ifas[2]; 77392685Sdarrenr is->is_ifp[((1 - out) << 1) + 1] = is->is_rule->fr_ifas[3]; 77492685Sdarrenr 77592685Sdarrenr if (((ifp = is->is_rule->fr_ifas[1]) != NULL) && 77692685Sdarrenr (ifp != (void *)-1)) 77792685Sdarrenr strncpy(is->is_ifname[(out << 1) + 1], 77892685Sdarrenr IFNAME(ifp), IFNAMSIZ); 77992685Sdarrenr if (((ifp = is->is_rule->fr_ifas[2]) != NULL) && 78092685Sdarrenr (ifp != (void *)-1)) 78192685Sdarrenr strncpy(is->is_ifname[(1 - out) << 1], 78292685Sdarrenr IFNAME(ifp), IFNAMSIZ); 78392685Sdarrenr if (((ifp = is->is_rule->fr_ifas[3]) != NULL) && 78492685Sdarrenr (ifp != (void *)-1)) 78592685Sdarrenr strncpy(is->is_ifname[((1 - out) << 1) + 1], 78692685Sdarrenr IFNAME(ifp), IFNAMSIZ); 78753642Sguido } else 78853642Sguido pass = fr_flags; 78992685Sdarrenr 79092685Sdarrenr is->is_ifp[out << 1] = fin->fin_ifp; 79192685Sdarrenr strncpy(is->is_ifname[out << 1], IFNAME(fin->fin_ifp), IFNAMSIZ); 79292685Sdarrenr 79353642Sguido WRITE_ENTER(&ipf_state); 79453642Sguido 79553642Sguido is->is_pass = pass; 79692685Sdarrenr if ((flags & FI_IGNOREPKT) == 0) { 79792685Sdarrenr is->is_pkts = 1; 79892685Sdarrenr is->is_bytes = fin->fin_dlen + fin->fin_hlen; 79992685Sdarrenr } 80053642Sguido /* 80153642Sguido * We want to check everything that is a property of this packet, 80253642Sguido * but we don't (automatically) care about it's fragment status as 80353642Sguido * this may change. 80453642Sguido */ 80592685Sdarrenr is->is_v = fin->fin_v; 80692685Sdarrenr is->is_rulen = fin->fin_rule; 80753642Sguido is->is_opt = fin->fin_fi.fi_optmsk; 80853642Sguido is->is_optmsk = 0xffffffff; 80953642Sguido is->is_sec = fin->fin_fi.fi_secmsk; 81053642Sguido is->is_secmsk = 0xffff; 81153642Sguido is->is_auth = fin->fin_fi.fi_auth; 81253642Sguido is->is_authmsk = 0xffff; 81380482Sdarrenr is->is_flags = fin->fin_fl & FI_CMP; 81453642Sguido is->is_flags |= FI_CMP << 4; 81560854Sdarrenr is->is_flags |= flags & (FI_WILDP|FI_WILDA); 81667614Sdarrenr if (flags & (FI_WILDP|FI_WILDA)) 81767614Sdarrenr ips_wild++; 81892685Sdarrenr 81953642Sguido if (pass & FR_LOGFIRST) 82053642Sguido is->is_pass &= ~(FR_LOGFIRST|FR_LOG); 82160854Sdarrenr fr_stinsert(is); 82292685Sdarrenr is->is_me = stsave; 82360854Sdarrenr if (is->is_p == IPPROTO_TCP) { 82460854Sdarrenr fr_tcp_age(&is->is_age, is->is_state, fin, 82595418Sdarrenr 0, is->is_fsm); /* 0 = packet from the source */ 82660854Sdarrenr } 82753642Sguido#ifdef IPFILTER_LOG 82853642Sguido ipstate_log(is, ISL_NEW); 82953642Sguido#endif 83053642Sguido RWLOCK_EXIT(&ipf_state); 83160854Sdarrenr fin->fin_rev = IP6NEQ(is->is_dst, fin->fin_fi.fi_dst); 832102520Sdarrenr if ((fin->fin_fl & FI_FRAG) && (pass & FR_KEEPFRAG)) 833102520Sdarrenr ipfr_newfrag(ip, fin); 83453642Sguido return is; 83553642Sguido} 83653642Sguido 83753642Sguido 83895418Sdarrenrstatic int fr_tcpoptions(tcp) 83995418Sdarrenrtcphdr_t *tcp; 84095418Sdarrenr{ 84195418Sdarrenr u_char *opt, *last; 84295418Sdarrenr int wscale; 84353642Sguido 84495418Sdarrenr opt = (u_char *) (tcp + 1); 84595418Sdarrenr last = ((u_char *)tcp) + (tcp->th_off << 2); 84695418Sdarrenr 84795418Sdarrenr /* If we don't find wscale here, we need to clear it */ 84895418Sdarrenr wscale = -2; 84995418Sdarrenr 85095418Sdarrenr /* Termination condition picked such that opt[0 .. 2] exist */ 85195418Sdarrenr while ((opt < last - 2) && (*opt != TCPOPT_EOL)) { 85295418Sdarrenr switch (*opt) { 85395418Sdarrenr case TCPOPT_NOP: 85495418Sdarrenr opt++; 85595418Sdarrenr continue; 85695418Sdarrenr case TCPOPT_WSCALE: 85795418Sdarrenr /* Proper length ? */ 85895418Sdarrenr if (opt[1] == 3) { 85995418Sdarrenr if (opt[2] > 14) 86095418Sdarrenr wscale = 14; 86195418Sdarrenr else 86295418Sdarrenr wscale = opt[2]; 86395418Sdarrenr } 86495418Sdarrenr break; 86595418Sdarrenr default: 86695418Sdarrenr /* Unknown options must be two bytes+ */ 86795418Sdarrenr if (opt[1] < 2) 86895418Sdarrenr break; 86995418Sdarrenr opt += opt[1]; 87095418Sdarrenr continue; 87195418Sdarrenr } 87295418Sdarrenr break; 87395418Sdarrenr } 87495418Sdarrenr return wscale; 87595418Sdarrenr} 87695418Sdarrenr 87795418Sdarrenr 87895418Sdarrenr 87953642Sguido/* 88053642Sguido * check to see if a packet with TCP headers fits within the TCP window. 88153642Sguido * change timeout depending on whether new packet is a SYN-ACK returning for a 88253642Sguido * SYN or a RST or FIN which indicate time to close up shop. 88353642Sguido */ 88453642Sguidoint fr_tcpstate(is, fin, ip, tcp) 88553642Sguidoregister ipstate_t *is; 88653642Sguidofr_info_t *fin; 88753642Sguidoip_t *ip; 88853642Sguidotcphdr_t *tcp; 88953642Sguido{ 89053642Sguido register tcp_seq seq, ack, end; 89153642Sguido register int ackskew; 89253642Sguido tcpdata_t *fdata, *tdata; 89395418Sdarrenr u_32_t win, maxwin; 89495418Sdarrenr int ret = 0, off; 89553642Sguido int source; 89695418Sdarrenr int wscale; 89753642Sguido 89853642Sguido /* 89953642Sguido * Find difference between last checked packet and this packet. 90053642Sguido */ 90160854Sdarrenr source = IP6EQ(fin->fin_fi.fi_src, is->is_src); 90280482Sdarrenr if (source && (ntohs(is->is_sport) != fin->fin_data[0])) 90380482Sdarrenr source = 0; 90453642Sguido fdata = &is->is_tcp.ts_data[!source]; 90553642Sguido tdata = &is->is_tcp.ts_data[source]; 90695418Sdarrenr off = tcp->th_off << 2; 90753642Sguido seq = ntohl(tcp->th_seq); 90853642Sguido ack = ntohl(tcp->th_ack); 90953642Sguido win = ntohs(tcp->th_win); 91095418Sdarrenr end = seq + fin->fin_dlen - off + 91153642Sguido ((tcp->th_flags & TH_SYN) ? 1 : 0) + 91257096Sguido ((tcp->th_flags & TH_FIN) ? 1 : 0); 91353642Sguido 91495418Sdarrenr 91595418Sdarrenr if ((tcp->th_flags & TH_SYN) && (off >= sizeof(*tcp) + 4)) 91695418Sdarrenr wscale = fr_tcpoptions(tcp); 91795418Sdarrenr else 91895418Sdarrenr wscale = -1; 91995418Sdarrenr 92067614Sdarrenr MUTEX_ENTER(&is->is_lock); 92195418Sdarrenr 92295418Sdarrenr if (wscale >= 0) 92395418Sdarrenr fdata->td_wscale = wscale; 92495418Sdarrenr else if (wscale == -2) 92595418Sdarrenr fdata->td_wscale = tdata->td_wscale = 0; 92698004Sdarrenr win <<= fdata->td_wscale; 92795418Sdarrenr 92895418Sdarrenr if ((fdata->td_end == 0) && 92995418Sdarrenr (!is->is_fsm || ((tcp->th_flags & TH_OPENING) == TH_OPENING))) { 93053642Sguido /* 93153642Sguido * Must be a (outgoing) SYN-ACK in reply to a SYN. 93253642Sguido */ 93353642Sguido fdata->td_end = end; 93453642Sguido fdata->td_maxwin = 1; 93598004Sdarrenr fdata->td_maxend = end + win; 93698004Sdarrenr if (win == 0) 93798004Sdarrenr fdata->td_maxend++; 93853642Sguido } 93953642Sguido 94053642Sguido if (!(tcp->th_flags & TH_ACK)) { /* Pretend an ack was sent */ 94153642Sguido ack = tdata->td_end; 94253642Sguido } else if (((tcp->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && 94353642Sguido (ack == 0)) { 94453642Sguido /* gross hack to get around certain broken tcp stacks */ 94553642Sguido ack = tdata->td_end; 94653642Sguido } 94753642Sguido 94853642Sguido if (seq == end) 94953642Sguido seq = end = fdata->td_end; 95053642Sguido 95153642Sguido maxwin = tdata->td_maxwin; 95253642Sguido ackskew = tdata->td_end - ack; 95353642Sguido 95453642Sguido#define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) 95553642Sguido#define SEQ_GT(a,b) ((int)((a) - (b)) > 0) 95653642Sguido if ((SEQ_GE(fdata->td_maxend, end)) && 95757096Sguido (SEQ_GE(seq, fdata->td_end - maxwin)) && 95853642Sguido/* XXX what about big packets */ 95953642Sguido#define MAXACKWINDOW 66000 96053642Sguido (ackskew >= -MAXACKWINDOW) && 96153642Sguido (ackskew <= MAXACKWINDOW)) { 96253642Sguido /* if ackskew < 0 then this should be due to fragented 96353642Sguido * packets. There is no way to know the length of the 96453642Sguido * total packet in advance. 96553642Sguido * We do know the total length from the fragment cache though. 96653642Sguido * Note however that there might be more sessions with 96753642Sguido * exactly the same source and destination paramters in the 96853642Sguido * state cache (and source and destination is the only stuff 96953642Sguido * that is saved in the fragment cache). Note further that 97053642Sguido * some TCP connections in the state cache are hashed with 97153642Sguido * sport and dport as well which makes it not worthwhile to 97253642Sguido * look for them. 97353642Sguido * Thus, when ackskew is negative but still seems to belong 97453642Sguido * to this session, we bump up the destinations end value. 97553642Sguido */ 97653642Sguido /* 97753642Sguido * Nearing end of connection, start timeout. 97853642Sguido */ 97964580Sdarrenr /* source ? 0 : 1 -> !source */ 98095418Sdarrenr if (fr_tcp_age(&is->is_age, is->is_state, fin, !source, 98195418Sdarrenr (int)is->is_fsm) == 0) { 98295418Sdarrenr if (ackskew < 0) 98395418Sdarrenr tdata->td_end = ack; 98495418Sdarrenr 98595418Sdarrenr /* update max window seen */ 98695418Sdarrenr if (fdata->td_maxwin < win) 98795418Sdarrenr fdata->td_maxwin = win; 98895418Sdarrenr if (SEQ_GT(end, fdata->td_end)) 98995418Sdarrenr fdata->td_end = end; 99095418Sdarrenr if (SEQ_GE(ack + win, tdata->td_maxend)) { 99195418Sdarrenr tdata->td_maxend = ack + win; 99295418Sdarrenr if (win == 0) 99395418Sdarrenr tdata->td_maxend++; 99495418Sdarrenr } 99595418Sdarrenr 99695418Sdarrenr ATOMIC_INCL(ips_stats.iss_hits); 99795418Sdarrenr ret = 1; 99895418Sdarrenr } 99953642Sguido } 100067614Sdarrenr MUTEX_EXIT(&is->is_lock); 100195563Sdarrenr if ((ret == 0) && ((tcp->th_flags & TH_OPENING) != TH_SYN)) 100295418Sdarrenr fin->fin_misc |= FM_BADSTATE; 100353642Sguido return ret; 100453642Sguido} 100553642Sguido 100653642Sguido 100792685Sdarrenr/* 100892685Sdarrenr * Match a state table entry against an IP packet. 100992685Sdarrenr */ 101053642Sguidostatic int fr_matchsrcdst(is, src, dst, fin, tcp) 101153642Sguidoipstate_t *is; 101260854Sdarrenrunion i6addr src, dst; 101353642Sguidofr_info_t *fin; 101453642Sguidotcphdr_t *tcp; 101553642Sguido{ 101692685Sdarrenr int ret = 0, rev, out, flags, idx; 101753642Sguido u_short sp, dp; 101853642Sguido void *ifp; 101953642Sguido 102080482Sdarrenr rev = IP6NEQ(is->is_dst, dst); 102153642Sguido ifp = fin->fin_ifp; 102253642Sguido out = fin->fin_out; 102392685Sdarrenr flags = is->is_flags & (FI_WILDA|FI_WILDP); 102492685Sdarrenr sp = 0; 102592685Sdarrenr dp = 0; 102653642Sguido 102753642Sguido if (tcp != NULL) { 102853642Sguido flags = is->is_flags; 102953642Sguido sp = tcp->th_sport; 103053642Sguido dp = tcp->th_dport; 103180482Sdarrenr if (!rev) { 103280482Sdarrenr if (!(flags & FI_W_SPORT) && (sp != is->is_sport)) 103380482Sdarrenr rev = 1; 103480482Sdarrenr else if (!(flags & FI_W_DPORT) && (dp != is->is_dport)) 103580482Sdarrenr rev = 1; 103680482Sdarrenr } 103753642Sguido } 103853642Sguido 103992685Sdarrenr idx = (out << 1) + rev; 104092685Sdarrenr 104192685Sdarrenr if ((is->is_ifp[idx] == NULL && 104292685Sdarrenr (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) || 104392685Sdarrenr is->is_ifp[idx] == ifp) 104492685Sdarrenr ret = 1; 104592685Sdarrenr 104653642Sguido if (ret == 0) 104753642Sguido return 0; 104853642Sguido ret = 0; 104953642Sguido 105053642Sguido if (rev == 0) { 105192685Sdarrenr if ((IP6EQ(is->is_dst, dst) || (flags & FI_W_DADDR)) && 105260854Sdarrenr (IP6EQ(is->is_src, src) || (flags & FI_W_SADDR)) && 105353642Sguido (!tcp || ((sp == is->is_sport || flags & FI_W_SPORT) && 105453642Sguido (dp == is->is_dport || flags & FI_W_DPORT)))) { 105553642Sguido ret = 1; 105653642Sguido } 105753642Sguido } else { 105892685Sdarrenr if ((IP6EQ(is->is_dst, src) || (flags & FI_W_DADDR)) && 105960854Sdarrenr (IP6EQ(is->is_src, dst) || (flags & FI_W_SADDR)) && 106053642Sguido (!tcp || ((sp == is->is_dport || flags & FI_W_DPORT) && 106153642Sguido (dp == is->is_sport || flags & FI_W_SPORT)))) { 106253642Sguido ret = 1; 106353642Sguido } 106453642Sguido } 106553642Sguido if (ret == 0) 106653642Sguido return 0; 106753642Sguido 106853642Sguido /* 106953642Sguido * Whether or not this should be here, is questionable, but the aim 107053642Sguido * is to get this out of the main line. 107153642Sguido */ 107253642Sguido if (tcp == NULL) 107353642Sguido flags = is->is_flags & (FI_CMP|(FI_CMP<<4)); 107453642Sguido 107580482Sdarrenr if (((fin->fin_fl & (flags >> 4)) != (flags & FI_CMP)) || 107680482Sdarrenr (fin->fin_fi.fi_optmsk != is->is_opt) || 107780482Sdarrenr (fin->fin_fi.fi_secmsk != is->is_sec) || 107880482Sdarrenr (fin->fin_fi.fi_auth != is->is_auth)) 107953642Sguido return 0; 108053642Sguido 108192685Sdarrenr flags = is->is_flags & (FI_WILDA|FI_WILDP); 108292685Sdarrenr if ((flags & (FI_W_SADDR|FI_W_DADDR))) { 108392685Sdarrenr if ((flags & FI_W_SADDR) != 0) { 108492685Sdarrenr if (rev == 0) { 108592685Sdarrenr is->is_src = fin->fin_fi.fi_src; 108692685Sdarrenr } else { 108792685Sdarrenr is->is_src = fin->fin_fi.fi_dst; 108892685Sdarrenr } 1089102520Sdarrenr } else if ((flags & FI_W_DADDR) != 0) { 109092685Sdarrenr if (rev == 0) { 109192685Sdarrenr is->is_dst = fin->fin_fi.fi_dst; 109292685Sdarrenr } else { 109392685Sdarrenr is->is_dst = fin->fin_fi.fi_src; 109492685Sdarrenr } 109592685Sdarrenr } 109692685Sdarrenr is->is_flags &= ~(FI_W_SADDR|FI_W_DADDR); 109792685Sdarrenr if ((is->is_flags & (FI_WILDA|FI_WILDP)) == 0) 109892685Sdarrenr ips_wild--; 109992685Sdarrenr } 110092685Sdarrenr 110153642Sguido if ((flags & (FI_W_SPORT|FI_W_DPORT))) { 110253642Sguido if ((flags & FI_W_SPORT) != 0) { 110353642Sguido if (rev == 0) { 110453642Sguido is->is_sport = sp; 110553642Sguido is->is_send = htonl(tcp->th_seq); 110653642Sguido } else { 110753642Sguido is->is_sport = dp; 110853642Sguido is->is_send = htonl(tcp->th_ack); 110953642Sguido } 111053642Sguido is->is_maxsend = is->is_send + 1; 111153642Sguido } else if ((flags & FI_W_DPORT) != 0) { 111253642Sguido if (rev == 0) { 111353642Sguido is->is_dport = dp; 111453642Sguido is->is_dend = htonl(tcp->th_ack); 111553642Sguido } else { 111653642Sguido is->is_dport = sp; 111753642Sguido is->is_dend = htonl(tcp->th_seq); 111853642Sguido } 111953642Sguido is->is_maxdend = is->is_dend + 1; 112053642Sguido } 112153642Sguido is->is_flags &= ~(FI_W_SPORT|FI_W_DPORT); 112267614Sdarrenr ips_wild--; 112353642Sguido } 112453642Sguido 112560854Sdarrenr ret = -1; 112660854Sdarrenr 112792685Sdarrenr if (is->is_ifp[idx] == NULL && 112892685Sdarrenr (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) 112992685Sdarrenr ret = idx; 113060854Sdarrenr 113160854Sdarrenr if (ret >= 0) { 113260854Sdarrenr is->is_ifp[ret] = ifp; 113392685Sdarrenr strncpy(is->is_ifname[ret], IFNAME(ifp), 113480482Sdarrenr sizeof(is->is_ifname[ret])); 113560854Sdarrenr } 113680482Sdarrenr fin->fin_rev = rev; 113753642Sguido return 1; 113853642Sguido} 113953642Sguido 1140110916Sdarrenrstatic int fr_matchicmpqueryreply(v, is, icmp, rev) 114160854Sdarrenrint v; 114260854Sdarrenripstate_t *is; 114360854Sdarrenricmphdr_t *icmp; 114460854Sdarrenr{ 114560854Sdarrenr if (v == 4) { 114660854Sdarrenr /* 114760854Sdarrenr * If we matched its type on the way in, then when going out 114860854Sdarrenr * it will still be the same type. 114960854Sdarrenr */ 1150110916Sdarrenr if ((!rev && (icmp->icmp_type == is->is_type)) || 1151110916Sdarrenr (rev && (icmpreplytype4[is->is_type] == icmp->icmp_type))) { 115292685Sdarrenr if (icmp->icmp_type != ICMP_ECHOREPLY) 115392685Sdarrenr return 1; 115492685Sdarrenr if ((icmp->icmp_id == is->is_icmp.ics_id) && 115592685Sdarrenr (icmp->icmp_seq == is->is_icmp.ics_seq)) 115692685Sdarrenr return 1; 115792685Sdarrenr } 115860854Sdarrenr } 115960854Sdarrenr#ifdef USE_INET6 116060854Sdarrenr else if (is->is_v == 6) { 1161110916Sdarrenr if ((!rev && (icmp->icmp_type == is->is_type)) || 1162110916Sdarrenr (rev && (icmpreplytype6[is->is_type] == icmp->icmp_type))) { 116392685Sdarrenr if (icmp->icmp_type != ICMP6_ECHO_REPLY) 116492685Sdarrenr return 1; 116592685Sdarrenr if ((icmp->icmp_id == is->is_icmp.ics_id) && 116692685Sdarrenr (icmp->icmp_seq == is->is_icmp.ics_seq)) 116792685Sdarrenr return 1; 116892685Sdarrenr } 116960854Sdarrenr } 117060854Sdarrenr#endif 117160854Sdarrenr return 0; 117260854Sdarrenr} 117360854Sdarrenr 117460854Sdarrenrstatic frentry_t *fr_checkicmpmatchingstate(ip, fin) 117553642Sguidoip_t *ip; 117653642Sguidofr_info_t *fin; 117753642Sguido{ 117853642Sguido register ipstate_t *is, **isp; 117953642Sguido register u_short sport, dport; 118053642Sguido register u_char pr; 118195418Sdarrenr u_short savelen, ohlen; 118260854Sdarrenr union i6addr dst, src; 118353642Sguido struct icmp *ic; 118464580Sdarrenr icmphdr_t *icmp; 118553642Sguido fr_info_t ofin; 118664580Sdarrenr int type, len; 118753642Sguido tcphdr_t *tcp; 118853642Sguido frentry_t *fr; 118953642Sguido ip_t *oip; 119055929Sguido u_int hv; 119153642Sguido 119257096Sguido /* 119357096Sguido * Does it at least have the return (basic) IP header ? 119453642Sguido * Only a basic IP header (no options) should be with 119553642Sguido * an ICMP error header. 119653642Sguido */ 119767614Sdarrenr if (((ip->ip_v != 4) || (ip->ip_hl != 5)) || 119860854Sdarrenr (fin->fin_plen < ICMPERR_MINPKTLEN)) 119953642Sguido return NULL; 120092685Sdarrenr 120160854Sdarrenr ic = (struct icmp *)fin->fin_dp; 120253642Sguido type = ic->icmp_type; 120353642Sguido /* 120453642Sguido * If it's not an error type, then return 120553642Sguido */ 120653642Sguido if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) && 120753642Sguido (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) && 120853642Sguido (type != ICMP_PARAMPROB)) 120953642Sguido return NULL; 121053642Sguido 121160854Sdarrenr oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); 121295418Sdarrenr ohlen = oip->ip_hl << 2; 121395418Sdarrenr if (fin->fin_plen < ICMPERR_MAXPKTLEN + ohlen - sizeof(*oip)) 121453642Sguido return NULL; 121555929Sguido 121664580Sdarrenr /* 121764580Sdarrenr * Sanity checks. 121864580Sdarrenr */ 121964580Sdarrenr len = fin->fin_dlen - ICMPERR_ICMPHLEN; 122095418Sdarrenr if ((len <= 0) || (ohlen > len)) 122164580Sdarrenr return NULL; 122264580Sdarrenr 122364580Sdarrenr /* 122464580Sdarrenr * Is the buffer big enough for all of it ? It's the size of the IP 122564580Sdarrenr * header claimed in the encapsulated part which is of concern. It 122664580Sdarrenr * may be too big to be in this buffer but not so big that it's 122764580Sdarrenr * outside the ICMP packet, leading to TCP deref's causing problems. 122864580Sdarrenr * This is possible because we don't know how big oip_hl is when we 122964580Sdarrenr * do the pullup early in fr_check() and thus can't gaurantee it is 123064580Sdarrenr * all here now. 123164580Sdarrenr */ 123264580Sdarrenr#ifdef _KERNEL 123364580Sdarrenr { 123464580Sdarrenr mb_t *m; 123564580Sdarrenr 123664580Sdarrenr# if SOLARIS 123764580Sdarrenr m = fin->fin_qfm; 123864580Sdarrenr if ((char *)oip + len > (char *)m->b_wptr) 123964580Sdarrenr return NULL; 124064580Sdarrenr# else 124164580Sdarrenr m = *(mb_t **)fin->fin_mp; 124264580Sdarrenr if ((char *)oip + len > (char *)ip + m->m_len) 124364580Sdarrenr return NULL; 124464580Sdarrenr# endif 124564580Sdarrenr } 124664580Sdarrenr#endif 124764580Sdarrenr 124864580Sdarrenr /* 124964580Sdarrenr * in the IPv4 case we must zero the i6addr union otherwise 125064580Sdarrenr * the IP6EQ and IP6NEQ macros produce the wrong results because 125164580Sdarrenr * of the 'junk' in the unused part of the union 125264580Sdarrenr */ 125367614Sdarrenr bzero((char *)&src, sizeof(src)); 125467614Sdarrenr bzero((char *)&dst, sizeof(dst)); 125595563Sdarrenr bzero((char *)&ofin, sizeof(ofin)); 125695563Sdarrenr ofin.fin_ifp = fin->fin_ifp; 125795563Sdarrenr ofin.fin_out = !fin->fin_out; 125895563Sdarrenr ofin.fin_v = 4; 125992685Sdarrenr fr = NULL; 126064580Sdarrenr 126192685Sdarrenr switch (oip->ip_p) 126292685Sdarrenr { 126392685Sdarrenr case IPPROTO_ICMP : 126495418Sdarrenr icmp = (icmphdr_t *)((char *)oip + ohlen); 126555929Sguido 126655929Sguido /* 126755929Sguido * a ICMP error can only be generated as a result of an 126855929Sguido * ICMP query, not as the response on an ICMP error 126955929Sguido * 127055929Sguido * XXX theoretically ICMP_ECHOREP and the other reply's are 127155929Sguido * ICMP query's as well, but adding them here seems strange XXX 127255929Sguido */ 127355929Sguido if ((icmp->icmp_type != ICMP_ECHO) && 127455929Sguido (icmp->icmp_type != ICMP_TSTAMP) && 127555929Sguido (icmp->icmp_type != ICMP_IREQ) && 127657096Sguido (icmp->icmp_type != ICMP_MASKREQ)) 127755929Sguido return NULL; 127855929Sguido 127957096Sguido /* 128055929Sguido * perform a lookup of the ICMP packet in the state table 128155929Sguido */ 128255929Sguido hv = (pr = oip->ip_p); 128360854Sdarrenr src.in4 = oip->ip_src; 128460854Sdarrenr hv += src.in4.s_addr; 128560854Sdarrenr dst.in4 = oip->ip_dst; 128660854Sdarrenr hv += dst.in4.s_addr; 128760854Sdarrenr hv += icmp->icmp_id; 128860854Sdarrenr hv += icmp->icmp_seq; 128955929Sguido hv %= fr_statesize; 129055929Sguido 129164580Sdarrenr savelen = oip->ip_len; 129264580Sdarrenr oip->ip_len = len; 129395418Sdarrenr fr_makefrip(ohlen, oip, &ofin); 129464580Sdarrenr oip->ip_len = savelen; 129555929Sguido 129655929Sguido READ_ENTER(&ipf_state); 129760854Sdarrenr for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) 129860854Sdarrenr if ((is->is_p == pr) && (is->is_v == 4) && 129960854Sdarrenr fr_matchsrcdst(is, src, dst, &ofin, NULL) && 1300110916Sdarrenr fr_matchicmpqueryreply(is->is_v, is, icmp, fin->fin_rev)) { 130160854Sdarrenr ips_stats.iss_hits++; 130260854Sdarrenr is->is_pkts++; 130360854Sdarrenr is->is_bytes += ip->ip_len; 130460854Sdarrenr fr = is->is_rule; 130592685Sdarrenr break; 130655929Sguido } 130755929Sguido RWLOCK_EXIT(&ipf_state); 130892685Sdarrenr return fr; 130992685Sdarrenr 131092685Sdarrenr case IPPROTO_TCP : 131192685Sdarrenr case IPPROTO_UDP : 131295418Sdarrenr if (fin->fin_plen < ICMPERR_MAXPKTLEN) 131395418Sdarrenr return NULL; 131492685Sdarrenr break; 131592685Sdarrenr default : 131655929Sguido return NULL; 131792685Sdarrenr } 131855929Sguido 131995418Sdarrenr tcp = (tcphdr_t *)((char *)oip + ohlen); 132053642Sguido dport = tcp->th_dport; 132153642Sguido sport = tcp->th_sport; 132253642Sguido 132353642Sguido hv = (pr = oip->ip_p); 132460854Sdarrenr src.in4 = oip->ip_src; 132560854Sdarrenr hv += src.in4.s_addr; 132660854Sdarrenr dst.in4 = oip->ip_dst; 132760854Sdarrenr hv += dst.in4.s_addr; 132853642Sguido hv += dport; 132953642Sguido hv += sport; 133053642Sguido hv %= fr_statesize; 133153642Sguido /* 133253642Sguido * we make an fin entry to be able to feed it to 133353642Sguido * matchsrcdst note that not all fields are encessary 133453642Sguido * but this is the cleanest way. Note further we fill 133553642Sguido * in fin_mp such that if someone uses it we'll get 133653642Sguido * a kernel panic. fr_matchsrcdst does not use this. 133753642Sguido * 133853642Sguido * watch out here, as ip is in host order and oip in network 133953642Sguido * order. Any change we make must be undone afterwards. 134053642Sguido */ 134155929Sguido savelen = oip->ip_len; 134264580Sdarrenr oip->ip_len = len; 134395418Sdarrenr fr_makefrip(ohlen, oip, &ofin); 134455929Sguido oip->ip_len = savelen; 134553642Sguido READ_ENTER(&ipf_state); 134660854Sdarrenr for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) { 134753642Sguido /* 134853642Sguido * Only allow this icmp though if the 134953642Sguido * encapsulated packet was allowed through the 135053642Sguido * other way around. Note that the minimal amount 135153642Sguido * of info present does not allow for checking against 135253642Sguido * tcp internals such as seq and ack numbers. 135353642Sguido */ 135460854Sdarrenr if ((is->is_p == pr) && (is->is_v == 4) && 135553642Sguido fr_matchsrcdst(is, src, dst, &ofin, tcp)) { 135653642Sguido fr = is->is_rule; 135753642Sguido ips_stats.iss_hits++; 135853642Sguido is->is_pkts++; 135960854Sdarrenr is->is_bytes += fin->fin_plen; 136053642Sguido /* 136153642Sguido * we deliberately do not touch the timeouts 136253642Sguido * for the accompanying state table entry. 136353642Sguido * It remains to be seen if that is correct. XXX 136453642Sguido */ 136592685Sdarrenr break; 136653642Sguido } 136753642Sguido } 136853642Sguido RWLOCK_EXIT(&ipf_state); 136992685Sdarrenr return fr; 137053642Sguido} 137153642Sguido 137267614Sdarrenr 137392685Sdarrenr/* 137492685Sdarrenr * Move a state hash table entry from its old location at is->is_hv to 137592685Sdarrenr * its new location, indexed by hv % fr_statesize. 137692685Sdarrenr */ 137767614Sdarrenrstatic void fr_ipsmove(isp, is, hv) 137867614Sdarrenripstate_t **isp, *is; 137967614Sdarrenru_int hv; 138067614Sdarrenr{ 138167614Sdarrenr u_int hvm; 138267614Sdarrenr 138367614Sdarrenr hvm = is->is_hv; 138467614Sdarrenr /* 138567614Sdarrenr * Remove the hash from the old location... 138667614Sdarrenr */ 138767614Sdarrenr if (is->is_hnext) 138867614Sdarrenr is->is_hnext->is_phnext = isp; 138967614Sdarrenr *isp = is->is_hnext; 139067614Sdarrenr if (ips_table[hvm] == NULL) 139167614Sdarrenr ips_stats.iss_inuse--; 139267614Sdarrenr 139367614Sdarrenr /* 139467614Sdarrenr * ...and put the hash in the new one. 139567614Sdarrenr */ 139667614Sdarrenr hvm = hv % fr_statesize; 139767853Sdarrenr is->is_hv = hvm; 139867614Sdarrenr isp = &ips_table[hvm]; 139967614Sdarrenr if (*isp) 140067614Sdarrenr (*isp)->is_phnext = &is->is_hnext; 140167614Sdarrenr else 140267614Sdarrenr ips_stats.iss_inuse++; 140367614Sdarrenr is->is_phnext = isp; 140467614Sdarrenr is->is_hnext = *isp; 140567614Sdarrenr *isp = is; 140667614Sdarrenr} 140767614Sdarrenr 140867614Sdarrenr 140953642Sguido/* 141053642Sguido * Check if a packet has a registered state. 141153642Sguido */ 141253642Sguidofrentry_t *fr_checkstate(ip, fin) 141353642Sguidoip_t *ip; 141453642Sguidofr_info_t *fin; 141553642Sguido{ 141660854Sdarrenr union i6addr dst, src; 141753642Sguido register ipstate_t *is, **isp; 141853642Sguido register u_char pr; 141960854Sdarrenr u_int hv, hvm, hlen, tryagain, pass, v; 142053642Sguido struct icmp *ic; 142153642Sguido frentry_t *fr; 142253642Sguido tcphdr_t *tcp; 142392685Sdarrenr int rev; 142453642Sguido 1425102520Sdarrenr if ((ips_list == NULL) || (fin->fin_off != 0) || fr_state_lock || 1426102520Sdarrenr (fin->fin_fl & FI_SHORT)) 142753642Sguido return NULL; 142853642Sguido 142953642Sguido is = NULL; 143053642Sguido hlen = fin->fin_hlen; 143153642Sguido tcp = (tcphdr_t *)((char *)ip + hlen); 143253642Sguido ic = (struct icmp *)tcp; 143360854Sdarrenr hv = (pr = fin->fin_fi.fi_p); 143460854Sdarrenr src = fin->fin_fi.fi_src; 143560854Sdarrenr dst = fin->fin_fi.fi_dst; 143660854Sdarrenr hv += src.in4.s_addr; 143760854Sdarrenr hv += dst.in4.s_addr; 143853642Sguido 143953642Sguido /* 144053642Sguido * Search the hash table for matching packet header info. 144192685Sdarrenr * At the bottom of this switch statement, the following is expected: 144292685Sdarrenr * is == NULL, no lock on ipf_state is held. 144392685Sdarrenr * is != NULL, a lock on ipf_state is held. 144453642Sguido */ 144560854Sdarrenr v = fin->fin_fi.fi_v; 144692685Sdarrenr#ifdef USE_INET6 144792685Sdarrenr if (v == 6) { 144892685Sdarrenr hv += fin->fin_fi.fi_src.i6[1]; 144992685Sdarrenr hv += fin->fin_fi.fi_src.i6[2]; 145092685Sdarrenr hv += fin->fin_fi.fi_src.i6[3]; 145192685Sdarrenr 145292685Sdarrenr if ((fin->fin_p == IPPROTO_ICMPV6) && 145392685Sdarrenr IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) { 145492685Sdarrenr hv -= dst.in4.s_addr; 145592685Sdarrenr } else { 145692685Sdarrenr hv += fin->fin_fi.fi_dst.i6[1]; 145792685Sdarrenr hv += fin->fin_fi.fi_dst.i6[2]; 145892685Sdarrenr hv += fin->fin_fi.fi_dst.i6[3]; 145992685Sdarrenr } 146092685Sdarrenr } 146192685Sdarrenr#endif 146292685Sdarrenr 146392685Sdarrenr switch (fin->fin_p) 146453642Sguido { 146560854Sdarrenr#ifdef USE_INET6 146660854Sdarrenr case IPPROTO_ICMPV6 : 146792685Sdarrenr tcp = NULL; 146892685Sdarrenr tryagain = 0; 146960854Sdarrenr if (v == 6) { 147060854Sdarrenr if ((ic->icmp_type == ICMP6_ECHO_REQUEST) || 147160854Sdarrenr (ic->icmp_type == ICMP6_ECHO_REPLY)) { 147260854Sdarrenr hv += ic->icmp_id; 147360854Sdarrenr hv += ic->icmp_seq; 147460854Sdarrenr } 147560854Sdarrenr } 147692685Sdarrenr READ_ENTER(&ipf_state); 147792685Sdarrenricmp6again: 147892685Sdarrenr hvm = hv % fr_statesize; 147992685Sdarrenr for (isp = &ips_table[hvm]; (is = *isp); isp = &is->is_hnext) 148092685Sdarrenr if ((is->is_p == pr) && (is->is_v == v) && 148192685Sdarrenr fr_matchsrcdst(is, src, dst, fin, NULL) && 1482110916Sdarrenr fr_matchicmpqueryreply(v, is, ic, fin->fin_rev)) { 148392685Sdarrenr rev = fin->fin_rev; 148492685Sdarrenr if (is->is_frage[rev] != 0) 148592685Sdarrenr is->is_age = is->is_frage[rev]; 148698004Sdarrenr else if (rev != 0) 148792685Sdarrenr is->is_age = fr_icmpacktimeout; 148892685Sdarrenr else 148992685Sdarrenr is->is_age = fr_icmptimeout; 149092685Sdarrenr break; 149192685Sdarrenr } 149292685Sdarrenr 149392685Sdarrenr if (is != NULL) { 149492685Sdarrenr if (tryagain && !(is->is_flags & FI_W_DADDR)) { 149592685Sdarrenr hv += fin->fin_fi.fi_src.i6[0]; 149692685Sdarrenr hv += fin->fin_fi.fi_src.i6[1]; 149792685Sdarrenr hv += fin->fin_fi.fi_src.i6[2]; 149892685Sdarrenr hv += fin->fin_fi.fi_src.i6[3]; 149992685Sdarrenr fr_ipsmove(isp, is, hv); 150092685Sdarrenr MUTEX_DOWNGRADE(&ipf_state); 150192685Sdarrenr } 150292685Sdarrenr break; 150392685Sdarrenr } 150492685Sdarrenr RWLOCK_EXIT(&ipf_state); 150592685Sdarrenr 150692685Sdarrenr /* 150792685Sdarrenr * No matching icmp state entry. Perhaps this is a 150892685Sdarrenr * response to another state entry. 150992685Sdarrenr */ 151092685Sdarrenr if ((ips_wild != 0) && (v == 6) && (tryagain == 0) && 151192685Sdarrenr !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) { 151292685Sdarrenr hv -= fin->fin_fi.fi_src.i6[0]; 151392685Sdarrenr hv -= fin->fin_fi.fi_src.i6[1]; 151492685Sdarrenr hv -= fin->fin_fi.fi_src.i6[2]; 151592685Sdarrenr hv -= fin->fin_fi.fi_src.i6[3]; 151692685Sdarrenr tryagain = 1; 151792685Sdarrenr WRITE_ENTER(&ipf_state); 151892685Sdarrenr goto icmp6again; 151992685Sdarrenr } 152092685Sdarrenr 152192685Sdarrenr fr = fr_checkicmp6matchingstate((ip6_t *)ip, fin); 152292685Sdarrenr if (fr) 152392685Sdarrenr return fr; 152492685Sdarrenr break; 152560854Sdarrenr#endif 152653642Sguido case IPPROTO_ICMP : 152792685Sdarrenr tcp = NULL; 152860854Sdarrenr if (v == 4) { 152955929Sguido hv += ic->icmp_id; 153055929Sguido hv += ic->icmp_seq; 153155929Sguido } 153292685Sdarrenr hvm = hv % fr_statesize; 153353642Sguido READ_ENTER(&ipf_state); 153492685Sdarrenr for (isp = &ips_table[hvm]; (is = *isp); isp = &is->is_hnext) 153560854Sdarrenr if ((is->is_p == pr) && (is->is_v == v) && 153660854Sdarrenr fr_matchsrcdst(is, src, dst, fin, NULL) && 1537110916Sdarrenr fr_matchicmpqueryreply(v, is, ic, fin->fin_rev)) { 153892685Sdarrenr rev = fin->fin_rev; 153992685Sdarrenr if (is->is_frage[rev] != 0) 154092685Sdarrenr is->is_age = is->is_frage[rev]; 154192685Sdarrenr else if (fin->fin_rev) 154280482Sdarrenr is->is_age = fr_icmpacktimeout; 154380482Sdarrenr else 154480482Sdarrenr is->is_age = fr_icmptimeout; 154553642Sguido break; 154653642Sguido } 154792685Sdarrenr 154853642Sguido if (is != NULL) 154953642Sguido break; 155053642Sguido RWLOCK_EXIT(&ipf_state); 155153642Sguido /* 155253642Sguido * No matching icmp state entry. Perhaps this is a 155353642Sguido * response to another state entry. 155453642Sguido */ 155592685Sdarrenr fr = fr_checkicmpmatchingstate(ip, fin); 155653642Sguido if (fr) 155753642Sguido return fr; 155853642Sguido break; 155953642Sguido case IPPROTO_TCP : 156064580Sdarrenr /* 156164580Sdarrenr * Just plain ignore RST flag set with either FIN or SYN. 156264580Sdarrenr */ 156392685Sdarrenr if ((tcp->th_flags & TH_RST) && 156492685Sdarrenr ((tcp->th_flags & (TH_FIN|TH_SYN|TH_RST)) != TH_RST)) 156564580Sdarrenr break; 156667614Sdarrenr case IPPROTO_UDP : 156792685Sdarrenr { 156892685Sdarrenr register u_short dport, sport; 156992685Sdarrenr 157067614Sdarrenr dport = tcp->th_dport; 157167614Sdarrenr sport = tcp->th_sport; 157253642Sguido tryagain = 0; 157353642Sguido hv += dport; 157453642Sguido hv += sport; 157567614Sdarrenr READ_ENTER(&ipf_state); 157667614Sdarrenrretry_tcpudp: 157753642Sguido hvm = hv % fr_statesize; 157867614Sdarrenr for (isp = &ips_table[hvm]; (is = *isp); isp = &is->is_hnext) 157960854Sdarrenr if ((is->is_p == pr) && (is->is_v == v) && 158053642Sguido fr_matchsrcdst(is, src, dst, fin, tcp)) { 158192685Sdarrenr rev = fin->fin_rev; 158267614Sdarrenr if ((pr == IPPROTO_TCP)) { 158395418Sdarrenr if (!fr_tcpstate(is, fin, ip, tcp)) 158495418Sdarrenr is = NULL; 158580482Sdarrenr } else if ((pr == IPPROTO_UDP)) { 158692685Sdarrenr if (is->is_frage[rev] != 0) 158792685Sdarrenr is->is_age = is->is_frage[rev]; 158892685Sdarrenr else if (fin->fin_rev) 158980482Sdarrenr is->is_age = fr_udpacktimeout; 159080482Sdarrenr else 159180482Sdarrenr is->is_age = fr_udptimeout; 159267614Sdarrenr } 159353642Sguido break; 159453642Sguido } 159567614Sdarrenr if (is != NULL) { 159667614Sdarrenr if (tryagain && 159767614Sdarrenr !(is->is_flags & (FI_WILDP|FI_WILDA))) { 159867614Sdarrenr hv += dport; 159967614Sdarrenr hv += sport; 160067614Sdarrenr fr_ipsmove(isp, is, hv); 160167614Sdarrenr MUTEX_DOWNGRADE(&ipf_state); 160267614Sdarrenr } 160353642Sguido break; 160467614Sdarrenr } 160595418Sdarrenr 160653642Sguido RWLOCK_EXIT(&ipf_state); 160767614Sdarrenr if (!tryagain && ips_wild) { 160867614Sdarrenr hv -= dport; 160967614Sdarrenr hv -= sport; 161053642Sguido tryagain = 1; 161167614Sdarrenr WRITE_ENTER(&ipf_state); 161267614Sdarrenr goto retry_tcpudp; 161353642Sguido } 161453642Sguido break; 161553642Sguido } 161653642Sguido default : 161792685Sdarrenr tcp = NULL; 161892685Sdarrenr hv %= fr_statesize; 161992685Sdarrenr READ_ENTER(&ipf_state); 162092685Sdarrenr for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) { 162192685Sdarrenr if ((is->is_p == pr) && (is->is_v == v) && 162292685Sdarrenr fr_matchsrcdst(is, src, dst, fin, NULL)) { 162392685Sdarrenr rev = fin->fin_rev; 162492685Sdarrenr if (is->is_frage[rev] != 0) 162592685Sdarrenr is->is_age = is->is_frage[rev]; 162692685Sdarrenr else 162792685Sdarrenr is->is_age = fr_udptimeout; 162892685Sdarrenr break; 162992685Sdarrenr } 163092685Sdarrenr } 163192685Sdarrenr if (is == NULL) { 163292685Sdarrenr RWLOCK_EXIT(&ipf_state); 163392685Sdarrenr } 163453642Sguido break; 163553642Sguido } 163692685Sdarrenr 163753642Sguido if (is == NULL) { 163860854Sdarrenr ATOMIC_INCL(ips_stats.iss_miss); 163953642Sguido return NULL; 164053642Sguido } 164192685Sdarrenr 164260854Sdarrenr MUTEX_ENTER(&is->is_lock); 164360854Sdarrenr is->is_bytes += fin->fin_plen; 164453642Sguido ips_stats.iss_hits++; 164553642Sguido is->is_pkts++; 164660854Sdarrenr MUTEX_EXIT(&is->is_lock); 164753642Sguido fr = is->is_rule; 164892685Sdarrenr fin->fin_rule = is->is_rulen; 164992685Sdarrenr if (fr != NULL) { 165092685Sdarrenr fin->fin_group = fr->fr_group; 165192685Sdarrenr fin->fin_icode = fr->fr_icode; 165292685Sdarrenr } 165353642Sguido fin->fin_fr = fr; 165453642Sguido pass = is->is_pass; 165592685Sdarrenr RWLOCK_EXIT(&ipf_state); 165692685Sdarrenr if ((fin->fin_fl & FI_FRAG) && (pass & FR_KEEPFRAG)) 1657102520Sdarrenr ipfr_newfrag(ip, fin); 165860854Sdarrenr#ifndef _KERNEL 165992685Sdarrenr if ((tcp != NULL) && (tcp->th_flags & TCP_CLOSE)) 166060854Sdarrenr fr_delstate(is); 166160854Sdarrenr#endif 166253642Sguido return fr; 166353642Sguido} 166453642Sguido 166553642Sguido 166692685Sdarrenr/* 166792685Sdarrenr * Sync. state entries. If interfaces come or go or just change position, 166892685Sdarrenr * this is needed. 166992685Sdarrenr */ 167060854Sdarrenrvoid ip_statesync(ifp) 167160854Sdarrenrvoid *ifp; 167260854Sdarrenr{ 167360854Sdarrenr register ipstate_t *is; 167492685Sdarrenr int i; 167560854Sdarrenr 167660854Sdarrenr WRITE_ENTER(&ipf_state); 167760854Sdarrenr for (is = ips_list; is; is = is->is_next) { 167892685Sdarrenr for (i = 0; i < 4; i++) { 167992685Sdarrenr if (is->is_ifp[i] == ifp) { 168092685Sdarrenr is->is_ifpin = GETUNIT(is->is_ifname[i], 168192685Sdarrenr is->is_v); 168292685Sdarrenr if (!is->is_ifp[i]) 168392685Sdarrenr is->is_ifp[i] = (void *)-1; 168492685Sdarrenr } 168560854Sdarrenr } 168660854Sdarrenr } 168760854Sdarrenr RWLOCK_EXIT(&ipf_state); 168860854Sdarrenr} 168960854Sdarrenr 169060854Sdarrenr 169172006Sdarrenr/* 169272006Sdarrenr * Must always be called with fr_ipfstate held as a write lock. 169372006Sdarrenr */ 169453642Sguidostatic void fr_delstate(is) 169553642Sguidoipstate_t *is; 169653642Sguido{ 169753642Sguido frentry_t *fr; 169853642Sguido 169967614Sdarrenr if (is->is_flags & (FI_WILDP|FI_WILDA)) 170067614Sdarrenr ips_wild--; 170160854Sdarrenr if (is->is_next) 170260854Sdarrenr is->is_next->is_pnext = is->is_pnext; 170360854Sdarrenr *is->is_pnext = is->is_next; 170460854Sdarrenr if (is->is_hnext) 170560854Sdarrenr is->is_hnext->is_phnext = is->is_phnext; 170660854Sdarrenr *is->is_phnext = is->is_hnext; 170760854Sdarrenr if (ips_table[is->is_hv] == NULL) 170860854Sdarrenr ips_stats.iss_inuse--; 170992685Sdarrenr if (is->is_me) 171092685Sdarrenr *is->is_me = NULL; 171160854Sdarrenr 171253642Sguido fr = is->is_rule; 171353642Sguido if (fr != NULL) { 171472006Sdarrenr fr->fr_ref--; 171572006Sdarrenr if (fr->fr_ref == 0) { 171653642Sguido KFREE(fr); 171772006Sdarrenr } 171853642Sguido } 171960854Sdarrenr#ifdef _KERNEL 172060854Sdarrenr MUTEX_DESTROY(&is->is_lock); 172160854Sdarrenr#endif 172253642Sguido KFREE(is); 172360854Sdarrenr ips_num--; 172453642Sguido} 172553642Sguido 172653642Sguido 172753642Sguido/* 172853642Sguido * Free memory in use by all state info. kept. 172953642Sguido */ 173053642Sguidovoid fr_stateunload() 173153642Sguido{ 173260854Sdarrenr register ipstate_t *is; 173353642Sguido 173453642Sguido WRITE_ENTER(&ipf_state); 173560854Sdarrenr while ((is = ips_list)) 173660854Sdarrenr fr_delstate(is); 173753642Sguido ips_stats.iss_inuse = 0; 173853642Sguido ips_num = 0; 173953642Sguido RWLOCK_EXIT(&ipf_state); 174080482Sdarrenr if (ips_table) 174180482Sdarrenr KFREES(ips_table, fr_statesize * sizeof(ipstate_t *)); 174253642Sguido ips_table = NULL; 174353642Sguido} 174453642Sguido 174553642Sguido 174653642Sguido/* 174753642Sguido * Slowly expire held state for thingslike UDP and ICMP. Timeouts are set 174853642Sguido * in expectation of this being called twice per second. 174953642Sguido */ 175053642Sguidovoid fr_timeoutstate() 175153642Sguido{ 175253642Sguido register ipstate_t *is, **isp; 175353642Sguido#if defined(_KERNEL) && !SOLARIS 175453642Sguido int s; 175553642Sguido#endif 175653642Sguido 175753642Sguido SPL_NET(s); 175853642Sguido WRITE_ENTER(&ipf_state); 175960854Sdarrenr for (isp = &ips_list; (is = *isp); ) 176060854Sdarrenr if (is->is_age && !--is->is_age) { 176160854Sdarrenr if (is->is_p == IPPROTO_TCP) 176260854Sdarrenr ips_stats.iss_fin++; 176360854Sdarrenr else 176460854Sdarrenr ips_stats.iss_expire++; 176553642Sguido#ifdef IPFILTER_LOG 176660854Sdarrenr ipstate_log(is, ISL_EXPIRE); 176753642Sguido#endif 176860854Sdarrenr fr_delstate(is); 176960854Sdarrenr } else 177060854Sdarrenr isp = &is->is_next; 177155929Sguido if (fr_state_doflush) { 1772110916Sdarrenr (void) fr_state_flush(2, 0); 177355929Sguido fr_state_doflush = 0; 177455929Sguido } 177572006Sdarrenr RWLOCK_EXIT(&ipf_state); 177672006Sdarrenr SPL_X(s); 177753642Sguido} 177853642Sguido 177953642Sguido 178053642Sguido/* 178153642Sguido * Original idea freom Pradeep Krishnan for use primarily with NAT code. 178253642Sguido * (pkrishna@netcom.com) 178364580Sdarrenr * 178464580Sdarrenr * Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: 178564580Sdarrenr * 178664580Sdarrenr * - (try to) base state transitions on real evidence only, 178764580Sdarrenr * i.e. packets that are sent and have been received by ipfilter; 178864580Sdarrenr * diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. 178964580Sdarrenr * 179064580Sdarrenr * - deal with half-closed connections correctly; 179164580Sdarrenr * 179264580Sdarrenr * - store the state of the source in state[0] such that ipfstat 179364580Sdarrenr * displays the state as source/dest instead of dest/source; the calls 179464580Sdarrenr * to fr_tcp_age have been changed accordingly. 179564580Sdarrenr * 179664580Sdarrenr * Parameters: 179764580Sdarrenr * 179864580Sdarrenr * state[0] = state of source (host that initiated connection) 179964580Sdarrenr * state[1] = state of dest (host that accepted the connection) 180064580Sdarrenr * 180164580Sdarrenr * dir == 0 : a packet from source to dest 180264580Sdarrenr * dir == 1 : a packet from dest to source 180364580Sdarrenr * 180453642Sguido */ 180595418Sdarrenrint fr_tcp_age(age, state, fin, dir, fsm) 180653642Sguidou_long *age; 180753642Sguidou_char *state; 180853642Sguidofr_info_t *fin; 180995418Sdarrenrint dir, fsm; 181053642Sguido{ 181153642Sguido tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp; 181253642Sguido u_char flags = tcp->th_flags; 181353642Sguido int dlen, ostate; 181495418Sdarrenr u_long newage; 181553642Sguido 181653642Sguido ostate = state[1 - dir]; 181753642Sguido 181860854Sdarrenr dlen = fin->fin_plen - fin->fin_hlen - (tcp->th_off << 2); 181953642Sguido 182053642Sguido if (flags & TH_RST) { 182153642Sguido if (!(tcp->th_flags & TH_PUSH) && !dlen) { 182253642Sguido *age = fr_tcpclosed; 182353642Sguido state[dir] = TCPS_CLOSED; 182453642Sguido } else { 182553642Sguido *age = fr_tcpclosewait; 182653642Sguido state[dir] = TCPS_CLOSE_WAIT; 182753642Sguido } 182895418Sdarrenr return 0; 182953642Sguido } 183053642Sguido 183195418Sdarrenr newage = 0; 183253642Sguido 183353642Sguido switch(state[dir]) 183453642Sguido { 183564580Sdarrenr case TCPS_CLOSED: /* 0 */ 183664580Sdarrenr if ((flags & TH_OPENING) == TH_OPENING) { 183764580Sdarrenr /* 183864580Sdarrenr * 'dir' received an S and sends SA in response, 183964580Sdarrenr * CLOSED -> SYN_RECEIVED 184064580Sdarrenr */ 184164580Sdarrenr state[dir] = TCPS_SYN_RECEIVED; 184295418Sdarrenr newage = fr_tcptimeout; 184395418Sdarrenr } else if ((flags & TH_OPENING) == TH_SYN) { 184464580Sdarrenr /* 'dir' sent S, CLOSED -> SYN_SENT */ 184564580Sdarrenr state[dir] = TCPS_SYN_SENT; 184695418Sdarrenr newage = fr_tcptimeout; 184764580Sdarrenr } 184864580Sdarrenr /* 184964580Sdarrenr * The next piece of code makes it possible to get 185064580Sdarrenr * already established connections into the state table 185164580Sdarrenr * after a restart or reload of the filter rules; this 185264580Sdarrenr * does not work when a strict 'flags S keep state' is 185364580Sdarrenr * used for tcp connections of course 185464580Sdarrenr */ 185595418Sdarrenr if (!fsm && (flags & (TH_FIN|TH_SYN|TH_RST|TH_ACK)) == TH_ACK) { 185664580Sdarrenr /* we saw an A, guess 'dir' is in ESTABLISHED mode */ 185792685Sdarrenr if (state[1 - dir] == TCPS_CLOSED || 185892685Sdarrenr state[1 - dir] == TCPS_ESTABLISHED) { 185992685Sdarrenr state[dir] = TCPS_ESTABLISHED; 186095418Sdarrenr newage = fr_tcpidletimeout; 186192685Sdarrenr } 186253642Sguido } 186364580Sdarrenr /* 186464580Sdarrenr * TODO: besides regular ACK packets we can have other 186564580Sdarrenr * packets as well; it is yet to be determined how we 186664580Sdarrenr * should initialize the states in those cases 186764580Sdarrenr */ 186864580Sdarrenr break; 186964580Sdarrenr 187064580Sdarrenr case TCPS_LISTEN: /* 1 */ 187164580Sdarrenr /* NOT USED */ 187264580Sdarrenr break; 187364580Sdarrenr 187464580Sdarrenr case TCPS_SYN_SENT: /* 2 */ 187595563Sdarrenr if ((flags & ~(TH_ECN|TH_CWR)) == TH_SYN) { 187664580Sdarrenr /* 187795418Sdarrenr * A retransmitted SYN packet. We do not reset the 187895418Sdarrenr * timeout here to fr_tcptimeout because a connection 187995418Sdarrenr * connect timeout does not renew after every packet 188095418Sdarrenr * that is sent. We need to set newage to something 188195418Sdarrenr * to indicate the packet has passed the check for its 188295418Sdarrenr * flags being valid in the TCP FSM. 188395418Sdarrenr */ 188495418Sdarrenr newage = *age; 188595418Sdarrenr } else if ((flags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { 188695418Sdarrenr /* 188764580Sdarrenr * We see an A from 'dir' which is in SYN_SENT 188864580Sdarrenr * state: 'dir' sent an A in response to an SA 188964580Sdarrenr * which it received, SYN_SENT -> ESTABLISHED 189064580Sdarrenr */ 189164580Sdarrenr state[dir] = TCPS_ESTABLISHED; 189295418Sdarrenr newage = fr_tcpidletimeout; 189364580Sdarrenr } else if (flags & TH_FIN) { 189464580Sdarrenr /* 189564580Sdarrenr * We see an F from 'dir' which is in SYN_SENT 189664580Sdarrenr * state and wants to close its side of the 189764580Sdarrenr * connection; SYN_SENT -> FIN_WAIT_1 189864580Sdarrenr */ 189964580Sdarrenr state[dir] = TCPS_FIN_WAIT_1; 190095418Sdarrenr newage = fr_tcpidletimeout; /* or fr_tcptimeout? */ 190164580Sdarrenr } else if ((flags & TH_OPENING) == TH_OPENING) { 190264580Sdarrenr /* 190364580Sdarrenr * We see an SA from 'dir' which is already in 190464580Sdarrenr * SYN_SENT state, this means we have a 190564580Sdarrenr * simultaneous open; SYN_SENT -> SYN_RECEIVED 190664580Sdarrenr */ 190753642Sguido state[dir] = TCPS_SYN_RECEIVED; 190895418Sdarrenr newage = fr_tcptimeout; 190964580Sdarrenr } 191053642Sguido break; 191164580Sdarrenr 191264580Sdarrenr case TCPS_SYN_RECEIVED: /* 3 */ 191364580Sdarrenr if ((flags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { 191464580Sdarrenr /* 191564580Sdarrenr * We see an A from 'dir' which was in SYN_RECEIVED 191664580Sdarrenr * state so it must now be in established state, 191764580Sdarrenr * SYN_RECEIVED -> ESTABLISHED 191864580Sdarrenr */ 191953642Sguido state[dir] = TCPS_ESTABLISHED; 192095418Sdarrenr newage = fr_tcpidletimeout; 192195563Sdarrenr } else if ((flags & ~(TH_ECN|TH_CWR)) == TH_OPENING) { 192295563Sdarrenr /* 192395563Sdarrenr * We see an SA from 'dir' which is already in 192495563Sdarrenr * SYN_RECEIVED state. 192595563Sdarrenr */ 192695563Sdarrenr newage = fr_tcptimeout; 192764580Sdarrenr } else if (flags & TH_FIN) { 192864580Sdarrenr /* 192964580Sdarrenr * We see an F from 'dir' which is in SYN_RECEIVED 193064580Sdarrenr * state and wants to close its side of the connection; 193164580Sdarrenr * SYN_RECEIVED -> FIN_WAIT_1 193264580Sdarrenr */ 193364580Sdarrenr state[dir] = TCPS_FIN_WAIT_1; 193495418Sdarrenr newage = fr_tcpidletimeout; 193553642Sguido } 193653642Sguido break; 193764580Sdarrenr 193864580Sdarrenr case TCPS_ESTABLISHED: /* 4 */ 193953642Sguido if (flags & TH_FIN) { 194064580Sdarrenr /* 194164580Sdarrenr * 'dir' closed its side of the connection; this 194264580Sdarrenr * gives us a half-closed connection; 194364580Sdarrenr * ESTABLISHED -> FIN_WAIT_1 194464580Sdarrenr */ 194564580Sdarrenr state[dir] = TCPS_FIN_WAIT_1; 194695418Sdarrenr newage = fr_tcphalfclosed; 194764580Sdarrenr } else if (flags & TH_ACK) { 194864580Sdarrenr /* an ACK, should we exclude other flags here? */ 194964580Sdarrenr if (ostate == TCPS_FIN_WAIT_1) { 195064580Sdarrenr /* 195164580Sdarrenr * We know the other side did an active close, 195264580Sdarrenr * so we are ACKing the recvd FIN packet (does 195364580Sdarrenr * the window matching code guarantee this?) 195464580Sdarrenr * and go into CLOSE_WAIT state; this gives us 195564580Sdarrenr * a half-closed connection 195664580Sdarrenr */ 195764580Sdarrenr state[dir] = TCPS_CLOSE_WAIT; 195895418Sdarrenr newage = fr_tcphalfclosed; 195964580Sdarrenr } else if (ostate < TCPS_CLOSE_WAIT) 196064580Sdarrenr /* 196164580Sdarrenr * Still a fully established connection, 196264580Sdarrenr * reset timeout 196364580Sdarrenr */ 196495418Sdarrenr newage = fr_tcpidletimeout; 196553642Sguido } 196653642Sguido break; 196764580Sdarrenr 196864580Sdarrenr case TCPS_CLOSE_WAIT: /* 5 */ 196964580Sdarrenr if (flags & TH_FIN) { 197064580Sdarrenr /* 197164580Sdarrenr * Application closed and 'dir' sent a FIN, we're now 197264580Sdarrenr * going into LAST_ACK state 197364580Sdarrenr */ 197495418Sdarrenr newage = fr_tcplastack; 197553642Sguido state[dir] = TCPS_LAST_ACK; 197664580Sdarrenr } else { 197764580Sdarrenr /* 197864580Sdarrenr * We remain in CLOSE_WAIT because the other side has 197964580Sdarrenr * closed already and we did not close our side yet; 198064580Sdarrenr * reset timeout 198164580Sdarrenr */ 198295418Sdarrenr newage = fr_tcphalfclosed; 198364580Sdarrenr } 198464580Sdarrenr break; 198564580Sdarrenr 198664580Sdarrenr case TCPS_FIN_WAIT_1: /* 6 */ 198764580Sdarrenr if ((flags & TH_ACK) && ostate > TCPS_CLOSE_WAIT) { 198864580Sdarrenr /* 198964580Sdarrenr * If the other side is not active anymore it has sent 199064580Sdarrenr * us a FIN packet that we are ack'ing now with an ACK; 199164580Sdarrenr * this means both sides have now closed the connection 199264580Sdarrenr * and we go into TIME_WAIT 199364580Sdarrenr */ 199464580Sdarrenr /* 199564580Sdarrenr * XXX: how do we know we really are ACKing the FIN 199664580Sdarrenr * packet here? does the window code guarantee that? 199764580Sdarrenr */ 199864580Sdarrenr state[dir] = TCPS_TIME_WAIT; 199995418Sdarrenr newage = fr_tcptimeout; 200053642Sguido } else 200164580Sdarrenr /* 200264580Sdarrenr * We closed our side of the connection already but the 200364580Sdarrenr * other side is still active (ESTABLISHED/CLOSE_WAIT); 200464580Sdarrenr * continue with this half-closed connection 200564580Sdarrenr */ 200695418Sdarrenr newage = fr_tcphalfclosed; 200753642Sguido break; 200864580Sdarrenr 200964580Sdarrenr case TCPS_CLOSING: /* 7 */ 201064580Sdarrenr /* NOT USED */ 201164580Sdarrenr break; 201264580Sdarrenr 201364580Sdarrenr case TCPS_LAST_ACK: /* 8 */ 201453642Sguido if (flags & TH_ACK) { 201564580Sdarrenr if ((flags & TH_PUSH) || dlen) 201664580Sdarrenr /* 201764580Sdarrenr * There is still data to be delivered, reset 201864580Sdarrenr * timeout 201964580Sdarrenr */ 202095418Sdarrenr newage = fr_tcplastack; 202195563Sdarrenr else 202295563Sdarrenr newage = *age; 202353642Sguido } 202464580Sdarrenr /* 202564580Sdarrenr * We cannot detect when we go out of LAST_ACK state to CLOSED 202664580Sdarrenr * because that is based on the reception of ACK packets; 202764580Sdarrenr * ipfilter can only detect that a packet has been sent by a 202864580Sdarrenr * host 202964580Sdarrenr */ 203053642Sguido break; 203164580Sdarrenr 203264580Sdarrenr case TCPS_FIN_WAIT_2: /* 9 */ 203364580Sdarrenr /* NOT USED */ 203464580Sdarrenr break; 203564580Sdarrenr 203664580Sdarrenr case TCPS_TIME_WAIT: /* 10 */ 203795418Sdarrenr newage = fr_tcptimeout; /* default 4 mins */ 203864580Sdarrenr /* we're in 2MSL timeout now */ 203964580Sdarrenr break; 204053642Sguido } 204195418Sdarrenr 204295418Sdarrenr if (newage != 0) { 204395418Sdarrenr *age = newage; 204495418Sdarrenr return 0; 204595418Sdarrenr } 204695418Sdarrenr return -1; 204753642Sguido} 204853642Sguido 204953642Sguido 205053642Sguido#ifdef IPFILTER_LOG 205153642Sguidovoid ipstate_log(is, type) 205253642Sguidostruct ipstate *is; 205353642Sguidou_int type; 205453642Sguido{ 205553642Sguido struct ipslog ipsl; 205653642Sguido void *items[1]; 205753642Sguido size_t sizes[1]; 205853642Sguido int types[1]; 205953642Sguido 206053642Sguido ipsl.isl_type = type; 206153642Sguido ipsl.isl_pkts = is->is_pkts; 206253642Sguido ipsl.isl_bytes = is->is_bytes; 206353642Sguido ipsl.isl_src = is->is_src; 206453642Sguido ipsl.isl_dst = is->is_dst; 206553642Sguido ipsl.isl_p = is->is_p; 206660854Sdarrenr ipsl.isl_v = is->is_v; 206753642Sguido ipsl.isl_flags = is->is_flags; 2068102520Sdarrenr ipsl.isl_rulen = is->is_rulen; 2069102520Sdarrenr ipsl.isl_group = is->is_group; 207053642Sguido if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { 207153642Sguido ipsl.isl_sport = is->is_sport; 207253642Sguido ipsl.isl_dport = is->is_dport; 207353642Sguido if (ipsl.isl_p == IPPROTO_TCP) { 207453642Sguido ipsl.isl_state[0] = is->is_state[0]; 207553642Sguido ipsl.isl_state[1] = is->is_state[1]; 207653642Sguido } 207792685Sdarrenr } else if (ipsl.isl_p == IPPROTO_ICMP) { 207853642Sguido ipsl.isl_itype = is->is_icmp.ics_type; 207992685Sdarrenr } else if (ipsl.isl_p == IPPROTO_ICMPV6) { 208092685Sdarrenr ipsl.isl_itype = is->is_icmp.ics_type; 208192685Sdarrenr } else { 208253642Sguido ipsl.isl_ps.isl_filler[0] = 0; 208353642Sguido ipsl.isl_ps.isl_filler[1] = 0; 208453642Sguido } 208553642Sguido items[0] = &ipsl; 208653642Sguido sizes[0] = sizeof(ipsl); 208753642Sguido types[0] = 0; 208853642Sguido 208953642Sguido (void) ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1); 209053642Sguido} 209153642Sguido#endif 209257096Sguido 209357096Sguido 209460854Sdarrenr#ifdef USE_INET6 209560854Sdarrenrfrentry_t *fr_checkicmp6matchingstate(ip, fin) 209660854Sdarrenrip6_t *ip; 209760854Sdarrenrfr_info_t *fin; 209857096Sguido{ 209960854Sdarrenr register ipstate_t *is, **isp; 210060854Sdarrenr register u_short sport, dport; 210160854Sdarrenr register u_char pr; 210260854Sdarrenr struct icmp6_hdr *ic, *oic; 210360854Sdarrenr union i6addr dst, src; 210460854Sdarrenr u_short savelen; 210560854Sdarrenr fr_info_t ofin; 210660854Sdarrenr tcphdr_t *tcp; 210760854Sdarrenr frentry_t *fr; 210860854Sdarrenr ip6_t *oip; 210960854Sdarrenr int type; 211060854Sdarrenr u_int hv; 211157096Sguido 211260854Sdarrenr /* 211360854Sdarrenr * Does it at least have the return (basic) IP header ? 211460854Sdarrenr * Only a basic IP header (no options) should be with 211560854Sdarrenr * an ICMP error header. 211660854Sdarrenr */ 211760854Sdarrenr if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN)) 211860854Sdarrenr return NULL; 211960854Sdarrenr ic = (struct icmp6_hdr *)fin->fin_dp; 212060854Sdarrenr type = ic->icmp6_type; 212160854Sdarrenr /* 212260854Sdarrenr * If it's not an error type, then return 212360854Sdarrenr */ 212460854Sdarrenr if ((type != ICMP6_DST_UNREACH) && (type != ICMP6_PACKET_TOO_BIG) && 212560854Sdarrenr (type != ICMP6_TIME_EXCEEDED) && (type != ICMP6_PARAM_PROB)) 212660854Sdarrenr return NULL; 212760854Sdarrenr 212860854Sdarrenr oip = (ip6_t *)((char *)ic + ICMPERR_ICMPHLEN); 212960854Sdarrenr if (fin->fin_plen < sizeof(*oip)) 213060854Sdarrenr return NULL; 213160854Sdarrenr 213295563Sdarrenr if ((oip->ip6_nxt != IPPROTO_TCP) && (oip->ip6_nxt != IPPROTO_UDP) && 213395563Sdarrenr (oip->ip6_nxt != IPPROTO_ICMPV6)) 213495563Sdarrenr return NULL; 213595563Sdarrenr 213695563Sdarrenr bzero((char *)&ofin, sizeof(ofin)); 213795563Sdarrenr ofin.fin_out = !fin->fin_out; 213895563Sdarrenr ofin.fin_ifp = fin->fin_ifp; 213995563Sdarrenr ofin.fin_v = 6; 214095563Sdarrenr 214160854Sdarrenr if (oip->ip6_nxt == IPPROTO_ICMPV6) { 214260854Sdarrenr oic = (struct icmp6_hdr *)(oip + 1); 214360854Sdarrenr /* 214460854Sdarrenr * a ICMP error can only be generated as a result of an 214560854Sdarrenr * ICMP query, not as the response on an ICMP error 214660854Sdarrenr * 214760854Sdarrenr * XXX theoretically ICMP_ECHOREP and the other reply's are 214860854Sdarrenr * ICMP query's as well, but adding them here seems strange XXX 214960854Sdarrenr */ 215060854Sdarrenr if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) 215160854Sdarrenr return NULL; 215260854Sdarrenr 215360854Sdarrenr /* 215460854Sdarrenr * perform a lookup of the ICMP packet in the state table 215560854Sdarrenr */ 215660854Sdarrenr hv = (pr = oip->ip6_nxt); 215760854Sdarrenr src.in6 = oip->ip6_src; 215860854Sdarrenr hv += src.in4.s_addr; 215960854Sdarrenr dst.in6 = oip->ip6_dst; 216060854Sdarrenr hv += dst.in4.s_addr; 216160854Sdarrenr hv += oic->icmp6_id; 216260854Sdarrenr hv += oic->icmp6_seq; 216360854Sdarrenr hv %= fr_statesize; 216460854Sdarrenr 216560854Sdarrenr oip->ip6_plen = ntohs(oip->ip6_plen); 216660854Sdarrenr fr_makefrip(sizeof(*oip), (ip_t *)oip, &ofin); 216760854Sdarrenr oip->ip6_plen = htons(oip->ip6_plen); 216860854Sdarrenr 216960854Sdarrenr READ_ENTER(&ipf_state); 217060854Sdarrenr for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) 217160854Sdarrenr if ((is->is_p == pr) && 217260854Sdarrenr (oic->icmp6_id == is->is_icmp.ics_id) && 217360854Sdarrenr (oic->icmp6_seq == is->is_icmp.ics_seq) && 217460854Sdarrenr fr_matchsrcdst(is, src, dst, &ofin, NULL)) { 217560854Sdarrenr /* 217660854Sdarrenr * in the state table ICMP query's are stored 217760854Sdarrenr * with the type of the corresponding ICMP 217860854Sdarrenr * response. Correct here 217960854Sdarrenr */ 218060854Sdarrenr if (((is->is_type == ICMP6_ECHO_REPLY) && 218160854Sdarrenr (oic->icmp6_type == ICMP6_ECHO_REQUEST)) || 218260854Sdarrenr (is->is_type - 1 == oic->icmp6_type )) { 218360854Sdarrenr ips_stats.iss_hits++; 218463523Sdarrenr is->is_pkts++; 218563523Sdarrenr is->is_bytes += fin->fin_plen; 218660854Sdarrenr return is->is_rule; 218760854Sdarrenr } 218860854Sdarrenr } 218960854Sdarrenr RWLOCK_EXIT(&ipf_state); 219060854Sdarrenr 219160854Sdarrenr return NULL; 219295563Sdarrenr } 219360854Sdarrenr 219460854Sdarrenr tcp = (tcphdr_t *)(oip + 1); 219560854Sdarrenr dport = tcp->th_dport; 219660854Sdarrenr sport = tcp->th_sport; 219760854Sdarrenr 219860854Sdarrenr hv = (pr = oip->ip6_nxt); 219960854Sdarrenr src.in6 = oip->ip6_src; 220060854Sdarrenr hv += src.in4.s_addr; 220195418Sdarrenr hv += src.i6[1]; 220295418Sdarrenr hv += src.i6[2]; 220395418Sdarrenr hv += src.i6[3]; 220460854Sdarrenr dst.in6 = oip->ip6_dst; 220560854Sdarrenr hv += dst.in4.s_addr; 220695418Sdarrenr hv += dst.i6[1]; 220795418Sdarrenr hv += dst.i6[2]; 220895418Sdarrenr hv += dst.i6[3]; 220960854Sdarrenr hv += dport; 221060854Sdarrenr hv += sport; 221160854Sdarrenr hv %= fr_statesize; 221260854Sdarrenr /* 221360854Sdarrenr * we make an fin entry to be able to feed it to 221460854Sdarrenr * matchsrcdst note that not all fields are encessary 221560854Sdarrenr * but this is the cleanest way. Note further we fill 221660854Sdarrenr * in fin_mp such that if someone uses it we'll get 221760854Sdarrenr * a kernel panic. fr_matchsrcdst does not use this. 221860854Sdarrenr * 221960854Sdarrenr * watch out here, as ip is in host order and oip in network 222060854Sdarrenr * order. Any change we make must be undone afterwards. 222160854Sdarrenr */ 222260854Sdarrenr savelen = oip->ip6_plen; 222360854Sdarrenr oip->ip6_plen = ip->ip6_plen - sizeof(*ip) - ICMPERR_ICMPHLEN; 222460854Sdarrenr fr_makefrip(sizeof(*oip), (ip_t *)oip, &ofin); 222560854Sdarrenr oip->ip6_plen = savelen; 222660854Sdarrenr READ_ENTER(&ipf_state); 222760854Sdarrenr for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) { 222860854Sdarrenr /* 222960854Sdarrenr * Only allow this icmp though if the 223060854Sdarrenr * encapsulated packet was allowed through the 223160854Sdarrenr * other way around. Note that the minimal amount 223260854Sdarrenr * of info present does not allow for checking against 223360854Sdarrenr * tcp internals such as seq and ack numbers. 223460854Sdarrenr */ 223560854Sdarrenr if ((is->is_p == pr) && (is->is_v == 6) && 223660854Sdarrenr fr_matchsrcdst(is, src, dst, &ofin, tcp)) { 223760854Sdarrenr fr = is->is_rule; 223860854Sdarrenr ips_stats.iss_hits++; 223960854Sdarrenr is->is_pkts++; 224060854Sdarrenr is->is_bytes += fin->fin_plen; 224160854Sdarrenr /* 224260854Sdarrenr * we deliberately do not touch the timeouts 224360854Sdarrenr * for the accompanying state table entry. 224460854Sdarrenr * It remains to be seen if that is correct. XXX 224560854Sdarrenr */ 224660854Sdarrenr RWLOCK_EXIT(&ipf_state); 224760854Sdarrenr return fr; 224857096Sguido } 224960854Sdarrenr } 225057096Sguido RWLOCK_EXIT(&ipf_state); 225160854Sdarrenr return NULL; 225257096Sguido} 225360854Sdarrenr#endif 2254