ip_state.c revision 57096
1234353Sdim/*
2234353Sdim * Copyright (C) 1995-1998 by Darren Reed.
3193323Sed *
4193323Sed * Redistribution and use in source and binary forms are permitted
5193323Sed * provided that this notice is preserved and due credit is given
6193323Sed * to the original author and the contributors.
7234353Sdim */
8193323Sed#if !defined(lint)
9193323Sedstatic const char sccsid[] = "@(#)ip_state.c	1.8 6/5/96 (C) 1993-1995 Darren Reed";
10193323Sedstatic const char rcsid[] = "@(#)$Id: ip_state.c,v 2.3.2.18 2000/01/27 08:51:30 darrenr Exp $";
11193323Sed#endif
12193323Sed
13193323Sed#include <sys/errno.h>
14193323Sed#include <sys/types.h>
15193323Sed#include <sys/param.h>
16193323Sed#include <sys/file.h>
17193323Sed#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
18193323Sed    defined(_KERNEL)
19193323Sed# include "opt_ipfilter_log.h"
20193323Sed#endif
21193323Sed#if !defined(_KERNEL) && !defined(KERNEL) && !defined(__KERNEL__)
22193323Sed# include <stdio.h>
23249423Sdim# include <stdlib.h>
24249423Sdim# include <string.h>
25249423Sdim#else
26249423Sdim# ifdef linux
27193323Sed#  include <linux/kernel.h>
28193323Sed#  include <linux/module.h>
29193323Sed# endif
30193323Sed#endif
31193323Sed#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
32193323Sed# include <sys/filio.h>
33193323Sed# include <sys/fcntl.h>
34193323Sed# if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM)
35193323Sed#  include "opt_ipfilter.h"
36193323Sed# endif
37193323Sed#else
38193323Sed# include <sys/ioctl.h>
39193323Sed#endif
40193323Sed#include <sys/time.h>
41193323Sed#include <sys/uio.h>
42198090Srdivacky#ifndef linux
43249423Sdim# include <sys/protosw.h>
44193323Sed#endif
45198090Srdivacky#include <sys/socket.h>
46249423Sdim#if (defined(_KERNEL) || defined(KERNEL)) && !defined(linux)
47193323Sed# include <sys/systm.h>
48193323Sed#endif
49193323Sed#if !defined(__SVR4) && !defined(__svr4__)
50193323Sed# ifndef linux
51193323Sed#  include <sys/mbuf.h>
52193323Sed# endif
53193323Sed#else
54193323Sed# include <sys/filio.h>
55193323Sed# include <sys/byteorder.h>
56193323Sed# ifdef _KERNEL
57193323Sed#  include <sys/dditypes.h>
58193323Sed# endif
59193323Sed# include <sys/stream.h>
60198090Srdivacky# include <sys/kmem.h>
61193323Sed#endif
62193323Sed
63193323Sed#include <net/if.h>
64193323Sed#ifdef sun
65249423Sdim# include <net/af.h>
66249423Sdim#endif
67249423Sdim#include <net/route.h>
68249423Sdim#include <netinet/in.h>
69249423Sdim#include <netinet/in_systm.h>
70249423Sdim#include <netinet/ip.h>
71249423Sdim#include <netinet/tcp.h>
72193323Sed#ifndef linux
73193323Sed# include <netinet/ip_var.h>
74249423Sdim# include <netinet/tcp_fsm.h>
75249423Sdim#endif
76193323Sed#include <netinet/udp.h>
77193323Sed#include <netinet/ip_icmp.h>
78249423Sdim#include "netinet/ip_compat.h"
79249423Sdim#include <netinet/tcpip.h>
80249423Sdim#include "netinet/ip_fil.h"
81249423Sdim#include "netinet/ip_nat.h"
82193323Sed#include "netinet/ip_frag.h"
83249423Sdim#include "netinet/ip_proxy.h"
84249423Sdim#include "netinet/ip_state.h"
85249423Sdim#if (__FreeBSD_version >= 300000)
86193323Sed# include <sys/malloc.h>
87249423Sdim# if (defined(_KERNEL) || defined(KERNEL)) && !defined(IPFILTER_LKM)
88249423Sdim#  include <sys/libkern.h>
89249423Sdim#  include <sys/systm.h>
90249423Sdim# endif
91193323Sed#endif
92193323Sed
93193323Sed#ifndef	MIN
94193323Sed# define	MIN(a,b)	(((a)<(b))?(a):(b))
95193323Sed#endif
96193323Sed
97193323Sed#define	TCP_CLOSE	(TH_FIN|TH_RST)
98198090Srdivacky
99193323Sedipstate_t **ips_table = NULL;
100193323Sedint	ips_num = 0;
101193323Sedips_stat_t ips_stats;
102249423Sdim#if	(SOLARIS || defined(__sgi)) && defined(_KERNEL)
103249423Sdimextern	KRWLOCK_T	ipf_state, ipf_mutex;
104249423Sdimextern	kmutex_t	ipf_rw;
105249423Sdim#endif
106249423Sdim
107249423Sdimstatic int fr_matchsrcdst __P((ipstate_t *, struct in_addr, struct in_addr,
108249423Sdim			       fr_info_t *, tcphdr_t *));
109249423Sdimstatic frentry_t *fr_checkicmpmatchingstate __P((ip_t *, fr_info_t *));
110249423Sdimstatic int fr_state_flush __P((int));
111249423Sdimstatic ips_stat_t *fr_statetstats __P((void));
112249423Sdimstatic void fr_delstate __P((ipstate_t *));
113249423Sdim
114249423Sdim
115249423Sdim#define	FIVE_DAYS	(2 * 5 * 86400)	/* 5 days: half closed session */
116193323Sed
117193323Sed#define	TCP_MSL	240			/* 2 minutes */
118193323Sedu_long	fr_tcpidletimeout = FIVE_DAYS,
119193323Sed	fr_tcpclosewait = 2 * TCP_MSL,
120193323Sed	fr_tcplastack = 2 * TCP_MSL,
121193323Sed	fr_tcptimeout = 2 * TCP_MSL,
122193323Sed	fr_tcpclosed = 1,
123193323Sed	fr_udptimeout = 240,
124193323Sed	fr_icmptimeout = 120;
125193323Sedint	fr_statemax = IPSTATE_MAX,
126218893Sdim	fr_statesize = IPSTATE_SIZE;
127193323Sedint	fr_state_doflush = 0;
128218893Sdim
129193323Sed
130193323Sedint fr_stateinit()
131249423Sdim{
132249423Sdim	KMALLOCS(ips_table, ipstate_t **, fr_statesize * sizeof(ipstate_t *));
133249423Sdim	if (ips_table != NULL)
134249423Sdim		bzero((char *)ips_table, fr_statesize * sizeof(ipstate_t *));
135249423Sdim	else
136249423Sdim		return -1;
137201360Srdivacky	return 0;
138218893Sdim}
139201360Srdivacky
140243830Sdim
141243830Sdimstatic ips_stat_t *fr_statetstats()
142201360Srdivacky{
143243830Sdim	ips_stats.iss_active = ips_num;
144243830Sdim	ips_stats.iss_table = ips_table;
145193323Sed	return &ips_stats;
146218893Sdim}
147249423Sdim
148249423Sdim
149249423Sdim/*
150193323Sed * flush state tables.  two actions currently defined:
151193323Sed * which == 0 : flush all state table entries
152218893Sdim * which == 1 : flush TCP connections which have started to close but are
153193323Sed *              stuck for some reason.
154193323Sed */
155218893Sdimstatic int fr_state_flush(which)
156193323Sedint which;
157249423Sdim{
158249423Sdim	register int i;
159249423Sdim	register ipstate_t *is, **isp;
160249423Sdim#if defined(_KERNEL) && !SOLARIS
161249423Sdim	int s;
162249423Sdim#endif
163249423Sdim	int delete, removed = 0;
164249423Sdim
165263508Sdim	SPL_NET(s);
166263508Sdim	WRITE_ENTER(&ipf_state);
167263508Sdim	for (i = fr_statesize - 1; i >= 0; i--)
168263508Sdim		for (isp = &ips_table[i]; (is = *isp); ) {
169193323Sed			delete = 0;
170218893Sdim
171193323Sed			switch (which)
172193323Sed			{
173218893Sdim			case 0 :
174193323Sed				delete = 1;
175193323Sed				break;
176193323Sed			case 1 :
177193323Sed				if (is->is_p != IPPROTO_TCP)
178193323Sed					break;
179193323Sed				if ((is->is_state[0] != TCPS_ESTABLISHED) ||
180243830Sdim				    (is->is_state[1] != TCPS_ESTABLISHED))
181243830Sdim					delete = 1;
182243830Sdim				break;
183243830Sdim			}
184243830Sdim
185243830Sdim			if (delete) {
186193323Sed				*isp = is->is_next;
187193323Sed				if (is->is_p == IPPROTO_TCP)
188193323Sed					ips_stats.iss_fin++;
189193323Sed				else
190193323Sed					ips_stats.iss_expire++;
191193323Sed				if (ips_table[i] == NULL)
192249423Sdim					ips_stats.iss_inuse--;
193249423Sdim#ifdef	IPFILTER_LOG
194249423Sdim				ipstate_log(is, ISL_FLUSH);
195249423Sdim#endif
196249423Sdim				fr_delstate(is);
197249423Sdim				ips_num--;
198193323Sed				removed++;
199193323Sed			} else
200193323Sed				isp = &is->is_next;
201193323Sed		}
202193323Sed	RWLOCK_EXIT(&ipf_state);
203193323Sed	SPL_X(s);
204193323Sed	return removed;
205193323Sed}
206193323Sed
207193323Sed
208193323Sedint fr_state_ioctl(data, cmd, mode)
209193323Sedcaddr_t data;
210193323Sed#if defined(__NetBSD__) || defined(__OpenBSD__)
211193323Sedu_long cmd;
212193323Sed#else
213193323Sedint cmd;
214193323Sed#endif
215193323Sedint mode;
216193323Sed{
217193323Sed	int	arg, ret, error = 0;
218193323Sed
219193323Sed	switch (cmd)
220193323Sed	{
221193323Sed	case SIOCIPFFL :
222193323Sed		IRCOPY(data, (caddr_t)&arg, sizeof(arg));
223193323Sed		if (arg == 0 || arg == 1) {
224193323Sed			ret = fr_state_flush(arg);
225193323Sed			IWCOPY((caddr_t)&ret, data, sizeof(ret));
226193323Sed		} else
227193323Sed			error = EINVAL;
228193323Sed		break;
229193323Sed#ifdef	IPFILTER_LOG
230193323Sed	case SIOCIPFFB :
231193323Sed		if (!(mode & FWRITE))
232193323Sed			error = EPERM;
233193323Sed		else
234193323Sed			*(int *)data = ipflog_clear(IPL_LOGSTATE);
235193323Sed		break;
236193323Sed#endif
237193323Sed	case SIOCGIPST :
238193323Sed		IWCOPY((caddr_t)fr_statetstats(), data, sizeof(ips_stat_t));
239193323Sed		break;
240193323Sed	case FIONREAD :
241193323Sed#ifdef	IPFILTER_LOG
242193323Sed		IWCOPY((caddr_t)&iplused[IPL_LOGSTATE], (caddr_t)data,
243193323Sed		       sizeof(iplused[IPL_LOGSTATE]));
244193323Sed#endif
245193323Sed		break;
246193323Sed	default :
247193323Sed		error = EINVAL;
248193323Sed		break;
249193323Sed	}
250193323Sed	return error;
251193323Sed}
252193323Sed
253263508Sdim
254263508Sdim/*
255263508Sdim * Create a new ipstate structure and hang it off the hash table.
256263508Sdim */
257193323Sedipstate_t *fr_addstate(ip, fin, flags)
258263508Sdimip_t *ip;
259263508Sdimfr_info_t *fin;
260263508Sdimu_int flags;
261263508Sdim{
262263508Sdim	register ipstate_t *is;
263193323Sed	register u_int hv;
264193323Sed	ipstate_t ips;
265193323Sed	u_int pass;
266193323Sed
267193323Sed	if ((ip->ip_off & IP_OFFMASK) || (fin->fin_fi.fi_fl & FI_SHORT))
268193323Sed		return NULL;
269193323Sed	if (ips_num == fr_statemax) {
270193323Sed		ips_stats.iss_max++;
271193323Sed		fr_state_doflush = 1;
272193323Sed		return NULL;
273193323Sed	}
274193323Sed	is = &ips;
275193323Sed	bzero((char *)is, sizeof(*is));
276193323Sed	ips.is_age = 1;
277193323Sed	ips.is_state[0] = 0;
278193323Sed	ips.is_state[1] = 0;
279193323Sed	/*
280193323Sed	 * Copy and calculate...
281193323Sed	 */
282193323Sed	hv = (is->is_p = ip->ip_p);
283193323Sed	hv += (is->is_src.s_addr = ip->ip_src.s_addr);
284193323Sed	hv += (is->is_dst.s_addr = ip->ip_dst.s_addr);
285193323Sed
286193323Sed	switch (ip->ip_p)
287193323Sed	{
288193323Sed	case IPPROTO_ICMP :
289193323Sed	    {
290193323Sed		struct icmp *ic = (struct icmp *)fin->fin_dp;
291249423Sdim
292263508Sdim		switch (ic->icmp_type)
293249423Sdim		{
294249423Sdim		case ICMP_ECHO :
295249423Sdim			is->is_icmp.ics_type = ICMP_ECHOREPLY;	/* XXX */
296193323Sed			hv += (is->is_icmp.ics_id = ic->icmp_id);
297249423Sdim			hv += (is->is_icmp.ics_seq = ic->icmp_seq);
298249423Sdim			break;
299249423Sdim		case ICMP_TSTAMP :
300249423Sdim		case ICMP_IREQ :
301249423Sdim		case ICMP_MASKREQ :
302249423Sdim			is->is_icmp.ics_type = ic->icmp_type + 1;
303249423Sdim			break;
304249423Sdim		default :
305249423Sdim			return NULL;
306249423Sdim		}
307249423Sdim		ATOMIC_INC(ips_stats.iss_icmp);
308249423Sdim		is->is_age = fr_icmptimeout;
309249423Sdim		break;
310249423Sdim	    }
311249423Sdim	case IPPROTO_TCP :
312249423Sdim	    {
313249423Sdim		register tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp;
314249423Sdim
315249423Sdim		/*
316249423Sdim		 * The endian of the ports doesn't matter, but the ack and
317249423Sdim		 * sequence numbers do as we do mathematics on them later.
318249423Sdim		 */
319249423Sdim		is->is_dport = tcp->th_dport;
320249423Sdim		is->is_sport = tcp->th_sport;
321249423Sdim		if ((flags & (FI_W_DPORT|FI_W_SPORT)) == 0) {
322249423Sdim			hv += tcp->th_dport;
323249423Sdim			hv += tcp->th_sport;
324193323Sed		}
325193323Sed		if (tcp->th_seq != 0) {
326193323Sed			is->is_send = ntohl(tcp->th_seq) + ip->ip_len -
327193323Sed				      fin->fin_hlen - (tcp->th_off << 2) +
328251662Sdim				      ((tcp->th_flags & TH_SYN) ? 1 : 0) +
329193323Sed				      ((tcp->th_flags & TH_FIN) ? 1 : 0);
330193323Sed			is->is_maxsend = is->is_send + 1;
331193323Sed		}
332193323Sed		is->is_dend = 0;
333193323Sed		is->is_maxswin = ntohs(tcp->th_win);
334193323Sed		if (is->is_maxswin == 0)
335193323Sed			is->is_maxswin = 1;
336193323Sed		/*
337193323Sed		 * If we're creating state for a starting connection, start the
338193323Sed		 * timer on it as we'll never see an error if it fails to
339193323Sed		 * connect.
340193323Sed		 */
341251662Sdim		MUTEX_ENTER(&ipf_rw);
342251662Sdim		ips_stats.iss_tcp++;
343251662Sdim		fr_tcp_age(&is->is_age, is->is_state, ip, fin,
344251662Sdim			   tcp->th_sport == is->is_sport);
345251662Sdim		MUTEX_EXIT(&ipf_rw);
346251662Sdim		break;
347251662Sdim	    }
348251662Sdim	case IPPROTO_UDP :
349251662Sdim	    {
350251662Sdim		register tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp;
351251662Sdim
352251662Sdim		is->is_dport = tcp->th_dport;
353251662Sdim		is->is_sport = tcp->th_sport;
354251662Sdim		if ((flags & (FI_W_DPORT|FI_W_SPORT)) == 0) {
355251662Sdim			hv += tcp->th_dport;
356251662Sdim			hv += tcp->th_sport;
357251662Sdim		}
358251662Sdim		ATOMIC_INC(ips_stats.iss_udp);
359251662Sdim		is->is_age = fr_udptimeout;
360251662Sdim		break;
361251662Sdim	    }
362251662Sdim	default :
363251662Sdim		return NULL;
364251662Sdim	}
365251662Sdim
366251662Sdim	KMALLOC(is, ipstate_t *);
367251662Sdim	if (is == NULL) {
368251662Sdim		ATOMIC_INC(ips_stats.iss_nomem);
369251662Sdim		return NULL;
370251662Sdim	}
371251662Sdim	bcopy((char *)&ips, (char *)is, sizeof(*is));
372251662Sdim	hv %= fr_statesize;
373251662Sdim	RW_UPGRADE(&ipf_mutex);
374251662Sdim	is->is_rule = fin->fin_fr;
375251662Sdim	if (is->is_rule != NULL) {
376251662Sdim		is->is_rule->fr_ref++;
377251662Sdim		pass = is->is_rule->fr_flags;
378251662Sdim	} else
379251662Sdim		pass = fr_flags;
380251662Sdim	MUTEX_DOWNGRADE(&ipf_mutex);
381251662Sdim	WRITE_ENTER(&ipf_state);
382251662Sdim
383251662Sdim	is->is_rout = pass & FR_OUTQUE ? 1 : 0;
384251662Sdim	is->is_pass = pass;
385251662Sdim	is->is_pkts = 1;
386251662Sdim	is->is_bytes = ip->ip_len;
387251662Sdim	/*
388251662Sdim	 * We want to check everything that is a property of this packet,
389251662Sdim	 * but we don't (automatically) care about it's fragment status as
390251662Sdim	 * this may change.
391251662Sdim	 */
392251662Sdim	is->is_opt = fin->fin_fi.fi_optmsk;
393251662Sdim	is->is_optmsk = 0xffffffff;
394251662Sdim	is->is_sec = fin->fin_fi.fi_secmsk;
395263508Sdim	is->is_secmsk = 0xffff;
396251662Sdim	is->is_auth = fin->fin_fi.fi_auth;
397251662Sdim	is->is_authmsk = 0xffff;
398251662Sdim	is->is_flags = fin->fin_fi.fi_fl & FI_CMP;
399251662Sdim	is->is_flags |= FI_CMP << 4;
400251662Sdim	is->is_flags |= flags & (FI_W_DPORT|FI_W_SPORT);
401251662Sdim	/*
402251662Sdim	 * add into table.
403251662Sdim	 */
404251662Sdim	is->is_next = ips_table[hv];
405251662Sdim	ips_table[hv] = is;
406251662Sdim	if (is->is_next == NULL)
407251662Sdim		ips_stats.iss_inuse++;
408251662Sdim	if (fin->fin_out) {
409251662Sdim		is->is_ifpin = NULL;
410251662Sdim		is->is_ifpout = fin->fin_ifp;
411193323Sed	} else {
412193323Sed		is->is_ifpin = fin->fin_ifp;
413251662Sdim		is->is_ifpout = NULL;
414193323Sed	}
415251662Sdim	if (pass & FR_LOGFIRST)
416251662Sdim		is->is_pass &= ~(FR_LOGFIRST|FR_LOG);
417251662Sdim	ATOMIC_INC(ips_num);
418251662Sdim#ifdef	IPFILTER_LOG
419193323Sed	ipstate_log(is, ISL_NEW);
420193323Sed#endif
421251662Sdim	RWLOCK_EXIT(&ipf_state);
422193323Sed	fin->fin_rev = (is->is_dst.s_addr != ip->ip_dst.s_addr);
423251662Sdim	if (fin->fin_fi.fi_fl & FI_FRAG)
424251662Sdim		ipfr_newfrag(ip, fin, pass ^ FR_KEEPSTATE);
425251662Sdim	return is;
426251662Sdim}
427193323Sed
428193323Sed
429251662Sdim
430193323Sed/*
431251662Sdim * check to see if a packet with TCP headers fits within the TCP window.
432251662Sdim * change timeout depending on whether new packet is a SYN-ACK returning for a
433251662Sdim * SYN or a RST or FIN which indicate time to close up shop.
434251662Sdim */
435193323Sedint fr_tcpstate(is, fin, ip, tcp)
436193323Sedregister ipstate_t *is;
437263508Sdimfr_info_t *fin;
438251662Sdimip_t *ip;
439193323Sedtcphdr_t *tcp;
440251662Sdim{
441251662Sdim	register tcp_seq seq, ack, end;
442251662Sdim	register int ackskew;
443251662Sdim	tcpdata_t  *fdata, *tdata;
444193323Sed	u_short	win, maxwin;
445193323Sed	int ret = 0;
446263508Sdim	int source;
447251662Sdim
448193323Sed	/*
449263508Sdim	 * Find difference between last checked packet and this packet.
450263508Sdim	 */
451263508Sdim	source = (ip->ip_src.s_addr == is->is_src.s_addr);
452263508Sdim	fdata = &is->is_tcp.ts_data[!source];
453263508Sdim	tdata = &is->is_tcp.ts_data[source];
454263508Sdim	seq = ntohl(tcp->th_seq);
455263508Sdim	ack = ntohl(tcp->th_ack);
456263508Sdim	win = ntohs(tcp->th_win);
457263508Sdim	end = seq + ip->ip_len - fin->fin_hlen - (tcp->th_off << 2) +
458263508Sdim	       ((tcp->th_flags & TH_SYN) ? 1 : 0) +
459263508Sdim	       ((tcp->th_flags & TH_FIN) ? 1 : 0);
460263508Sdim
461263508Sdim	if (fdata->td_end == 0) {
462263508Sdim		/*
463263508Sdim		 * Must be a (outgoing) SYN-ACK in reply to a SYN.
464263508Sdim		 */
465218893Sdim		fdata->td_end = end;
466193323Sed		fdata->td_maxwin = 1;
467218893Sdim		fdata->td_maxend = end + 1;
468263508Sdim	}
469193323Sed
470263508Sdim	if (!(tcp->th_flags & TH_ACK)) {  /* Pretend an ack was sent */
471263508Sdim		ack = tdata->td_end;
472263508Sdim		win = 1;
473263508Sdim		if ((tcp->th_flags == TH_SYN) && (tdata->td_maxwin == 0))
474263508Sdim			 tdata->td_maxwin = 1;
475263508Sdim	} else if (((tcp->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
476263508Sdim		   (ack == 0)) {
477263508Sdim		/* gross hack to get around certain broken tcp stacks */
478263508Sdim		ack = tdata->td_end;
479218893Sdim	}
480218893Sdim
481218893Sdim	if (seq == end)
482263508Sdim		seq = end = fdata->td_end;
483218893Sdim
484263508Sdim	maxwin = tdata->td_maxwin;
485263508Sdim	ackskew = tdata->td_end - ack;
486263508Sdim
487263508Sdim#define	SEQ_GE(a,b)	((int)((a) - (b)) >= 0)
488263508Sdim#define	SEQ_GT(a,b)	((int)((a) - (b)) > 0)
489193323Sed	if ((SEQ_GE(fdata->td_maxend, end)) &&
490263508Sdim	    (SEQ_GE(seq, fdata->td_end - maxwin)) &&
491218893Sdim/* XXX what about big packets */
492263508Sdim#define MAXACKWINDOW 66000
493193323Sed	    (ackskew >= -MAXACKWINDOW) &&
494263508Sdim	    (ackskew <= MAXACKWINDOW)) {
495263508Sdim		/* if ackskew < 0 then this should be due to fragented
496263508Sdim		 * packets. There is no way to know the length of the
497263508Sdim		 * total packet in advance.
498193323Sed		 * We do know the total length from the fragment cache though.
499251662Sdim		 * Note however that there might be more sessions with
500251662Sdim		 * exactly the same source and destination paramters in the
501251662Sdim		 * state cache (and source and destination is the only stuff
502193323Sed		 * that is saved in the fragment cache). Note further that
503193323Sed		 * some TCP connections in the state cache are hashed with
504218893Sdim		 * sport and dport as well which makes it not worthwhile to
505251662Sdim		 * look for them.
506193323Sed		 * Thus, when ackskew is negative but still seems to belong
507193323Sed		 * to this session, we bump up the destinations end value.
508249423Sdim		 */
509251662Sdim		if (ackskew < 0)
510251662Sdim			tdata->td_end = ack;
511251662Sdim
512251662Sdim		/* update max window seen */
513251662Sdim		if (fdata->td_maxwin < win)
514251662Sdim			fdata->td_maxwin = win;
515251662Sdim		if (SEQ_GT(end, fdata->td_end))
516251662Sdim			fdata->td_end = end;
517251662Sdim		if (SEQ_GE(ack + win, tdata->td_maxend)) {
518251662Sdim			tdata->td_maxend = ack + win;
519251662Sdim			if (win == 0)
520251662Sdim				tdata->td_maxend++;
521251662Sdim		}
522249423Sdim
523251662Sdim		ATOMIC_INC(ips_stats.iss_hits);
524251662Sdim		is->is_pkts++;
525263508Sdim		is->is_bytes += ip->ip_len;
526251662Sdim		/*
527251662Sdim		 * Nearing end of connection, start timeout.
528251662Sdim		 */
529251662Sdim		MUTEX_ENTER(&ipf_rw);
530251662Sdim		fr_tcp_age(&is->is_age, is->is_state, ip, fin, source);
531251662Sdim		MUTEX_EXIT(&ipf_rw);
532263508Sdim		ret = 1;
533251662Sdim	}
534251662Sdim	return ret;
535251662Sdim}
536251662Sdim
537249423Sdim
538193323Sedstatic int fr_matchsrcdst(is, src, dst, fin, tcp)
539193323Sedipstate_t *is;
540249423Sdimstruct in_addr src, dst;
541218893Sdimfr_info_t *fin;
542193323Sedtcphdr_t *tcp;
543193323Sed{
544193323Sed	int ret = 0, rev, out, flags;
545251662Sdim	u_short sp, dp;
546193323Sed	void *ifp;
547263508Sdim
548263508Sdim	rev = fin->fin_rev = (is->is_dst.s_addr != dst.s_addr);
549249423Sdim	ifp = fin->fin_ifp;
550218893Sdim	out = fin->fin_out;
551193323Sed
552193323Sed	if (tcp != NULL) {
553249423Sdim		flags = is->is_flags;
554249423Sdim		sp = tcp->th_sport;
555249423Sdim		dp = tcp->th_dport;
556249423Sdim	} else {
557249423Sdim		flags = 0;
558249423Sdim		sp = 0;
559249423Sdim		dp = 0;
560249423Sdim	}
561193323Sed
562251662Sdim	if (rev == 0) {
563193323Sed		if (!out) {
564193323Sed			if (is->is_ifpin == ifp)
565193323Sed				ret = 1;
566193323Sed		} else {
567193323Sed			if (is->is_ifpout == NULL || is->is_ifpout == ifp)
568193323Sed				ret = 1;
569263508Sdim		}
570193323Sed	} else {
571249423Sdim		if (out) {
572249423Sdim			if (is->is_ifpin == ifp)
573249423Sdim				ret = 1;
574249423Sdim		} else {
575193323Sed			if (is->is_ifpout == NULL || is->is_ifpout == ifp)
576193323Sed				ret = 1;
577193323Sed		}
578193323Sed	}
579193323Sed	if (ret == 0)
580193323Sed		return 0;
581193323Sed	ret = 0;
582234353Sdim
583266715Sdim	if (rev == 0) {
584193323Sed		if ((is->is_dst.s_addr == dst.s_addr) &&
585193323Sed		    (is->is_src.s_addr == src.s_addr) &&
586251662Sdim		    (!tcp || ((sp == is->is_sport || flags & FI_W_SPORT) &&
587251662Sdim		     (dp == is->is_dport || flags & FI_W_DPORT)))) {
588251662Sdim			ret = 1;
589251662Sdim		}
590251662Sdim	} else {
591251662Sdim		if ((is->is_dst.s_addr == src.s_addr) &&
592251662Sdim		    (is->is_src.s_addr == dst.s_addr) &&
593251662Sdim		    (!tcp || ((sp == is->is_dport || flags & FI_W_DPORT) &&
594251662Sdim		     (dp == is->is_sport || flags & FI_W_SPORT)))) {
595251662Sdim			ret = 1;
596251662Sdim		}
597251662Sdim	}
598251662Sdim	if (ret == 0)
599251662Sdim		return 0;
600251662Sdim
601251662Sdim	/*
602251662Sdim	 * Whether or not this should be here, is questionable, but the aim
603251662Sdim	 * is to get this out of the main line.
604251662Sdim	 */
605251662Sdim	if (tcp == NULL)
606251662Sdim		flags = is->is_flags & (FI_CMP|(FI_CMP<<4));
607251662Sdim
608251662Sdim	if (((fin->fin_fi.fi_fl & (flags >> 4)) != (flags & FI_CMP)) ||
609251662Sdim	    ((fin->fin_fi.fi_optmsk & is->is_optmsk) != is->is_opt) ||
610251662Sdim	    ((fin->fin_fi.fi_secmsk & is->is_secmsk) != is->is_sec) ||
611251662Sdim	    ((fin->fin_fi.fi_auth & is->is_authmsk) != is->is_auth))
612251662Sdim		return 0;
613251662Sdim
614251662Sdim	if ((flags & (FI_W_SPORT|FI_W_DPORT))) {
615251662Sdim		if ((flags & FI_W_SPORT) != 0) {
616251662Sdim			if (rev == 0) {
617251662Sdim				is->is_sport = sp;
618251662Sdim				is->is_send = htonl(tcp->th_seq);
619251662Sdim			} else {
620251662Sdim				is->is_sport = dp;
621251662Sdim				is->is_send = htonl(tcp->th_ack);
622251662Sdim			}
623251662Sdim			is->is_maxsend = is->is_send + 1;
624251662Sdim		} else if ((flags & FI_W_DPORT) != 0) {
625251662Sdim			if (rev == 0) {
626251662Sdim				is->is_dport = dp;
627251662Sdim				is->is_dend = htonl(tcp->th_ack);
628251662Sdim			} else {
629251662Sdim				is->is_dport = sp;
630251662Sdim				is->is_dend = htonl(tcp->th_seq);
631251662Sdim			}
632251662Sdim			is->is_maxdend = is->is_dend + 1;
633251662Sdim		}
634251662Sdim		is->is_flags &= ~(FI_W_SPORT|FI_W_DPORT);
635251662Sdim	}
636251662Sdim
637251662Sdim	if (!rev) {
638251662Sdim		if (out && (out == is->is_rout)) {
639251662Sdim			if (!is->is_ifpout)
640251662Sdim				is->is_ifpout = ifp;
641251662Sdim		} else {
642251662Sdim			if (!is->is_ifpin)
643251662Sdim				is->is_ifpin = ifp;
644251662Sdim		}
645251662Sdim	} else {
646251662Sdim		if (!out && (out != is->is_rout)) {
647251662Sdim			if (!is->is_ifpin)
648251662Sdim				is->is_ifpin = ifp;
649251662Sdim		} else {
650251662Sdim			if (!is->is_ifpout)
651251662Sdim				is->is_ifpout = ifp;
652251662Sdim		}
653251662Sdim	}
654251662Sdim	return 1;
655251662Sdim}
656251662Sdim
657251662Sdimfrentry_t *fr_checkicmpmatchingstate(ip, fin)
658251662Sdimip_t *ip;
659251662Sdimfr_info_t *fin;
660251662Sdim{
661251662Sdim	register struct in_addr	dst, src;
662251662Sdim	register ipstate_t *is, **isp;
663251662Sdim	register u_short sport, dport;
664251662Sdim	register u_char	pr;
665251662Sdim	struct icmp *ic;
666251662Sdim	u_short savelen;
667251662Sdim	fr_info_t ofin;
668251662Sdim	tcphdr_t *tcp;
669251662Sdim	icmphdr_t *icmp;
670251662Sdim	frentry_t *fr;
671251662Sdim	ip_t *oip;
672251662Sdim	int type;
673251662Sdim	u_int hv;
674251662Sdim
675251662Sdim	/*
676251662Sdim	 * Does it at least have the return (basic) IP header ?
677251662Sdim	 * Only a basic IP header (no options) should be with
678251662Sdim	 * an ICMP error header.
679251662Sdim	 */
680251662Sdim	if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
681251662Sdim		return NULL;
682251662Sdim	ic = (struct icmp *)((char *)ip + fin->fin_hlen);
683251662Sdim	type = ic->icmp_type;
684251662Sdim	/*
685251662Sdim	 * If it's not an error type, then return
686251662Sdim	 */
687251662Sdim	if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
688251662Sdim    	    (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
689251662Sdim    	    (type != ICMP_PARAMPROB))
690251662Sdim		return NULL;
691251662Sdim
692251662Sdim	oip = (ip_t *)((char *)fin->fin_dp + ICMPERR_ICMPHLEN);
693251662Sdim	if (ip->ip_len < ICMPERR_MAXPKTLEN + ((oip->ip_hl - 5) << 2))
694251662Sdim		return NULL;
695251662Sdim
696251662Sdim	if (oip->ip_p == IPPROTO_ICMP) {
697251662Sdim
698251662Sdim		icmp = (icmphdr_t *)((char *)oip + (oip->ip_hl << 2));
699251662Sdim
700251662Sdim		/*
701251662Sdim		 * a ICMP error can only be generated as a result of an
702251662Sdim		 * ICMP query, not as the response on an ICMP error
703251662Sdim		 *
704251662Sdim		 * XXX theoretically ICMP_ECHOREP and the other reply's are
705251662Sdim		 * ICMP query's as well, but adding them here seems strange XXX
706251662Sdim		 */
707251662Sdim		 if ((icmp->icmp_type != ICMP_ECHO) &&
708251662Sdim		     (icmp->icmp_type != ICMP_TSTAMP) &&
709251662Sdim		     (icmp->icmp_type != ICMP_IREQ) &&
710251662Sdim		     (icmp->icmp_type != ICMP_MASKREQ))
711251662Sdim		    	return NULL;
712251662Sdim
713251662Sdim		/*
714251662Sdim		 * perform a lookup of the ICMP packet in the state table
715251662Sdim		 */
716251662Sdim
717251662Sdim		hv = (pr = oip->ip_p);
718251662Sdim		hv += (src.s_addr = oip->ip_src.s_addr);
719251662Sdim		hv += (dst.s_addr = oip->ip_dst.s_addr);
720251662Sdim		if (icmp->icmp_type == ICMP_ECHO) {
721251662Sdim			hv += icmp->icmp_id;
722251662Sdim			hv += icmp->icmp_seq;
723251662Sdim		}
724251662Sdim		hv %= fr_statesize;
725251662Sdim
726251662Sdim		oip->ip_len = ntohs(oip->ip_len);
727251662Sdim		fr_makefrip(oip->ip_hl << 2, oip, &ofin);
728251662Sdim		oip->ip_len = htons(oip->ip_len);
729251662Sdim		ofin.fin_ifp = fin->fin_ifp;
730251662Sdim		ofin.fin_out = !fin->fin_out;
731251662Sdim		ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
732251662Sdim
733251662Sdim		READ_ENTER(&ipf_state);
734251662Sdim		for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_next)
735251662Sdim			if ((is->is_p == pr) &&
736251662Sdim			    fr_matchsrcdst(is, src, dst, &ofin, NULL)) {
737251662Sdim			    	/*
738251662Sdim			    	 * in the state table ICMP query's are stored
739251662Sdim			    	 * with the type of the corresponding ICMP
740251662Sdim			    	 * response. Correct here
741251662Sdim			    	 */
742251662Sdim				if (((is->is_type == ICMP_ECHOREPLY) &&
743251662Sdim				     (icmp->icmp_id == is->is_icmp.ics_id) &&
744251662Sdim				     (icmp->icmp_seq == is->is_icmp.ics_seq) &&
745251662Sdim				     (icmp->icmp_type == ICMP_ECHO)) ||
746251662Sdim				    (is->is_type - 1 == ic->icmp_type)) {
747251662Sdim				    	ips_stats.iss_hits++;
748251662Sdim    		                        is->is_pkts++;
749251662Sdim                	                is->is_bytes += ip->ip_len;
750251662Sdim					fr = is->is_rule;
751251662Sdim					RWLOCK_EXIT(&ipf_state);
752251662Sdim					return fr;
753251662Sdim				}
754251662Sdim			}
755251662Sdim		RWLOCK_EXIT(&ipf_state);
756251662Sdim		return NULL;
757251662Sdim	};
758251662Sdim
759251662Sdim	if ((oip->ip_p != IPPROTO_TCP) && (oip->ip_p != IPPROTO_UDP))
760251662Sdim		return NULL;
761251662Sdim
762251662Sdim	tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
763251662Sdim	dport = tcp->th_dport;
764251662Sdim	sport = tcp->th_sport;
765251662Sdim
766251662Sdim	hv = (pr = oip->ip_p);
767251662Sdim	hv += (src.s_addr = oip->ip_src.s_addr);
768251662Sdim	hv += (dst.s_addr = oip->ip_dst.s_addr);
769251662Sdim	hv += dport;
770251662Sdim	hv += sport;
771251662Sdim	hv %= fr_statesize;
772251662Sdim	/*
773251662Sdim	 * we make an fin entry to be able to feed it to
774251662Sdim	 * matchsrcdst note that not all fields are encessary
775251662Sdim	 * but this is the cleanest way. Note further we fill
776251662Sdim	 * in fin_mp such that if someone uses it we'll get
777251662Sdim	 * a kernel panic. fr_matchsrcdst does not use this.
778251662Sdim	 *
779251662Sdim	 * watch out here, as ip is in host order and oip in network
780251662Sdim	 * order. Any change we make must be undone afterwards.
781251662Sdim	 */
782251662Sdim	savelen = oip->ip_len;
783251662Sdim	oip->ip_len = ip->ip_len - (ip->ip_hl << 2) - ICMPERR_ICMPHLEN;
784251662Sdim	fr_makefrip(oip->ip_hl << 2, oip, &ofin);
785251662Sdim	oip->ip_len = savelen;
786251662Sdim	ofin.fin_ifp = fin->fin_ifp;
787251662Sdim	ofin.fin_out = !fin->fin_out;
788251662Sdim	ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
789263508Sdim	READ_ENTER(&ipf_state);
790263508Sdim	for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_next) {
791263508Sdim		/*
792263508Sdim		 * Only allow this icmp though if the
793263508Sdim		 * encapsulated packet was allowed through the
794263508Sdim		 * other way around. Note that the minimal amount
795263508Sdim		 * of info present does not allow for checking against
796263508Sdim		 * tcp internals such as seq and ack numbers.
797263508Sdim		 */
798263508Sdim		if ((is->is_p == pr) &&
799263508Sdim		    fr_matchsrcdst(is, src, dst, &ofin, tcp)) {
800263508Sdim			fr = is->is_rule;
801263508Sdim			ips_stats.iss_hits++;
802263508Sdim			/*
803251662Sdim			 * we must swap src and dst here because the icmp
804251662Sdim			 * comes the other way around
805251662Sdim			 */
806251662Sdim			is->is_pkts++;
807251662Sdim			is->is_bytes += ip->ip_len;
808251662Sdim			/*
809251662Sdim			 * we deliberately do not touch the timeouts
810251662Sdim			 * for the accompanying state table entry.
811251662Sdim			 * It remains to be seen if that is correct. XXX
812251662Sdim			 */
813251662Sdim			RWLOCK_EXIT(&ipf_state);
814251662Sdim			return fr;
815251662Sdim		}
816251662Sdim	}
817251662Sdim	RWLOCK_EXIT(&ipf_state);
818251662Sdim	return NULL;
819251662Sdim}
820251662Sdim
821251662Sdim/*
822251662Sdim * Check if a packet has a registered state.
823251662Sdim */
824251662Sdimfrentry_t *fr_checkstate(ip, fin)
825251662Sdimip_t *ip;
826251662Sdimfr_info_t *fin;
827251662Sdim{
828251662Sdim	register struct in_addr dst, src;
829251662Sdim	register ipstate_t *is, **isp;
830251662Sdim	register u_char pr;
831251662Sdim	u_int hv, hvm, hlen, tryagain, pass;
832251662Sdim	struct icmp *ic;
833251662Sdim	frentry_t *fr;
834251662Sdim	tcphdr_t *tcp;
835251662Sdim
836251662Sdim	if ((ip->ip_off & IP_OFFMASK) || (fin->fin_fi.fi_fl & FI_SHORT))
837251662Sdim		return NULL;
838251662Sdim
839251662Sdim	is = NULL;
840251662Sdim	hlen = fin->fin_hlen;
841251662Sdim	tcp = (tcphdr_t *)((char *)ip + hlen);
842251662Sdim	ic = (struct icmp *)tcp;
843251662Sdim	hv = (pr = ip->ip_p);
844251662Sdim	hv += (src.s_addr = ip->ip_src.s_addr);
845251662Sdim	hv += (dst.s_addr = ip->ip_dst.s_addr);
846193323Sed
847193323Sed	/*
848193323Sed	 * Search the hash table for matching packet header info.
849193323Sed	 */
850193323Sed	switch (ip->ip_p)
851193323Sed	{
852243830Sdim	case IPPROTO_ICMP :
853193323Sed		if ((ic->icmp_type == ICMP_ECHO) ||
854243830Sdim		    (ic->icmp_type == ICMP_ECHOREPLY)) {
855193323Sed			hv += ic->icmp_id;
856193323Sed			hv += ic->icmp_seq;
857193323Sed		}
858251662Sdim		hv %= fr_statesize;
859193323Sed		READ_ENTER(&ipf_state);
860193323Sed		for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_next)
861193323Sed			if ((is->is_p == pr) &&
862193323Sed			    fr_matchsrcdst(is, src, dst, fin, NULL)) {
863251662Sdim				if ((is->is_type == ICMP_ECHOREPLY) &&
864249423Sdim				    (ic->icmp_type == ICMP_ECHO) &&
865249423Sdim				    (ic->icmp_id == is->is_icmp.ics_id) &&
866193323Sed				    (ic->icmp_seq == is->is_icmp.ics_seq))
867198892Srdivacky					;
868198892Srdivacky				else if (is->is_type != ic->icmp_type)
869198892Srdivacky					continue;
870193323Sed				is->is_age = fr_icmptimeout;
871249423Sdim				break;
872249423Sdim			}
873249423Sdim		if (is != NULL)
874251662Sdim			break;
875251662Sdim		RWLOCK_EXIT(&ipf_state);
876243830Sdim		/*
877193323Sed		 * No matching icmp state entry. Perhaps this is a
878251662Sdim		 * response to another state entry.
879251662Sdim		 */
880243830Sdim		fr = fr_checkicmpmatchingstate(ip, fin);
881193323Sed		if (fr)
882251662Sdim			return fr;
883243830Sdim		break;
884193323Sed	case IPPROTO_TCP :
885251662Sdim	    {
886243830Sdim		register u_short dport = tcp->th_dport, sport = tcp->th_sport;
887193323Sed
888251662Sdim		tryagain = 0;
889243830Sdimretry_tcp:
890193323Sed		hvm = hv % fr_statesize;
891193323Sed		WRITE_ENTER(&ipf_state);
892193323Sed		for (isp = &ips_table[hvm]; (is = *isp);
893193323Sed		     isp = &is->is_next)
894193323Sed			if ((is->is_p == pr) &&
895234353Sdim			    fr_matchsrcdst(is, src, dst, fin, tcp)) {
896251662Sdim				if (fr_tcpstate(is, fin, ip, tcp)) {
897243830Sdim#ifndef	_KERNEL
898193323Sed					if (tcp->th_flags & TCP_CLOSE) {
899234353Sdim						*isp = is->is_next;
900234353Sdim						isp = &ips_table[hvm];
901234353Sdim						if (ips_table[hvm] == NULL)
902251662Sdim							ips_stats.iss_inuse--;
903243830Sdim						fr_delstate(is);
904234353Sdim						ips_num--;
905193323Sed					}
906249423Sdim#endif
907249423Sdim					break;
908249423Sdim				}
909251662Sdim				is = NULL;
910193323Sed				break;
911251662Sdim			}
912251662Sdim		if (is != NULL)
913251662Sdim			break;
914263508Sdim		RWLOCK_EXIT(&ipf_state);
915251662Sdim		hv += dport;
916193323Sed		hv += sport;
917193323Sed		if (tryagain == 0) {
918193323Sed			tryagain = 1;
919243830Sdim			goto retry_tcp;
920193323Sed		}
921193323Sed		break;
922193323Sed	    }
923193323Sed	case IPPROTO_UDP :
924218893Sdim	    {
925193323Sed		register u_short dport = tcp->th_dport, sport = tcp->th_sport;
926193323Sed
927263508Sdim		tryagain = 0;
928263508Sdimretry_udp:
929193323Sed		hvm = hv % fr_statesize;
930193323Sed		/*
931193323Sed		 * Nothing else to match on but ports. and IP#'s
932193323Sed		 */
933243830Sdim		READ_ENTER(&ipf_state);
934251662Sdim		for (is = ips_table[hvm]; is; is = is->is_next)
935243830Sdim			if ((is->is_p == pr) &&
936263508Sdim			    fr_matchsrcdst(is, src, dst, fin, tcp)) {
937251662Sdim				is->is_age = fr_udptimeout;
938263508Sdim				break;
939263508Sdim			}
940263508Sdim		if (is != NULL)
941251662Sdim			break;
942251662Sdim		RWLOCK_EXIT(&ipf_state);
943263508Sdim		hv += dport;
944263508Sdim		hv += sport;
945239462Sdim		if (tryagain == 0) {
946263508Sdim			tryagain = 1;
947263508Sdim			goto retry_udp;
948251662Sdim		}
949263508Sdim		break;
950251662Sdim	    }
951263508Sdim	default :
952263508Sdim		break;
953263508Sdim	}
954263508Sdim	if (is == NULL) {
955263508Sdim		ATOMIC_INC(ips_stats.iss_miss);
956263508Sdim		return NULL;
957263508Sdim	}
958263508Sdim	MUTEX_ENTER(&ipf_rw);
959251662Sdim	is->is_bytes += ip->ip_len;
960251662Sdim	ips_stats.iss_hits++;
961239462Sdim	is->is_pkts++;
962243830Sdim	MUTEX_EXIT(&ipf_rw);
963243830Sdim	fr = is->is_rule;
964243830Sdim	fin->fin_fr = fr;
965243830Sdim	pass = is->is_pass;
966263508Sdim	RWLOCK_EXIT(&ipf_state);
967263508Sdim	if (fin->fin_fi.fi_fl & FI_FRAG)
968263508Sdim		ipfr_newfrag(ip, fin, pass ^ FR_KEEPSTATE);
969263508Sdim	return fr;
970263508Sdim}
971263508Sdim
972263508Sdim
973263508Sdimstatic void fr_delstate(is)
974263508Sdimipstate_t *is;
975263508Sdim{
976263508Sdim	frentry_t *fr;
977263508Sdim
978263508Sdim	fr = is->is_rule;
979263508Sdim	if (fr != NULL) {
980263508Sdim		ATOMIC_DEC(fr->fr_ref);
981263508Sdim		if (fr->fr_ref == 0)
982263508Sdim			KFREE(fr);
983263508Sdim	}
984263508Sdim	KFREE(is);
985263508Sdim}
986239462Sdim
987193323Sed
988193323Sed/*
989249423Sdim * Free memory in use by all state info. kept.
990249423Sdim */
991249423Sdimvoid fr_stateunload()
992249423Sdim{
993249423Sdim	register int i;
994193323Sed	register ipstate_t *is, **isp;
995193323Sed
996193323Sed	WRITE_ENTER(&ipf_state);
997234353Sdim	for (i = fr_statesize - 1; i >= 0; i--)
998193323Sed		for (isp = &ips_table[i]; (is = *isp); ) {
999193323Sed			*isp = is->is_next;
1000249423Sdim			fr_delstate(is);
1001249423Sdim			ips_num--;
1002263508Sdim		}
1003249423Sdim	ips_stats.iss_inuse = 0;
1004263508Sdim	ips_num = 0;
1005263508Sdim	RWLOCK_EXIT(&ipf_state);
1006263508Sdim	KFREES(ips_table, fr_statesize * sizeof(ipstate_t *));
1007263508Sdim	ips_table = NULL;
1008263508Sdim}
1009263508Sdim
1010263508Sdim
1011193323Sed/*
1012193323Sed * Slowly expire held state for thingslike UDP and ICMP.  Timeouts are set
1013249423Sdim * in expectation of this being called twice per second.
1014249423Sdim */
1015249423Sdimvoid fr_timeoutstate()
1016251662Sdim{
1017251662Sdim	register int i;
1018251662Sdim	register ipstate_t *is, **isp;
1019263508Sdim#if defined(_KERNEL) && !SOLARIS
1020193323Sed	int s;
1021263508Sdim#endif
1022263508Sdim
1023263508Sdim	SPL_NET(s);
1024263508Sdim	WRITE_ENTER(&ipf_state);
1025263508Sdim	for (i = fr_statesize - 1; i >= 0; i--)
1026263508Sdim		for (isp = &ips_table[i]; (is = *isp); )
1027263508Sdim			if (is->is_age && !--is->is_age) {
1028263508Sdim				*isp = is->is_next;
1029263508Sdim				if (is->is_p == IPPROTO_TCP)
1030263508Sdim					ips_stats.iss_fin++;
1031263508Sdim				else
1032263508Sdim					ips_stats.iss_expire++;
1033263508Sdim				if (ips_table[i] == NULL)
1034263508Sdim					ips_stats.iss_inuse--;
1035263508Sdim#ifdef	IPFILTER_LOG
1036263508Sdim				ipstate_log(is, ISL_EXPIRE);
1037263508Sdim#endif
1038263508Sdim				fr_delstate(is);
1039263508Sdim				ips_num--;
1040263508Sdim			} else
1041263508Sdim				isp = &is->is_next;
1042263508Sdim	RWLOCK_EXIT(&ipf_state);
1043263508Sdim	SPL_X(s);
1044263508Sdim	if (fr_state_doflush) {
1045263508Sdim		(void) fr_state_flush(1);
1046263508Sdim		fr_state_doflush = 0;
1047263508Sdim	}
1048263508Sdim}
1049263508Sdim
1050263508Sdim
1051263508Sdim/*
1052263508Sdim * Original idea freom Pradeep Krishnan for use primarily with NAT code.
1053263508Sdim * (pkrishna@netcom.com)
1054263508Sdim */
1055263508Sdimvoid fr_tcp_age(age, state, ip, fin, dir)
1056263508Sdimu_long *age;
1057263508Sdimu_char *state;
1058263508Sdimip_t *ip;
1059263508Sdimfr_info_t *fin;
1060263508Sdimint dir;
1061263508Sdim{
1062263508Sdim	tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp;
1063263508Sdim	u_char flags = tcp->th_flags;
1064263508Sdim	int dlen, ostate;
1065263508Sdim
1066263508Sdim	ostate = state[1 - dir];
1067263508Sdim
1068263508Sdim	dlen = ip->ip_len - fin->fin_hlen - (tcp->th_off << 2);
1069193323Sed
1070193323Sed	if (flags & TH_RST) {
1071193323Sed		if (!(tcp->th_flags & TH_PUSH) && !dlen) {
1072193323Sed			*age = fr_tcpclosed;
1073239462Sdim			state[dir] = TCPS_CLOSED;
1074193323Sed		} else {
1075193323Sed			*age = fr_tcpclosewait;
1076193323Sed			state[dir] = TCPS_CLOSE_WAIT;
1077193323Sed		}
1078193323Sed		return;
1079263508Sdim	}
1080193323Sed
1081193323Sed	*age = fr_tcptimeout; /* 1 min */
1082193323Sed
1083193323Sed	switch(state[dir])
1084239462Sdim	{
1085193323Sed	case TCPS_CLOSED:
1086193323Sed		if ((flags & (TH_FIN|TH_SYN|TH_RST|TH_ACK)) == TH_ACK) {
1087193323Sed			state[dir] = TCPS_ESTABLISHED;
1088193323Sed			*age = fr_tcpidletimeout;
1089249423Sdim		}
1090249423Sdim	case TCPS_FIN_WAIT_2:
1091193323Sed		if ((flags & TH_OPENING) == TH_OPENING)
1092193323Sed			state[dir] = TCPS_SYN_RECEIVED;
1093193323Sed		else if (flags & TH_SYN)
1094193323Sed			state[dir] = TCPS_SYN_SENT;
1095193323Sed		break;
1096193323Sed	case TCPS_SYN_RECEIVED:
1097193323Sed	case TCPS_SYN_SENT:
1098193323Sed		if ((flags & (TH_FIN|TH_ACK)) == TH_ACK) {
1099193323Sed			state[dir] = TCPS_ESTABLISHED;
1100193323Sed			*age = fr_tcpidletimeout;
1101193323Sed		} else if ((flags & (TH_FIN|TH_ACK)) == (TH_FIN|TH_ACK)) {
1102193323Sed			state[dir] = TCPS_CLOSE_WAIT;
1103193323Sed			if (!(flags & TH_PUSH) && !dlen &&
1104263508Sdim			    ostate > TCPS_ESTABLISHED)
1105193323Sed				*age  = fr_tcplastack;
1106193323Sed			else
1107193323Sed				*age  = fr_tcpclosewait;
1108263508Sdim		}
1109263508Sdim		break;
1110249423Sdim	case TCPS_ESTABLISHED:
1111263508Sdim		if (flags & TH_FIN) {
1112251662Sdim			state[dir] = TCPS_CLOSE_WAIT;
1113249423Sdim			if (!(flags & TH_PUSH) && !dlen &&
1114249423Sdim			    ostate > TCPS_ESTABLISHED)
1115249423Sdim				*age  = fr_tcplastack;
1116249423Sdim			else
1117249423Sdim				*age  = fr_tcpclosewait;
1118249423Sdim		} else {
1119249423Sdim			if (ostate < TCPS_CLOSE_WAIT)
1120249423Sdim				*age = fr_tcpidletimeout;
1121249423Sdim		}
1122193323Sed		break;
1123249423Sdim	case TCPS_CLOSE_WAIT:
1124249423Sdim		if ((flags & TH_FIN) && !(flags & TH_PUSH) && !dlen &&
1125249423Sdim		    ostate > TCPS_ESTABLISHED) {
1126249423Sdim			*age  = fr_tcplastack;
1127249423Sdim			state[dir] = TCPS_LAST_ACK;
1128263508Sdim		} else
1129263508Sdim			*age  = fr_tcpclosewait;
1130263508Sdim		break;
1131263508Sdim	case TCPS_LAST_ACK:
1132263508Sdim		if (flags & TH_ACK) {
1133263508Sdim			state[dir] = TCPS_FIN_WAIT_2;
1134193323Sed			if (!(flags & TH_PUSH) && !dlen &&
1135193323Sed			    ostate > TCPS_ESTABLISHED)
1136193323Sed				*age  = fr_tcplastack;
1137193323Sed			else {
1138193323Sed				*age  = fr_tcpclosewait;
1139193323Sed				state[dir] = TCPS_CLOSE_WAIT;
1140193323Sed			}
1141193323Sed		}
1142193323Sed		break;
1143193323Sed	}
1144193323Sed}
1145193323Sed
1146193323Sed
#ifdef	IPFILTER_LOG
/*
 * Emit a log record describing a state table entry.
 *
 * is   - the state entry to describe.
 * type - the event type stored in the record (e.g. ISL_EXPIRE).
 *
 * The record is built on the stack and passed to ipllog() on the
 * IPL_LOGSTATE device as a single item; ipllog()'s result is ignored.
 */
void ipstate_log(is, type)
struct ipstate *is;
u_int type;
{
	struct	ipslog	ipsl;
	void *items[1];
	size_t sizes[1];
	int types[1];

	ipsl.isl_type = type;
	ipsl.isl_pkts = is->is_pkts;
	ipsl.isl_bytes = is->is_bytes;
	ipsl.isl_src = is->is_src;
	ipsl.isl_dst = is->is_dst;
	ipsl.isl_p = is->is_p;
	ipsl.isl_flags = is->is_flags;

	/*
	 * The whole structure is handed to the logger, so the fields that
	 * only some protocols fill in are cleared first; otherwise stale
	 * stack bytes would end up in the log for UDP, ICMP and other
	 * protocols.
	 * NOTE(review): isl_filler[] is presumed to span the whole isl_ps
	 * union, and structure padding is not covered by this - confirm
	 * against the ipslog definition.
	 */
	ipsl.isl_state[0] = 0;
	ipsl.isl_state[1] = 0;
	ipsl.isl_ps.isl_filler[0] = 0;
	ipsl.isl_ps.isl_filler[1] = 0;

	if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) {
		ipsl.isl_sport = is->is_sport;
		ipsl.isl_dport = is->is_dport;
		if (ipsl.isl_p == IPPROTO_TCP) {
			ipsl.isl_state[0] = is->is_state[0];
			ipsl.isl_state[1] = is->is_state[1];
		}
	} else if (ipsl.isl_p == IPPROTO_ICMP)
		ipsl.isl_itype = is->is_icmp.ics_type;

	items[0] = &ipsl;
	sizes[0] = sizeof(ipsl);
	types[0] = 0;

	(void) ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1);
}
#endif
1184193323Sed
1185251662Sdim
1186249423Sdimvoid ip_statesync(ifp)
1187193323Sedvoid *ifp;
1188251662Sdim{
1189249423Sdim	register ipstate_t *is;
1190193323Sed	register int i;
1191251662Sdim
1192249423Sdim	WRITE_ENTER(&ipf_state);
1193193323Sed	for (i = fr_statesize - 1; i >= 0; i--)
1194251662Sdim		for (is = ips_table[i]; is != NULL; is = is->is_next) {
1195249423Sdim			if (is->is_ifpin == ifp)
1196193323Sed				is->is_ifpin = NULL;
1197251662Sdim			if (is->is_ifpout == ifp)
1198249423Sdim				is->is_ifpout = NULL;
1199193323Sed		}
1200251662Sdim	RWLOCK_EXIT(&ipf_state);
1201249423Sdim}
1202193323Sed