ip_state.c revision 92685
153642Sguido/*
292685Sdarrenr * Copyright (C) 1995-2002 by Darren Reed.
353642Sguido *
480482Sdarrenr * See the IPFILTER.LICENCE file for details on licencing.
553642Sguido */
653642Sguido
792685Sdarrenr#ifdef __sgi
892685Sdarrenr# include <sys/ptimers.h>
992685Sdarrenr#endif
1053642Sguido#include <sys/errno.h>
1153642Sguido#include <sys/types.h>
1253642Sguido#include <sys/param.h>
1353642Sguido#include <sys/file.h>
1453642Sguido#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
1553642Sguido    defined(_KERNEL)
1653642Sguido# include "opt_ipfilter_log.h"
1753642Sguido#endif
1860854Sdarrenr#if defined(_KERNEL) && defined(__FreeBSD_version) && \
1960854Sdarrenr    (__FreeBSD_version >= 400000) && !defined(KLD_MODULE)
2060854Sdarrenr#include "opt_inet6.h"
2160854Sdarrenr#endif
2253642Sguido#if !defined(_KERNEL) && !defined(KERNEL) && !defined(__KERNEL__)
2353642Sguido# include <stdio.h>
2453642Sguido# include <stdlib.h>
2553642Sguido# include <string.h>
2653642Sguido#else
2753642Sguido# ifdef linux
2853642Sguido#  include <linux/kernel.h>
2953642Sguido#  include <linux/module.h>
3053642Sguido# endif
3153642Sguido#endif
3260854Sdarrenr#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
3353642Sguido# include <sys/filio.h>
3453642Sguido# include <sys/fcntl.h>
3553642Sguido# if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM)
3653642Sguido#  include "opt_ipfilter.h"
3753642Sguido# endif
3853642Sguido#else
3953642Sguido# include <sys/ioctl.h>
4053642Sguido#endif
4153642Sguido#include <sys/time.h>
4253642Sguido#ifndef linux
4353642Sguido# include <sys/protosw.h>
4453642Sguido#endif
4553642Sguido#include <sys/socket.h>
4657096Sguido#if (defined(_KERNEL) || defined(KERNEL)) && !defined(linux)
4753642Sguido# include <sys/systm.h>
4853642Sguido#endif
4953642Sguido#if !defined(__SVR4) && !defined(__svr4__)
5053642Sguido# ifndef linux
5153642Sguido#  include <sys/mbuf.h>
5253642Sguido# endif
5353642Sguido#else
5453642Sguido# include <sys/filio.h>
5553642Sguido# include <sys/byteorder.h>
5653642Sguido# ifdef _KERNEL
5753642Sguido#  include <sys/dditypes.h>
5853642Sguido# endif
5953642Sguido# include <sys/stream.h>
6053642Sguido# include <sys/kmem.h>
6153642Sguido#endif
6253642Sguido
6353642Sguido#include <net/if.h>
6453642Sguido#ifdef sun
6553642Sguido# include <net/af.h>
6653642Sguido#endif
6753642Sguido#include <net/route.h>
6853642Sguido#include <netinet/in.h>
6953642Sguido#include <netinet/in_systm.h>
7053642Sguido#include <netinet/ip.h>
7153642Sguido#include <netinet/tcp.h>
7253642Sguido#ifndef linux
7353642Sguido# include <netinet/ip_var.h>
7453642Sguido# include <netinet/tcp_fsm.h>
7553642Sguido#endif
7653642Sguido#include <netinet/udp.h>
7753642Sguido#include <netinet/ip_icmp.h>
7853642Sguido#include "netinet/ip_compat.h"
7953642Sguido#include <netinet/tcpip.h>
8053642Sguido#include "netinet/ip_fil.h"
8153642Sguido#include "netinet/ip_nat.h"
8253642Sguido#include "netinet/ip_frag.h"
8353642Sguido#include "netinet/ip_state.h"
8460854Sdarrenr#ifdef	USE_INET6
8560854Sdarrenr#include <netinet/icmp6.h>
8660854Sdarrenr#endif
8753642Sguido#if (__FreeBSD_version >= 300000)
8853642Sguido# include <sys/malloc.h>
8953642Sguido# if (defined(_KERNEL) || defined(KERNEL)) && !defined(IPFILTER_LKM)
9053642Sguido#  include <sys/libkern.h>
9153642Sguido#  include <sys/systm.h>
9253642Sguido# endif
9353642Sguido#endif
9453642Sguido
9580482Sdarrenr#if !defined(lint)
9680482Sdarrenrstatic const char sccsid[] = "@(#)ip_state.c	1.8 6/5/96 (C) 1993-2000 Darren Reed";
9780482Sdarrenr/* static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.30.2.38 2001/07/23 13:49:46 darrenr Exp $"; */
9880482Sdarrenrstatic const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_state.c 92685 2002-03-19 11:44:16Z darrenr $";
9980482Sdarrenr#endif
10080482Sdarrenr
10153642Sguido#ifndef	MIN
10253642Sguido# define	MIN(a,b)	(((a)<(b))?(a):(b))
10353642Sguido#endif
10453642Sguido
10553642Sguido#define	TCP_CLOSE	(TH_FIN|TH_RST)
10653642Sguido
/*
 * File-private state table bookkeeping.
 *   ips_table - hash bucket array; allocated with fr_statesize slots in
 *               fr_stateinit() and indexed by an entry's is_hv.
 *   ips_num   - number of entries currently linked in (bumped by
 *               fr_stinsert()).
 *   ips_wild  - bumped by fr_addstate() for each entry created with
 *               FI_WILDP/FI_WILDA set.
 *   ips_stats - counters handed out to userland by fr_statetstats().
 */
10760854Sdarrenrstatic ipstate_t **ips_table = NULL;
10860854Sdarrenrstatic int	ips_num = 0;
10967614Sdarrenrstatic int	ips_wild = 0;
11060854Sdarrenrstatic ips_stat_t ips_stats;
11153642Sguido#if	(SOLARIS || defined(__sgi)) && defined(_KERNEL)
11253642Sguidoextern	KRWLOCK_T	ipf_state, ipf_mutex;
11353642Sguidoextern	kmutex_t	ipf_rw;
11453642Sguido#endif
11553642Sguido
11660854Sdarrenr#ifdef	USE_INET6
11760854Sdarrenrstatic frentry_t *fr_checkicmp6matchingstate __P((ip6_t *, fr_info_t *));
11860854Sdarrenr#endif
11960854Sdarrenrstatic int fr_matchsrcdst __P((ipstate_t *, union i6addr, union i6addr,
12053642Sguido			       fr_info_t *, tcphdr_t *));
12153642Sguidostatic frentry_t *fr_checkicmpmatchingstate __P((ip_t *, fr_info_t *));
12260854Sdarrenrstatic int fr_matchicmpqueryreply __P((int, ipstate_t *, icmphdr_t *));
12353642Sguidostatic int fr_state_flush __P((int));
12453642Sguidostatic ips_stat_t *fr_statetstats __P((void));
12553642Sguidostatic void fr_delstate __P((ipstate_t *));
12660854Sdarrenrstatic int fr_state_remove __P((caddr_t));
12767614Sdarrenrstatic void fr_ipsmove __P((ipstate_t **, ipstate_t *, u_int));
12860854Sdarrenrint fr_stputent __P((caddr_t));
12960854Sdarrenrint fr_stgetent __P((caddr_t));
13060854Sdarrenrvoid fr_stinsert __P((ipstate_t *));
13153642Sguido
13253642Sguido
/*
 * Timeout tunables.  The values are consistent with half-second ticks
 * (FIVE_DAYS is 2 * 5 * 86400 and TCP_MSL of 240 is annotated as two
 * minutes); fr_state_flush() likewise notes that the ageing counter runs
 * every half second.
 */
13353642Sguido#define	FIVE_DAYS	(2 * 5 * 86400)	/* 5 days: half closed session */
13453642Sguido
13553642Sguido#define	TCP_MSL	240			/* 2 minutes */
13653642Sguidou_long	fr_tcpidletimeout = FIVE_DAYS,
13753642Sguido	fr_tcpclosewait = 2 * TCP_MSL,
13853642Sguido	fr_tcplastack = 2 * TCP_MSL,
13953642Sguido	fr_tcptimeout = 2 * TCP_MSL,
14067614Sdarrenr	fr_tcpclosed = 120,
14167614Sdarrenr	fr_tcphalfclosed = 2 * 2 * 3600,    /* 2 hours */
14253642Sguido	fr_udptimeout = 240,
14380482Sdarrenr	fr_udpacktimeout = 24,
14480482Sdarrenr	fr_icmptimeout = 120,
14580482Sdarrenr	fr_icmpacktimeout = 12;
/*
 * fr_statemax  - entry limit checked by fr_addstate() before creating state.
 * fr_statesize - number of hash buckets allocated by fr_stateinit().
 */
14653642Sguidoint	fr_statemax = IPSTATE_MAX,
14753642Sguido	fr_statesize = IPSTATE_SIZE;
/*
 * fr_state_doflush - set to 1 by fr_addstate() when the table is full.
 * fr_state_lock    - when non-zero fr_addstate() creates no new state, and
 *                    the SIOCSTPUT/SIOCSTGET ioctls are only honoured while
 *                    it is set.
 */
14860854Sdarrenrint	fr_state_doflush = 0,
14960854Sdarrenr	fr_state_lock = 0;
/* Head of the linked list of all state entries (newest first). */
15092685Sdarrenripstate_t *ips_list = NULL;
15153642Sguido
/*
 * Query type -> expected reply type lookup tables, filled in by
 * fr_stateinit(); -1 marks types with no recorded reply.
 */
15260854Sdarrenrstatic 	int icmpreplytype4[ICMP_MAXTYPE + 1];
15392685Sdarrenr#ifdef	USE_INET6
15492685Sdarrenrstatic 	int icmpreplytype6[ICMP6_MAXTYPE + 1];
15592685Sdarrenr#endif
15653642Sguido
15753642Sguidoint fr_stateinit()
15853642Sguido{
15960854Sdarrenr	int i;
16060854Sdarrenr
16153642Sguido	KMALLOCS(ips_table, ipstate_t **, fr_statesize * sizeof(ipstate_t *));
16253642Sguido	if (ips_table != NULL)
16353642Sguido		bzero((char *)ips_table, fr_statesize * sizeof(ipstate_t *));
16453642Sguido	else
16553642Sguido		return -1;
16660854Sdarrenr
16760854Sdarrenr	/* fill icmp reply type table */
16860854Sdarrenr	for (i = 0; i <= ICMP_MAXTYPE; i++)
16960854Sdarrenr		icmpreplytype4[i] = -1;
17060854Sdarrenr	icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY;
17160854Sdarrenr	icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY;
17260854Sdarrenr	icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY;
17360854Sdarrenr	icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY;
17492685Sdarrenr#ifdef	USE_INET6
17592685Sdarrenr	/* fill icmp reply type table */
17692685Sdarrenr	for (i = 0; i <= ICMP6_MAXTYPE; i++)
17792685Sdarrenr		icmpreplytype6[i] = -1;
17892685Sdarrenr	icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY;
17992685Sdarrenr	icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT;
18092685Sdarrenr	icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY;
18192685Sdarrenr	icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT;
18292685Sdarrenr	icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT;
18392685Sdarrenr#endif
18460854Sdarrenr
18553642Sguido	return 0;
18653642Sguido}
18753642Sguido
18853642Sguido
18953642Sguidostatic ips_stat_t *fr_statetstats()
19053642Sguido{
19153642Sguido	ips_stats.iss_active = ips_num;
19253642Sguido	ips_stats.iss_table = ips_table;
19360854Sdarrenr	ips_stats.iss_list = ips_list;
19453642Sguido	return &ips_stats;
19553642Sguido}
19653642Sguido
19753642Sguido
19853642Sguido/*
19953642Sguido * flush state tables.  two actions currently defined:
20053642Sguido * which == 0 : flush all state table entries
20153642Sguido * which == 1 : flush TCP connections which have started to close but are
20264580Sdarrenr *	        stuck for some reason.
20392685Sdarrenr * which == 2 : flush TCP connections which have been idle for a long time,
20492685Sdarrenr *              starting at > 4 days idle and working back in successive half-
20592685Sdarrenr *              days to at most 12 hours old.
20653642Sguido */
20753642Sguidostatic int fr_state_flush(which)
20853642Sguidoint which;
20953642Sguido{
21092685Sdarrenr	ipstate_t *is, **isp;
21153642Sguido#if defined(_KERNEL) && !SOLARIS
21253642Sguido	int s;
21353642Sguido#endif
21492685Sdarrenr	int delete, removed = 0, try;
21553642Sguido
21653642Sguido	SPL_NET(s);
21760854Sdarrenr	for (isp = &ips_list; (is = *isp); ) {
21860854Sdarrenr		delete = 0;
21953642Sguido
22060854Sdarrenr		switch (which)
22160854Sdarrenr		{
22260854Sdarrenr		case 0 :
22360854Sdarrenr			delete = 1;
22460854Sdarrenr			break;
22560854Sdarrenr		case 1 :
22692685Sdarrenr		case 2 :
22760854Sdarrenr			if (is->is_p != IPPROTO_TCP)
22860854Sdarrenr				break;
22960854Sdarrenr			if ((is->is_state[0] != TCPS_ESTABLISHED) ||
23060854Sdarrenr			    (is->is_state[1] != TCPS_ESTABLISHED))
23153642Sguido				delete = 1;
23260854Sdarrenr			break;
23360854Sdarrenr		}
23453642Sguido
23560854Sdarrenr		if (delete) {
23660854Sdarrenr			if (is->is_p == IPPROTO_TCP)
23760854Sdarrenr				ips_stats.iss_fin++;
23860854Sdarrenr			else
23960854Sdarrenr				ips_stats.iss_expire++;
24053642Sguido#ifdef	IPFILTER_LOG
24160854Sdarrenr			ipstate_log(is, ISL_FLUSH);
24253642Sguido#endif
24360854Sdarrenr			fr_delstate(is);
24460854Sdarrenr			removed++;
24560854Sdarrenr		} else
24660854Sdarrenr			isp = &is->is_next;
24760854Sdarrenr	}
24892685Sdarrenr
24992685Sdarrenr	/*
25092685Sdarrenr	 * Asked to remove inactive entries, try again if first attempt
25192685Sdarrenr	 * failed.  In this case, 86400 is half a day because the counter is
25292685Sdarrenr	 * activated every half second.
25392685Sdarrenr	 */
25492685Sdarrenr	if ((which == 2) && (removed == 0)) {
25592685Sdarrenr		try = 86400;	/* half a day */
25692685Sdarrenr		for (; (try < FIVE_DAYS) && (removed == 0); try += 86400) {
25792685Sdarrenr			for (isp = &ips_list; (is = *isp); ) {
25892685Sdarrenr				delete = 0;
25992685Sdarrenr				if ((is->is_p == IPPROTO_TCP) &&
26092685Sdarrenr				    ((is->is_state[0] == TCPS_ESTABLISHED) ||
26192685Sdarrenr				     (is->is_state[1] == TCPS_ESTABLISHED)) &&
26292685Sdarrenr				    (is->is_age < try)) {
26392685Sdarrenr					ips_stats.iss_fin++;
26492685Sdarrenr					delete = 1;
26592685Sdarrenr				} else if ((is->is_p != IPPROTO_TCP) &&
26692685Sdarrenr					   (is->is_pkts > 1)) {
26792685Sdarrenr					ips_stats.iss_expire++;
26892685Sdarrenr					delete = 1;
26992685Sdarrenr				}
27092685Sdarrenr				if (delete) {
27192685Sdarrenr#ifdef	IPFILTER_LOG
27292685Sdarrenr					ipstate_log(is, ISL_FLUSH);
27392685Sdarrenr#endif
27492685Sdarrenr					fr_delstate(is);
27592685Sdarrenr					removed++;
27692685Sdarrenr				} else
27792685Sdarrenr					isp = &is->is_next;
27892685Sdarrenr			}
27992685Sdarrenr		}
28092685Sdarrenr	}
28192685Sdarrenr
28253642Sguido	SPL_X(s);
28353642Sguido	return removed;
28453642Sguido}
28553642Sguido
28653642Sguido
28760854Sdarrenrstatic int fr_state_remove(data)
28860854Sdarrenrcaddr_t data;
28960854Sdarrenr{
29060854Sdarrenr	ipstate_t *sp, st;
29160854Sdarrenr	int error;
29260854Sdarrenr
29360854Sdarrenr	sp = &st;
29460854Sdarrenr	error = IRCOPYPTR(data, (caddr_t)&st, sizeof(st));
29560854Sdarrenr	if (error)
29660854Sdarrenr		return EFAULT;
29760854Sdarrenr
29880482Sdarrenr	WRITE_ENTER(&ipf_state);
29960854Sdarrenr	for (sp = ips_list; sp; sp = sp->is_next)
30060854Sdarrenr		if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) &&
30167614Sdarrenr		    !bcmp((char *)&sp->is_src, (char *)&st.is_src,
30267614Sdarrenr			  sizeof(st.is_src)) &&
30367614Sdarrenr		    !bcmp((char *)&sp->is_dst, (char *)&st.is_src,
30467614Sdarrenr			  sizeof(st.is_dst)) &&
30567614Sdarrenr		    !bcmp((char *)&sp->is_ps, (char *)&st.is_ps,
30667614Sdarrenr			  sizeof(st.is_ps))) {
30760854Sdarrenr#ifdef	IPFILTER_LOG
30860854Sdarrenr			ipstate_log(sp, ISL_REMOVE);
30960854Sdarrenr#endif
31060854Sdarrenr			fr_delstate(sp);
31160854Sdarrenr			RWLOCK_EXIT(&ipf_state);
31260854Sdarrenr			return 0;
31360854Sdarrenr		}
31480482Sdarrenr	RWLOCK_EXIT(&ipf_state);
31560854Sdarrenr	return ESRCH;
31660854Sdarrenr}
31760854Sdarrenr
31860854Sdarrenr
31953642Sguidoint fr_state_ioctl(data, cmd, mode)
32053642Sguidocaddr_t data;
32153642Sguido#if defined(__NetBSD__) || defined(__OpenBSD__)
32253642Sguidou_long cmd;
32353642Sguido#else
32453642Sguidoint cmd;
32553642Sguido#endif
32653642Sguidoint mode;
32753642Sguido{
32860854Sdarrenr	int arg, ret, error = 0;
32953642Sguido
33053642Sguido	switch (cmd)
33153642Sguido	{
33260854Sdarrenr	case SIOCDELST :
33360854Sdarrenr		error = fr_state_remove(data);
33460854Sdarrenr		break;
33553642Sguido	case SIOCIPFFL :
33660854Sdarrenr		error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
33760854Sdarrenr		if (error)
33860854Sdarrenr			break;
33953642Sguido		if (arg == 0 || arg == 1) {
34060854Sdarrenr			WRITE_ENTER(&ipf_state);
34153642Sguido			ret = fr_state_flush(arg);
34260854Sdarrenr			RWLOCK_EXIT(&ipf_state);
34360854Sdarrenr			error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
34453642Sguido		} else
34553642Sguido			error = EINVAL;
34653642Sguido		break;
34755929Sguido#ifdef	IPFILTER_LOG
34855929Sguido	case SIOCIPFFB :
34955929Sguido		if (!(mode & FWRITE))
35055929Sguido			error = EPERM;
35160854Sdarrenr		else {
35260854Sdarrenr			int tmp;
35360854Sdarrenr
35460854Sdarrenr			tmp = ipflog_clear(IPL_LOGSTATE);
35560854Sdarrenr			IWCOPY((char *)&tmp, data, sizeof(tmp));
35660854Sdarrenr		}
35755929Sguido		break;
35855929Sguido#endif
35960854Sdarrenr	case SIOCGETFS :
36060854Sdarrenr		error = IWCOPYPTR((caddr_t)fr_statetstats(), data,
36160854Sdarrenr				  sizeof(ips_stat_t));
36253642Sguido		break;
36353642Sguido	case FIONREAD :
36453642Sguido#ifdef	IPFILTER_LOG
36572006Sdarrenr		arg = (int)iplused[IPL_LOGSTATE];
36672006Sdarrenr		error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
36753642Sguido#endif
36853642Sguido		break;
36960854Sdarrenr	case SIOCSTLCK :
37060854Sdarrenr		error = fr_lock(data, &fr_state_lock);
37160854Sdarrenr		break;
37260854Sdarrenr	case SIOCSTPUT :
37360854Sdarrenr		if (!fr_state_lock) {
37460854Sdarrenr			error = EACCES;
37560854Sdarrenr			break;
37660854Sdarrenr		}
37760854Sdarrenr		error = fr_stputent(data);
37860854Sdarrenr		break;
37960854Sdarrenr	case SIOCSTGET :
38060854Sdarrenr		if (!fr_state_lock) {
38160854Sdarrenr			error = EACCES;
38260854Sdarrenr			break;
38360854Sdarrenr		}
38460854Sdarrenr		error = fr_stgetent(data);
38560854Sdarrenr		break;
38653642Sguido	default :
38753642Sguido		error = EINVAL;
38853642Sguido		break;
38953642Sguido	}
39053642Sguido	return error;
39153642Sguido}
39253642Sguido
39353642Sguido
39492685Sdarrenr/*
39592685Sdarrenr * Copy out state information from the kernel to a user space process.
39692685Sdarrenr */
39760854Sdarrenrint fr_stgetent(data)
39860854Sdarrenrcaddr_t data;
39960854Sdarrenr{
40060854Sdarrenr	register ipstate_t *is, *isn;
40192685Sdarrenr	ipstate_save_t ips;
40260854Sdarrenr	int error;
40360854Sdarrenr
40492685Sdarrenr	error = IRCOPYPTR(data, (caddr_t)&ips, sizeof(ips));
40560854Sdarrenr	if (error)
40692685Sdarrenr		return error;
40760854Sdarrenr
40860854Sdarrenr	isn = ips.ips_next;
40960854Sdarrenr	if (!isn) {
41060854Sdarrenr		isn = ips_list;
41160854Sdarrenr		if (isn == NULL) {
41260854Sdarrenr			if (ips.ips_next == NULL)
41360854Sdarrenr				return ENOENT;
41460854Sdarrenr			return 0;
41560854Sdarrenr		}
41660854Sdarrenr	} else {
41760854Sdarrenr		/*
41860854Sdarrenr		 * Make sure the pointer we're copying from exists in the
41960854Sdarrenr		 * current list of entries.  Security precaution to prevent
42060854Sdarrenr		 * copying of random kernel data.
42160854Sdarrenr		 */
42260854Sdarrenr		for (is = ips_list; is; is = is->is_next)
42360854Sdarrenr			if (is == isn)
42460854Sdarrenr				break;
42560854Sdarrenr		if (!is)
42660854Sdarrenr			return ESRCH;
42760854Sdarrenr	}
42860854Sdarrenr	ips.ips_next = isn->is_next;
42960854Sdarrenr	bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is));
43060854Sdarrenr	if (isn->is_rule)
43160854Sdarrenr		bcopy((char *)isn->is_rule, (char *)&ips.ips_fr,
43260854Sdarrenr		      sizeof(ips.ips_fr));
43392685Sdarrenr	error = IWCOPYPTR((caddr_t)&ips, data, sizeof(ips));
43460854Sdarrenr	if (error)
43564580Sdarrenr		error = EFAULT;
43664580Sdarrenr	return error;
43760854Sdarrenr}
43860854Sdarrenr
43960854Sdarrenr
44060854Sdarrenrint fr_stputent(data)
44160854Sdarrenrcaddr_t data;
44260854Sdarrenr{
44360854Sdarrenr	register ipstate_t *is, *isn;
44492685Sdarrenr	ipstate_save_t ips;
44592685Sdarrenr	int error, out, i;
44660854Sdarrenr	frentry_t *fr;
44792685Sdarrenr	char *name;
44860854Sdarrenr
44992685Sdarrenr	error = IRCOPYPTR(data, (caddr_t)&ips, sizeof(ips));
45060854Sdarrenr	if (error)
45192685Sdarrenr		return error;
45260854Sdarrenr
45360854Sdarrenr	KMALLOC(isn, ipstate_t *);
45460854Sdarrenr	if (isn == NULL)
45560854Sdarrenr		return ENOMEM;
45660854Sdarrenr
45760854Sdarrenr	bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn));
45860854Sdarrenr	fr = isn->is_rule;
45960854Sdarrenr	if (fr != NULL) {
46060854Sdarrenr		if (isn->is_flags & FI_NEWFR) {
46160854Sdarrenr			KMALLOC(fr, frentry_t *);
46260854Sdarrenr			if (fr == NULL) {
46360854Sdarrenr				KFREE(isn);
46460854Sdarrenr				return ENOMEM;
46560854Sdarrenr			}
46660854Sdarrenr			bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr));
46763523Sdarrenr			out = fr->fr_flags & FR_OUTQUE ? 1 : 0;
46860854Sdarrenr			isn->is_rule = fr;
46960854Sdarrenr			ips.ips_is.is_rule = fr;
47092685Sdarrenr
47192685Sdarrenr			/*
47292685Sdarrenr			 * Look up all the interface names in the rule.
47392685Sdarrenr			 */
47492685Sdarrenr			for (i = 0; i < 4; i++) {
47592685Sdarrenr				name = fr->fr_ifnames[i];
47692685Sdarrenr				if ((name[1] == '\0') &&
47792685Sdarrenr				    ((name[0] == '-') || (name[0] == '*'))) {
47892685Sdarrenr					fr->fr_ifas[i] = NULL;
47992685Sdarrenr				} else if (*name != '\0') {
48092685Sdarrenr					fr->fr_ifas[i] = GETUNIT(name,
48192685Sdarrenr								 fr->fr_v);
48292685Sdarrenr					if (fr->fr_ifas[i] == NULL)
48392685Sdarrenr						fr->fr_ifas[i] = (void *)-1;
48492685Sdarrenr					else {
48592685Sdarrenr						strncpy(isn->is_ifname[i],
48692685Sdarrenr							IFNAME(fr->fr_ifas[i]),
48792685Sdarrenr							IFNAMSIZ);
48892685Sdarrenr					}
48963523Sdarrenr				}
49092685Sdarrenr				isn->is_ifp[out] = fr->fr_ifas[i];
49192685Sdarrenr			}
49292685Sdarrenr
49363523Sdarrenr			/*
49463523Sdarrenr			 * send a copy back to userland of what we ended up
49563523Sdarrenr			 * to allow for verification.
49663523Sdarrenr			 */
49792685Sdarrenr			error = IWCOPYPTR((caddr_t)&ips, data, sizeof(ips));
49860854Sdarrenr			if (error) {
49960854Sdarrenr				KFREE(isn);
50060854Sdarrenr				KFREE(fr);
50160854Sdarrenr				return EFAULT;
50260854Sdarrenr			}
50360854Sdarrenr		} else {
50460854Sdarrenr			for (is = ips_list; is; is = is->is_next)
50560854Sdarrenr				if (is->is_rule == fr)
50660854Sdarrenr					break;
50760854Sdarrenr			if (!is) {
50860854Sdarrenr				KFREE(isn);
50960854Sdarrenr				return ESRCH;
51060854Sdarrenr			}
51160854Sdarrenr		}
51260854Sdarrenr	}
51360854Sdarrenr	fr_stinsert(isn);
51460854Sdarrenr	return 0;
51560854Sdarrenr}
51660854Sdarrenr
51760854Sdarrenr
51892685Sdarrenr/*
51992685Sdarrenr * Insert a state table entry manually.
52092685Sdarrenr */
52160854Sdarrenrvoid fr_stinsert(is)
52260854Sdarrenrregister ipstate_t *is;
52360854Sdarrenr{
52460854Sdarrenr	register u_int hv = is->is_hv;
52592685Sdarrenr	char *name;
52692685Sdarrenr	int i;
52760854Sdarrenr
52860854Sdarrenr	MUTEX_INIT(&is->is_lock, "ipf state entry", NULL);
52960854Sdarrenr
53092685Sdarrenr	/*
53192685Sdarrenr	 * Look up all the interface names in the state entry.
53292685Sdarrenr	 */
53392685Sdarrenr	for (i = 0; i < 4; i++) {
53492685Sdarrenr		name = is->is_ifname[i];
53592685Sdarrenr		if ((name[1] == '\0') &&
53692685Sdarrenr		    ((name[0] == '-') || (name[0] == '*'))) {
53792685Sdarrenr			is->is_ifp[0] = NULL;
53892685Sdarrenr		} else if (*name != '\0') {
53992685Sdarrenr			is->is_ifp[i] = GETUNIT(name, is->is_v);
54092685Sdarrenr			if (is->is_ifp[i] == NULL)
54192685Sdarrenr				is->is_ifp[i] = (void *)-1;
54292685Sdarrenr		}
54360854Sdarrenr	}
54460854Sdarrenr
54592685Sdarrenr
54660854Sdarrenr	/*
54760854Sdarrenr	 * add into list table.
54860854Sdarrenr	 */
54960854Sdarrenr	if (ips_list)
55060854Sdarrenr		ips_list->is_pnext = &is->is_next;
55160854Sdarrenr	is->is_pnext = &ips_list;
55260854Sdarrenr	is->is_next = ips_list;
55360854Sdarrenr	ips_list = is;
55460854Sdarrenr	if (ips_table[hv])
55560854Sdarrenr		ips_table[hv]->is_phnext = &is->is_hnext;
55660854Sdarrenr	else
55760854Sdarrenr		ips_stats.iss_inuse++;
55860854Sdarrenr	is->is_phnext = ips_table + hv;
55960854Sdarrenr	is->is_hnext = ips_table[hv];
56060854Sdarrenr	ips_table[hv] = is;
56164580Sdarrenr	ips_num++;
56260854Sdarrenr}
56360854Sdarrenr
56460854Sdarrenr
56553642Sguido/*
56653642Sguido * Create a new ipstate structure and hang it off the hash table.
56753642Sguido */
56892685Sdarrenripstate_t *fr_addstate(ip, fin, stsave, flags)
56953642Sguidoip_t *ip;
57053642Sguidofr_info_t *fin;
57192685Sdarrenripstate_t **stsave;
57253642Sguidou_int flags;
57353642Sguido{
57460854Sdarrenr	register tcphdr_t *tcp = NULL;
57553642Sguido	register ipstate_t *is;
57653642Sguido	register u_int hv;
57792685Sdarrenr	struct icmp *ic;
57853642Sguido	ipstate_t ips;
57953642Sguido	u_int pass;
58092685Sdarrenr	void *ifp;
58160854Sdarrenr	int out;
58253642Sguido
58380482Sdarrenr	if (fr_state_lock || (fin->fin_off != 0) || (fin->fin_fl & FI_SHORT))
58453642Sguido		return NULL;
58553642Sguido	if (ips_num == fr_statemax) {
58653642Sguido		ips_stats.iss_max++;
58753642Sguido		fr_state_doflush = 1;
58853642Sguido		return NULL;
58953642Sguido	}
59060854Sdarrenr	out = fin->fin_out;
59153642Sguido	is = &ips;
59253642Sguido	bzero((char *)is, sizeof(*is));
59353642Sguido	ips.is_age = 1;
59453642Sguido	/*
59553642Sguido	 * Copy and calculate...
59653642Sguido	 */
59760854Sdarrenr	hv = (is->is_p = fin->fin_fi.fi_p);
59860854Sdarrenr	is->is_src = fin->fin_fi.fi_src;
59960854Sdarrenr	hv += is->is_saddr;
60060854Sdarrenr	is->is_dst = fin->fin_fi.fi_dst;
60160854Sdarrenr	hv += is->is_daddr;
60260854Sdarrenr#ifdef	USE_INET6
60360854Sdarrenr	if (fin->fin_v == 6) {
60492685Sdarrenr		if ((is->is_p == IPPROTO_ICMPV6) &&
60592685Sdarrenr		    IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) {
60692685Sdarrenr			/*
60792685Sdarrenr			 * So you can do keep state with neighbour discovery.
60892685Sdarrenr			 */
60992685Sdarrenr			flags |= FI_W_DADDR;
61092685Sdarrenr			hv -= is->is_daddr;
61192685Sdarrenr		} else {
61292685Sdarrenr			hv += is->is_dst.i6[1];
61392685Sdarrenr			hv += is->is_dst.i6[2];
61492685Sdarrenr			hv += is->is_dst.i6[3];
61560854Sdarrenr		}
61692685Sdarrenr		hv += is->is_src.i6[1];
61792685Sdarrenr		hv += is->is_src.i6[2];
61892685Sdarrenr		hv += is->is_src.i6[3];
61960854Sdarrenr	}
62060854Sdarrenr#endif
62153642Sguido
62260854Sdarrenr	switch (is->is_p)
62353642Sguido	{
62460854Sdarrenr#ifdef	USE_INET6
62560854Sdarrenr	case IPPROTO_ICMPV6 :
62692685Sdarrenr		ic = (struct icmp *)fin->fin_dp;
62792685Sdarrenr		if ((ic->icmp_type & ICMP6_INFOMSG_MASK) == 0)
62892685Sdarrenr			return NULL;
62953642Sguido
63053642Sguido		switch (ic->icmp_type)
63153642Sguido		{
63260854Sdarrenr		case ICMP6_ECHO_REQUEST :
63392685Sdarrenr			is->is_icmp.ics_type = ic->icmp_type;
63453642Sguido			hv += (is->is_icmp.ics_id = ic->icmp_id);
63553642Sguido			hv += (is->is_icmp.ics_seq = ic->icmp_seq);
63653642Sguido			break;
63760854Sdarrenr		case ICMP6_MEMBERSHIP_QUERY :
63860854Sdarrenr		case ND_ROUTER_SOLICIT :
63960854Sdarrenr		case ND_NEIGHBOR_SOLICIT :
64092685Sdarrenr		case ICMP6_NI_QUERY :
64192685Sdarrenr			is->is_icmp.ics_type = ic->icmp_type;
64260854Sdarrenr			break;
64392685Sdarrenr		default :
64492685Sdarrenr			return NULL;
64592685Sdarrenr		}
64692685Sdarrenr		ATOMIC_INCL(ips_stats.iss_icmp);
64792685Sdarrenr		is->is_age = fr_icmptimeout;
64892685Sdarrenr		break;
64960854Sdarrenr#endif
65092685Sdarrenr	case IPPROTO_ICMP :
65192685Sdarrenr		ic = (struct icmp *)fin->fin_dp;
65292685Sdarrenr
65392685Sdarrenr		switch (ic->icmp_type)
65492685Sdarrenr		{
65560854Sdarrenr		case ICMP_ECHO :
65653642Sguido		case ICMP_TSTAMP :
65753642Sguido		case ICMP_IREQ :
65853642Sguido		case ICMP_MASKREQ :
65960854Sdarrenr			is->is_icmp.ics_type = ic->icmp_type;
66060854Sdarrenr			hv += (is->is_icmp.ics_id = ic->icmp_id);
66160854Sdarrenr			hv += (is->is_icmp.ics_seq = ic->icmp_seq);
66253642Sguido			break;
66353642Sguido		default :
66453642Sguido			return NULL;
66553642Sguido		}
66660854Sdarrenr		ATOMIC_INCL(ips_stats.iss_icmp);
66753642Sguido		is->is_age = fr_icmptimeout;
66853642Sguido		break;
66953642Sguido	case IPPROTO_TCP :
67060854Sdarrenr		tcp = (tcphdr_t *)fin->fin_dp;
67153642Sguido
67260854Sdarrenr		if (tcp->th_flags & TH_RST)
67360854Sdarrenr			return NULL;
67453642Sguido		/*
67553642Sguido		 * The endian of the ports doesn't matter, but the ack and
67653642Sguido		 * sequence numbers do as we do mathematics on them later.
67753642Sguido		 */
67892685Sdarrenr		is->is_sport = htons(fin->fin_data[0]);
67992685Sdarrenr		is->is_dport = htons(fin->fin_data[1]);
68053642Sguido		if ((flags & (FI_W_DPORT|FI_W_SPORT)) == 0) {
68192685Sdarrenr			hv += is->is_sport;
68292685Sdarrenr			hv += is->is_dport;
68353642Sguido		}
68467614Sdarrenr		is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen -
68567614Sdarrenr			      (tcp->th_off << 2) +
68660854Sdarrenr			      ((tcp->th_flags & TH_SYN) ? 1 : 0) +
68760854Sdarrenr			      ((tcp->th_flags & TH_FIN) ? 1 : 0);
68860854Sdarrenr		is->is_maxsend = is->is_send;
68953642Sguido		is->is_dend = 0;
69060854Sdarrenr		is->is_maxdwin = 1;
69153642Sguido		is->is_maxswin = ntohs(tcp->th_win);
69253642Sguido		if (is->is_maxswin == 0)
69353642Sguido			is->is_maxswin = 1;
69453642Sguido		/*
69553642Sguido		 * If we're creating state for a starting connection, start the
69653642Sguido		 * timer on it as we'll never see an error if it fails to
69753642Sguido		 * connect.
69853642Sguido		 */
69960854Sdarrenr		ATOMIC_INCL(ips_stats.iss_tcp);
70053642Sguido		break;
70192685Sdarrenr
70253642Sguido	case IPPROTO_UDP :
70360854Sdarrenr		tcp = (tcphdr_t *)fin->fin_dp;
70453642Sguido
70592685Sdarrenr		is->is_sport = htons(fin->fin_data[0]);
70692685Sdarrenr		is->is_dport = htons(fin->fin_data[1]);
70753642Sguido		if ((flags & (FI_W_DPORT|FI_W_SPORT)) == 0) {
70892685Sdarrenr			hv += is->is_sport;
70992685Sdarrenr			hv += is->is_dport;
71053642Sguido		}
71160854Sdarrenr		ATOMIC_INCL(ips_stats.iss_udp);
71253642Sguido		is->is_age = fr_udptimeout;
71353642Sguido		break;
71453642Sguido	default :
71592685Sdarrenr		is->is_age = fr_udptimeout;
71692685Sdarrenr		break;
71753642Sguido	}
71853642Sguido
71953642Sguido	KMALLOC(is, ipstate_t *);
72053642Sguido	if (is == NULL) {
72160854Sdarrenr		ATOMIC_INCL(ips_stats.iss_nomem);
72253642Sguido		return NULL;
72353642Sguido	}
72453642Sguido	bcopy((char *)&ips, (char *)is, sizeof(*is));
72553642Sguido	hv %= fr_statesize;
72660854Sdarrenr	is->is_hv = hv;
72753642Sguido	is->is_rule = fin->fin_fr;
72853642Sguido	if (is->is_rule != NULL) {
72960854Sdarrenr		ATOMIC_INC32(is->is_rule->fr_ref);
73053642Sguido		pass = is->is_rule->fr_flags;
73192685Sdarrenr		is->is_frage[0] = is->is_rule->fr_age[0];
73292685Sdarrenr		is->is_frage[1] = is->is_rule->fr_age[1];
73392685Sdarrenr		if (is->is_frage[0] != 0)
73492685Sdarrenr			is->is_age = is->is_frage[0];
73592685Sdarrenr
73692685Sdarrenr		is->is_ifp[(out << 1) + 1] = is->is_rule->fr_ifas[1];
73792685Sdarrenr		is->is_ifp[(1 - out) << 1] = is->is_rule->fr_ifas[2];
73892685Sdarrenr		is->is_ifp[((1 - out) << 1) + 1] = is->is_rule->fr_ifas[3];
73992685Sdarrenr
74092685Sdarrenr		if (((ifp = is->is_rule->fr_ifas[1]) != NULL) &&
74192685Sdarrenr		    (ifp != (void *)-1))
74292685Sdarrenr			strncpy(is->is_ifname[(out << 1) + 1],
74392685Sdarrenr				IFNAME(ifp), IFNAMSIZ);
74492685Sdarrenr		if (((ifp = is->is_rule->fr_ifas[2]) != NULL) &&
74592685Sdarrenr		    (ifp != (void *)-1))
74692685Sdarrenr			strncpy(is->is_ifname[(1 - out) << 1],
74792685Sdarrenr				IFNAME(ifp), IFNAMSIZ);
74892685Sdarrenr		if (((ifp = is->is_rule->fr_ifas[3]) != NULL) &&
74992685Sdarrenr		    (ifp != (void *)-1))
75092685Sdarrenr			strncpy(is->is_ifname[((1 - out) << 1) + 1],
75192685Sdarrenr				IFNAME(ifp), IFNAMSIZ);
75253642Sguido	} else
75353642Sguido		pass = fr_flags;
75492685Sdarrenr
75592685Sdarrenr	is->is_ifp[out << 1] = fin->fin_ifp;
75692685Sdarrenr	strncpy(is->is_ifname[out << 1], IFNAME(fin->fin_ifp), IFNAMSIZ);
75792685Sdarrenr
75853642Sguido	WRITE_ENTER(&ipf_state);
75953642Sguido
76053642Sguido	is->is_pass = pass;
76192685Sdarrenr	if ((flags & FI_IGNOREPKT) == 0) {
76292685Sdarrenr		is->is_pkts = 1;
76392685Sdarrenr		is->is_bytes = fin->fin_dlen + fin->fin_hlen;
76492685Sdarrenr	}
76553642Sguido	/*
76653642Sguido	 * We want to check everything that is a property of this packet,
76753642Sguido	 * but we don't (automatically) care about it's fragment status as
76853642Sguido	 * this may change.
76953642Sguido	 */
77092685Sdarrenr	is->is_v = fin->fin_v;
77192685Sdarrenr	is->is_rulen = fin->fin_rule;
77253642Sguido	is->is_opt = fin->fin_fi.fi_optmsk;
77353642Sguido	is->is_optmsk = 0xffffffff;
77453642Sguido	is->is_sec = fin->fin_fi.fi_secmsk;
77553642Sguido	is->is_secmsk = 0xffff;
77653642Sguido	is->is_auth = fin->fin_fi.fi_auth;
77753642Sguido	is->is_authmsk = 0xffff;
77880482Sdarrenr	is->is_flags = fin->fin_fl & FI_CMP;
77953642Sguido	is->is_flags |= FI_CMP << 4;
78060854Sdarrenr	is->is_flags |= flags & (FI_WILDP|FI_WILDA);
78167614Sdarrenr	if (flags & (FI_WILDP|FI_WILDA))
78267614Sdarrenr		ips_wild++;
78392685Sdarrenr
78453642Sguido	if (pass & FR_LOGFIRST)
78553642Sguido		is->is_pass &= ~(FR_LOGFIRST|FR_LOG);
78660854Sdarrenr	fr_stinsert(is);
78792685Sdarrenr	is->is_me = stsave;
78860854Sdarrenr	if (is->is_p == IPPROTO_TCP) {
78960854Sdarrenr		fr_tcp_age(&is->is_age, is->is_state, fin,
79064580Sdarrenr			   0); /* 0 = packet from the source */
79160854Sdarrenr	}
79253642Sguido#ifdef	IPFILTER_LOG
79353642Sguido	ipstate_log(is, ISL_NEW);
79453642Sguido#endif
79553642Sguido	RWLOCK_EXIT(&ipf_state);
79660854Sdarrenr	fin->fin_rev = IP6NEQ(is->is_dst, fin->fin_fi.fi_dst);
79775262Sdarrenr	if ((fin->fin_fi.fi_fl & FI_FRAG) && (pass & FR_KEEPFRAG))
79853642Sguido		ipfr_newfrag(ip, fin, pass ^ FR_KEEPSTATE);
79953642Sguido	return is;
80053642Sguido}
80153642Sguido
80253642Sguido
80353642Sguido
80453642Sguido/*
80553642Sguido * check to see if a packet with TCP headers fits within the TCP window.
80653642Sguido * change timeout depending on whether new packet is a SYN-ACK returning for a
80753642Sguido * SYN or a RST or FIN which indicate time to close up shop.
80853642Sguido */
/*
 * is  - state entry whose per-direction TCP data (is_tcp.ts_data[]) is
 *       checked and, when the packet is accepted, updated.
 * fin - packet information; fin_fi.fi_src, fin_data[0] and fin_dlen are read.
 * ip  - not referenced in this function.
 * tcp - TCP header of the packet.
 *
 * Returns 1 when the segment passes the window/ack-skew test below (the
 * state is then updated and fr_tcp_age() is called), 0 otherwise.
 * is->is_lock is held around all reads/writes of the tracking data.
 */
80953642Sguidoint fr_tcpstate(is, fin, ip, tcp)
81053642Sguidoregister ipstate_t *is;
81153642Sguidofr_info_t *fin;
81253642Sguidoip_t *ip;
81353642Sguidotcphdr_t *tcp;
81453642Sguido{
81553642Sguido	register tcp_seq seq, ack, end;
81653642Sguido	register int ackskew;
81753642Sguido	tcpdata_t  *fdata, *tdata;
81853642Sguido	u_short	win, maxwin;
81953642Sguido	int ret = 0;
82053642Sguido	int source;
82153642Sguido
82253642Sguido	/*
82353642Sguido	 * Find difference between last checked packet and this packet.
82453642Sguido	 */
	/*
	 * source is non-zero only when both the packet's source address and
	 * its fin_data[0] value agree with the state's is_src/is_sport.
	 */
82560854Sdarrenr	source = IP6EQ(fin->fin_fi.fi_src, is->is_src);
82680482Sdarrenr	if (source && (ntohs(is->is_sport) != fin->fin_data[0]))
82780482Sdarrenr		source = 0;
	/*
	 * fdata is the slot updated with this packet's own sequence range,
	 * tdata is the opposite slot, checked against this packet's ack.
	 */
82853642Sguido	fdata = &is->is_tcp.ts_data[!source];
82953642Sguido	tdata = &is->is_tcp.ts_data[source];
83053642Sguido	seq = ntohl(tcp->th_seq);
83153642Sguido	ack = ntohl(tcp->th_ack);
83253642Sguido	win = ntohs(tcp->th_win);
	/*
	 * end = seq + (fin_dlen minus the TCP header length), plus one each
	 * for SYN and FIN.
	 */
83360854Sdarrenr	end = seq + fin->fin_dlen - (tcp->th_off << 2) +
83453642Sguido	       ((tcp->th_flags & TH_SYN) ? 1 : 0) +
83557096Sguido	       ((tcp->th_flags & TH_FIN) ? 1 : 0);
83653642Sguido
83767614Sdarrenr	MUTEX_ENTER(&is->is_lock);
83853642Sguido	if (fdata->td_end == 0) {
83953642Sguido		/*
84053642Sguido		 * Must be a (outgoing) SYN-ACK in reply to a SYN.
84153642Sguido		 */
84253642Sguido		fdata->td_end = end;
84353642Sguido		fdata->td_maxwin = 1;
84453642Sguido		fdata->td_maxend = end + 1;
84553642Sguido	}
84653642Sguido
84753642Sguido	if (!(tcp->th_flags & TH_ACK)) {  /* Pretend an ack was sent */
84853642Sguido		ack = tdata->td_end;
84953642Sguido	} else if (((tcp->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
85053642Sguido		   (ack == 0)) {
85153642Sguido		/* gross hack to get around certain broken tcp stacks */
85253642Sguido		ack = tdata->td_end;
85353642Sguido	}
85453642Sguido
	/*
	 * A segment that occupies no sequence space (seq == end) is compared
	 * using the last end value recorded in fdata.
	 */
85553642Sguido	if (seq == end)
85653642Sguido		seq = end = fdata->td_end;
85753642Sguido
85853642Sguido	maxwin = tdata->td_maxwin;
85953642Sguido	ackskew = tdata->td_end - ack;
86053642Sguido
	/*
	 * Sequence comparisons are done on the difference cast to int.
	 * Note these macros (and MAXACKWINDOW below) are defined inside the
	 * function body and remain defined for the rest of the file.
	 */
86153642Sguido#define	SEQ_GE(a,b)	((int)((a) - (b)) >= 0)
86253642Sguido#define	SEQ_GT(a,b)	((int)((a) - (b)) > 0)
86353642Sguido	if ((SEQ_GE(fdata->td_maxend, end)) &&
86457096Sguido	    (SEQ_GE(seq, fdata->td_end - maxwin)) &&
86553642Sguido/* XXX what about big packets */
86653642Sguido#define MAXACKWINDOW 66000
86753642Sguido	    (ackskew >= -MAXACKWINDOW) &&
86853642Sguido	    (ackskew <= MAXACKWINDOW)) {
86953642Sguido		/* if ackskew < 0 then this should be due to fragmented
87053642Sguido		 * packets. There is no way to know the length of the
87153642Sguido		 * total packet in advance.
87253642Sguido		 * We do know the total length from the fragment cache though.
87353642Sguido		 * Note however that there might be more sessions with
87453642Sguido		 * exactly the same source and destination parameters in the
87553642Sguido		 * state cache (and source and destination is the only stuff
87653642Sguido		 * that is saved in the fragment cache). Note further that
87753642Sguido		 * some TCP connections in the state cache are hashed with
87853642Sguido		 * sport and dport as well which makes it not worthwhile to
87953642Sguido		 * look for them.
88053642Sguido		 * Thus, when ackskew is negative but still seems to belong
88153642Sguido		 * to this session, we bump up the destinations end value.
88253642Sguido		 */
88353642Sguido		if (ackskew < 0)
88453642Sguido			tdata->td_end = ack;
88553642Sguido
88653642Sguido		/* update max window seen */
88753642Sguido		if (fdata->td_maxwin < win)
88853642Sguido			fdata->td_maxwin = win;
88953642Sguido		if (SEQ_GT(end, fdata->td_end))
89053642Sguido			fdata->td_end = end;
		/*
		 * Raise the other side's maxend to ack + win (plus one when
		 * the advertised window is zero).
		 */
89153642Sguido		if (SEQ_GE(ack + win, tdata->td_maxend)) {
89253642Sguido			tdata->td_maxend = ack + win;
89353642Sguido			if (win == 0)
89453642Sguido				tdata->td_maxend++;
89553642Sguido		}
89653642Sguido
89760854Sdarrenr		ATOMIC_INCL(ips_stats.iss_hits);
89853642Sguido		/*
89953642Sguido		 * Nearing end of connection, start timeout.
90053642Sguido		 */
90164580Sdarrenr		/* source ? 0 : 1 -> !source */
90264580Sdarrenr		fr_tcp_age(&is->is_age, is->is_state, fin, !source);
90353642Sguido		ret = 1;
90453642Sguido	}
90567614Sdarrenr	MUTEX_EXIT(&is->is_lock);
90653642Sguido	return ret;
90753642Sguido}
90853642Sguido
90953642Sguido
91092685Sdarrenr/*
91192685Sdarrenr * Match a state table entry against an IP packet.
91292685Sdarrenr */
91353642Sguidostatic int fr_matchsrcdst(is, src, dst, fin, tcp)
91453642Sguidoipstate_t *is;
91560854Sdarrenrunion i6addr src, dst;
91653642Sguidofr_info_t *fin;
91753642Sguidotcphdr_t *tcp;
91853642Sguido{
91992685Sdarrenr	int ret = 0, rev, out, flags, idx;
92053642Sguido	u_short sp, dp;
92153642Sguido	void *ifp;
92253642Sguido
92380482Sdarrenr	rev = IP6NEQ(is->is_dst, dst);
92453642Sguido	ifp = fin->fin_ifp;
92553642Sguido	out = fin->fin_out;
92692685Sdarrenr	flags = is->is_flags & (FI_WILDA|FI_WILDP);
92792685Sdarrenr	sp = 0;
92892685Sdarrenr	dp = 0;
92953642Sguido
93053642Sguido	if (tcp != NULL) {
93153642Sguido		flags = is->is_flags;
93253642Sguido		sp = tcp->th_sport;
93353642Sguido		dp = tcp->th_dport;
93480482Sdarrenr		if (!rev) {
93580482Sdarrenr			if (!(flags & FI_W_SPORT) && (sp != is->is_sport))
93680482Sdarrenr				rev = 1;
93780482Sdarrenr			else if (!(flags & FI_W_DPORT) && (dp != is->is_dport))
93880482Sdarrenr				rev = 1;
93980482Sdarrenr		}
94053642Sguido	}
94153642Sguido
94292685Sdarrenr	idx = (out << 1) + rev;
94392685Sdarrenr
94492685Sdarrenr	if ((is->is_ifp[idx] == NULL &&
94592685Sdarrenr	     (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) ||
94692685Sdarrenr	    is->is_ifp[idx] == ifp)
94792685Sdarrenr		ret = 1;
94892685Sdarrenr
94953642Sguido	if (ret == 0)
95053642Sguido		return 0;
95153642Sguido	ret = 0;
95253642Sguido
95353642Sguido	if (rev == 0) {
95492685Sdarrenr		if ((IP6EQ(is->is_dst, dst) || (flags & FI_W_DADDR)) &&
95560854Sdarrenr		    (IP6EQ(is->is_src, src) || (flags & FI_W_SADDR)) &&
95653642Sguido		    (!tcp || ((sp == is->is_sport || flags & FI_W_SPORT) &&
95753642Sguido		     (dp == is->is_dport || flags & FI_W_DPORT)))) {
95853642Sguido			ret = 1;
95953642Sguido		}
96053642Sguido	} else {
96192685Sdarrenr		if ((IP6EQ(is->is_dst, src) || (flags & FI_W_DADDR)) &&
96260854Sdarrenr		    (IP6EQ(is->is_src, dst) || (flags & FI_W_SADDR)) &&
96353642Sguido		    (!tcp || ((sp == is->is_dport || flags & FI_W_DPORT) &&
96453642Sguido		     (dp == is->is_sport || flags & FI_W_SPORT)))) {
96553642Sguido			ret = 1;
96653642Sguido		}
96753642Sguido	}
96853642Sguido	if (ret == 0)
96953642Sguido		return 0;
97053642Sguido
97153642Sguido	/*
97253642Sguido	 * Whether or not this should be here, is questionable, but the aim
97353642Sguido	 * is to get this out of the main line.
97453642Sguido	 */
97553642Sguido	if (tcp == NULL)
97653642Sguido		flags = is->is_flags & (FI_CMP|(FI_CMP<<4));
97753642Sguido
97880482Sdarrenr	if (((fin->fin_fl & (flags >> 4)) != (flags & FI_CMP)) ||
97980482Sdarrenr	    (fin->fin_fi.fi_optmsk != is->is_opt) ||
98080482Sdarrenr	    (fin->fin_fi.fi_secmsk != is->is_sec) ||
98180482Sdarrenr	    (fin->fin_fi.fi_auth != is->is_auth))
98253642Sguido		return 0;
98353642Sguido
98492685Sdarrenr	flags = is->is_flags & (FI_WILDA|FI_WILDP);
98592685Sdarrenr	if ((flags & (FI_W_SADDR|FI_W_DADDR))) {
98692685Sdarrenr		if ((flags & FI_W_SADDR) != 0) {
98792685Sdarrenr			if (rev == 0) {
98892685Sdarrenr				is->is_src = fin->fin_fi.fi_src;
98992685Sdarrenr			} else {
99092685Sdarrenr				is->is_src = fin->fin_fi.fi_dst;
99192685Sdarrenr			}
99292685Sdarrenr		} else if ((flags & FI_W_DPORT) != 0) {
99392685Sdarrenr			if (rev == 0) {
99492685Sdarrenr				is->is_dst = fin->fin_fi.fi_dst;
99592685Sdarrenr			} else {
99692685Sdarrenr				is->is_dst = fin->fin_fi.fi_src;
99792685Sdarrenr			}
99892685Sdarrenr		}
99992685Sdarrenr		is->is_flags &= ~(FI_W_SADDR|FI_W_DADDR);
100092685Sdarrenr		if ((is->is_flags & (FI_WILDA|FI_WILDP)) == 0)
100192685Sdarrenr			ips_wild--;
100292685Sdarrenr	}
100392685Sdarrenr
100453642Sguido	if ((flags & (FI_W_SPORT|FI_W_DPORT))) {
100553642Sguido		if ((flags & FI_W_SPORT) != 0) {
100653642Sguido			if (rev == 0) {
100753642Sguido				is->is_sport = sp;
100853642Sguido				is->is_send = htonl(tcp->th_seq);
100953642Sguido			} else {
101053642Sguido				is->is_sport = dp;
101153642Sguido				is->is_send = htonl(tcp->th_ack);
101253642Sguido			}
101353642Sguido			is->is_maxsend = is->is_send + 1;
101453642Sguido		} else if ((flags & FI_W_DPORT) != 0) {
101553642Sguido			if (rev == 0) {
101653642Sguido				is->is_dport = dp;
101753642Sguido				is->is_dend = htonl(tcp->th_ack);
101853642Sguido			} else {
101953642Sguido				is->is_dport = sp;
102053642Sguido				is->is_dend = htonl(tcp->th_seq);
102153642Sguido			}
102253642Sguido			is->is_maxdend = is->is_dend + 1;
102353642Sguido		}
102453642Sguido		is->is_flags &= ~(FI_W_SPORT|FI_W_DPORT);
102567614Sdarrenr		ips_wild--;
102653642Sguido	}
102753642Sguido
102860854Sdarrenr	ret = -1;
102960854Sdarrenr
103092685Sdarrenr	if (is->is_ifp[idx] == NULL &&
103192685Sdarrenr	    (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*'))
103292685Sdarrenr		ret = idx;
103360854Sdarrenr
103460854Sdarrenr	if (ret >= 0) {
103560854Sdarrenr		is->is_ifp[ret] = ifp;
103692685Sdarrenr		strncpy(is->is_ifname[ret], IFNAME(ifp),
103780482Sdarrenr			sizeof(is->is_ifname[ret]));
103860854Sdarrenr	}
103980482Sdarrenr	fin->fin_rev = rev;
104053642Sguido	return 1;
104153642Sguido}
104253642Sguido
104360854Sdarrenrstatic int fr_matchicmpqueryreply(v, is, icmp)
104460854Sdarrenrint v;
104560854Sdarrenripstate_t *is;
104660854Sdarrenricmphdr_t *icmp;
104760854Sdarrenr{
104860854Sdarrenr	if (v == 4) {
104960854Sdarrenr		/*
105060854Sdarrenr		 * If we matched its type on the way in, then when going out
105160854Sdarrenr		 * it will still be the same type.
105260854Sdarrenr		 */
105360854Sdarrenr		if (((icmp->icmp_type == is->is_type) ||
105492685Sdarrenr		     (icmpreplytype4[is->is_type] == icmp->icmp_type))) {
105592685Sdarrenr			if (icmp->icmp_type != ICMP_ECHOREPLY)
105692685Sdarrenr				return 1;
105792685Sdarrenr			if ((icmp->icmp_id == is->is_icmp.ics_id) &&
105892685Sdarrenr			    (icmp->icmp_seq == is->is_icmp.ics_seq))
105992685Sdarrenr				return 1;
106092685Sdarrenr		}
106160854Sdarrenr	}
106260854Sdarrenr#ifdef	USE_INET6
106360854Sdarrenr	else if (is->is_v == 6) {
106492685Sdarrenr		if (((icmp->icmp_type == is->is_type) ||
106592685Sdarrenr		     (icmpreplytype6[is->is_type] == icmp->icmp_type))) {
106692685Sdarrenr			if (icmp->icmp_type != ICMP6_ECHO_REPLY)
106792685Sdarrenr				return 1;
106892685Sdarrenr			if ((icmp->icmp_id == is->is_icmp.ics_id) &&
106992685Sdarrenr			    (icmp->icmp_seq == is->is_icmp.ics_seq))
107092685Sdarrenr				return 1;
107192685Sdarrenr		}
107260854Sdarrenr	}
107360854Sdarrenr#endif
107460854Sdarrenr	return 0;
107560854Sdarrenr}
107660854Sdarrenr
/*
 * Try to associate an IPv4 ICMP error message with an existing state entry
 * by looking up the packet quoted inside the ICMP payload.
 *
 * ip  - IP header of the ICMP packet.
 * fin - packet information for the ICMP packet.
 *
 * Returns the is_rule pointer of the first matching state entry, or NULL
 * when the packet fails one of the checks below or no entry matches.
 */
107760854Sdarrenrstatic frentry_t *fr_checkicmpmatchingstate(ip, fin)
107853642Sguidoip_t *ip;
107953642Sguidofr_info_t *fin;
108053642Sguido{
108153642Sguido	register ipstate_t *is, **isp;
108253642Sguido	register u_short sport, dport;
108353642Sguido	register u_char	pr;
108460854Sdarrenr	union i6addr dst, src;
108553642Sguido	struct icmp *ic;
108655929Sguido	u_short savelen;
108764580Sdarrenr	icmphdr_t *icmp;
108853642Sguido	fr_info_t ofin;
108964580Sdarrenr	int type, len;
109053642Sguido	tcphdr_t *tcp;
109153642Sguido	frentry_t *fr;
109253642Sguido	ip_t *oip;
109355929Sguido	u_int hv;
109453642Sguido
109557096Sguido	/*
109657096Sguido	 * Does it at least have the return (basic) IP header ?
109753642Sguido	 * Only a basic IP header (no options) should be with
109853642Sguido	 * an ICMP error header.
109953642Sguido	 */
110067614Sdarrenr	if (((ip->ip_v != 4) || (ip->ip_hl != 5)) ||
110160854Sdarrenr	    (fin->fin_plen < ICMPERR_MINPKTLEN))
110253642Sguido		return NULL;
110392685Sdarrenr
110460854Sdarrenr	ic = (struct icmp *)fin->fin_dp;
110553642Sguido	type = ic->icmp_type;
110653642Sguido	/*
110753642Sguido	 * If it's not an error type, then return
110853642Sguido	 */
110953642Sguido	if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
111053642Sguido    	    (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
111153642Sguido    	    (type != ICMP_PARAMPROB))
111253642Sguido		return NULL;
111353642Sguido
	/*
	 * oip is the quoted (original) IP header, ICMPERR_ICMPHLEN bytes
	 * past the start of the ICMP header.
	 */
111460854Sdarrenr	oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
111560854Sdarrenr	if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((oip->ip_hl - 5) << 2))
111653642Sguido		return NULL;
111755929Sguido
111864580Sdarrenr	/*
111964580Sdarrenr	 * Sanity checks.
112064580Sdarrenr	 */
112164580Sdarrenr	len = fin->fin_dlen - ICMPERR_ICMPHLEN;
112264580Sdarrenr	if ((len <= 0) || ((oip->ip_hl << 2) > len))
112364580Sdarrenr		return NULL;
112464580Sdarrenr
112564580Sdarrenr	/*
112664580Sdarrenr	 * Is the buffer big enough for all of it ?  It's the size of the IP
112764580Sdarrenr	 * header claimed in the encapsulated part which is of concern.  It
112864580Sdarrenr	 * may be too big to be in this buffer but not so big that it's
112964580Sdarrenr	 * outside the ICMP packet, leading to TCP deref's causing problems.
113064580Sdarrenr	 * This is possible because we don't know how big oip_hl is when we
113164580Sdarrenr	 * do the pullup early in fr_check() and thus can't guarantee it is
113264580Sdarrenr	 * all here now.
113364580Sdarrenr	 */
113464580Sdarrenr#ifdef  _KERNEL
113564580Sdarrenr	{
113664580Sdarrenr	mb_t *m;
113764580Sdarrenr
113864580Sdarrenr# if SOLARIS
113964580Sdarrenr	m = fin->fin_qfm;
114064580Sdarrenr	if ((char *)oip + len > (char *)m->b_wptr)
114164580Sdarrenr		return NULL;
114264580Sdarrenr# else
114364580Sdarrenr	m = *(mb_t **)fin->fin_mp;
114464580Sdarrenr	if ((char *)oip + len > (char *)ip + m->m_len)
114564580Sdarrenr		return NULL;
114664580Sdarrenr# endif
114764580Sdarrenr	}
114864580Sdarrenr#endif
114964580Sdarrenr
115064580Sdarrenr	/*
115164580Sdarrenr	 * in the IPv4 case we must zero the i6addr union otherwise
115264580Sdarrenr	 * the IP6EQ and IP6NEQ macros produce the wrong results because
115364580Sdarrenr	 * of the 'junk' in the unused part of the union
115464580Sdarrenr	 */
115567614Sdarrenr	bzero((char *)&src, sizeof(src));
115667614Sdarrenr	bzero((char *)&dst, sizeof(dst));
115792685Sdarrenr	fr = NULL;
115864580Sdarrenr
	/*
	 * Quoted ICMP packets are looked up and returned from inside this
	 * switch; quoted TCP/UDP packets fall out to the port-based lookup
	 * after it; any other protocol is rejected.
	 */
115992685Sdarrenr	switch (oip->ip_p)
116092685Sdarrenr	{
116192685Sdarrenr	case IPPROTO_ICMP :
116255929Sguido		icmp = (icmphdr_t *)((char *)oip + (oip->ip_hl << 2));
116355929Sguido
116455929Sguido		/*
116555929Sguido		 * a ICMP error can only be generated as a result of an
116655929Sguido		 * ICMP query, not as the response on an ICMP error
116755929Sguido		 *
116855929Sguido		 * XXX theoretically ICMP_ECHOREP and the other reply's are
116955929Sguido		 * ICMP query's as well, but adding them here seems strange XXX
117055929Sguido		 */
117155929Sguido		 if ((icmp->icmp_type != ICMP_ECHO) &&
117255929Sguido		     (icmp->icmp_type != ICMP_TSTAMP) &&
117355929Sguido		     (icmp->icmp_type != ICMP_IREQ) &&
117457096Sguido		     (icmp->icmp_type != ICMP_MASKREQ))
117555929Sguido		    	return NULL;
117655929Sguido
117757096Sguido		/*
117855929Sguido		 * perform a lookup of the ICMP packet in the state table
117955929Sguido		 */
118055929Sguido		hv = (pr = oip->ip_p);
118160854Sdarrenr		src.in4 = oip->ip_src;
118260854Sdarrenr		hv += src.in4.s_addr;
118360854Sdarrenr		dst.in4 = oip->ip_dst;
118460854Sdarrenr		hv += dst.in4.s_addr;
118560854Sdarrenr		hv += icmp->icmp_id;
118660854Sdarrenr		hv += icmp->icmp_seq;
118755929Sguido		hv %= fr_statesize;
118855929Sguido
		/*
		 * ip_len of the quoted header is temporarily replaced by
		 * len while ofin is built, then restored.
		 */
118964580Sdarrenr		savelen = oip->ip_len;
119064580Sdarrenr		oip->ip_len = len;
119164580Sdarrenr		ofin.fin_v = 4;
119255929Sguido		fr_makefrip(oip->ip_hl << 2, oip, &ofin);
119364580Sdarrenr		oip->ip_len = savelen;
119455929Sguido		ofin.fin_ifp = fin->fin_ifp;
		/* the quoted packet travelled the opposite way */
119555929Sguido		ofin.fin_out = !fin->fin_out;
119655929Sguido		ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
119755929Sguido
119855929Sguido		READ_ENTER(&ipf_state);
119960854Sdarrenr		for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext)
120060854Sdarrenr			if ((is->is_p == pr) && (is->is_v == 4) &&
120160854Sdarrenr			    fr_matchsrcdst(is, src, dst, &ofin, NULL) &&
120260854Sdarrenr			    fr_matchicmpqueryreply(is->is_v, is, icmp)) {
				/*
				 * NOTE(review): the byte count here uses
				 * ip->ip_len while the TCP/UDP path below
				 * uses fin->fin_plen - confirm both are
				 * meant to be the same quantity.
				 */
120360854Sdarrenr				ips_stats.iss_hits++;
120460854Sdarrenr				is->is_pkts++;
120560854Sdarrenr				is->is_bytes += ip->ip_len;
120660854Sdarrenr				fr = is->is_rule;
120792685Sdarrenr				break;
120855929Sguido			}
120955929Sguido		RWLOCK_EXIT(&ipf_state);
121092685Sdarrenr		return fr;
121192685Sdarrenr
121292685Sdarrenr	case IPPROTO_TCP :
121392685Sdarrenr	case IPPROTO_UDP :
121492685Sdarrenr		break;
121592685Sdarrenr	default :
121655929Sguido		return NULL;
121792685Sdarrenr	}
121855929Sguido
	/*
	 * TCP or UDP: only th_sport/th_dport are read from the quoted
	 * transport header here.
	 */
121953642Sguido	tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
122053642Sguido	dport = tcp->th_dport;
122153642Sguido	sport = tcp->th_sport;
122253642Sguido
122353642Sguido	hv = (pr = oip->ip_p);
122460854Sdarrenr	src.in4 = oip->ip_src;
122560854Sdarrenr	hv += src.in4.s_addr;
122660854Sdarrenr	dst.in4 = oip->ip_dst;
122760854Sdarrenr	hv += dst.in4.s_addr;
122853642Sguido	hv += dport;
122953642Sguido	hv += sport;
123053642Sguido	hv %= fr_statesize;
123153642Sguido	/*
123253642Sguido	 * we make an fin entry to be able to feed it to
123353642Sguido	 * matchsrcdst note that not all fields are necessary
123453642Sguido	 * but this is the cleanest way. Note further we fill
123553642Sguido	 * in fin_mp such that if someone uses it we'll get
123653642Sguido	 * a kernel panic. fr_matchsrcdst does not use this.
123753642Sguido	 *
123853642Sguido	 * watch out here, as ip is in host order and oip in network
123953642Sguido	 * order. Any change we make must be undone afterwards.
124053642Sguido	 */
124155929Sguido	savelen = oip->ip_len;
124264580Sdarrenr	oip->ip_len = len;
124364580Sdarrenr	ofin.fin_v = 4;
124453642Sguido	fr_makefrip(oip->ip_hl << 2, oip, &ofin);
124555929Sguido	oip->ip_len = savelen;
124653642Sguido	ofin.fin_ifp = fin->fin_ifp;
124753642Sguido	ofin.fin_out = !fin->fin_out;
124853642Sguido	ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
124953642Sguido	READ_ENTER(&ipf_state);
125060854Sdarrenr	for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) {
125153642Sguido		/*
125253642Sguido		 * Only allow this icmp though if the
125353642Sguido		 * encapsulated packet was allowed through the
125453642Sguido		 * other way around. Note that the minimal amount
125553642Sguido		 * of info present does not allow for checking against
125653642Sguido		 * tcp internals such as seq and ack numbers.
125753642Sguido		 */
125860854Sdarrenr		if ((is->is_p == pr) && (is->is_v == 4) &&
125953642Sguido		    fr_matchsrcdst(is, src, dst, &ofin, tcp)) {
126053642Sguido			fr = is->is_rule;
126153642Sguido			ips_stats.iss_hits++;
126253642Sguido			is->is_pkts++;
126360854Sdarrenr			is->is_bytes += fin->fin_plen;
126453642Sguido			/*
126553642Sguido			 * we deliberately do not touch the timeouts
126653642Sguido			 * for the accompanying state table entry.
126753642Sguido			 * It remains to be seen if that is correct. XXX
126853642Sguido			 */
126992685Sdarrenr			break;
127053642Sguido		}
127153642Sguido	}
127253642Sguido	RWLOCK_EXIT(&ipf_state);
127392685Sdarrenr	return fr;
127453642Sguido}
127553642Sguido
127667614Sdarrenr
127792685Sdarrenr/*
127892685Sdarrenr * Move a state hash table entry from its old location at is->is_hv to
127992685Sdarrenr * its new location, indexed by hv % fr_statesize.
128092685Sdarrenr */
128167614Sdarrenrstatic void fr_ipsmove(isp, is, hv)
128267614Sdarrenripstate_t **isp, *is;
128367614Sdarrenru_int hv;
128467614Sdarrenr{
128567614Sdarrenr	u_int hvm;
128667614Sdarrenr
128767614Sdarrenr	hvm = is->is_hv;
128867614Sdarrenr	/*
128967614Sdarrenr	 * Remove the hash from the old location...
129067614Sdarrenr	 */
129167614Sdarrenr	if (is->is_hnext)
129267614Sdarrenr		is->is_hnext->is_phnext = isp;
129367614Sdarrenr	*isp = is->is_hnext;
129467614Sdarrenr	if (ips_table[hvm] == NULL)
129567614Sdarrenr		ips_stats.iss_inuse--;
129667614Sdarrenr
129767614Sdarrenr	/*
129867614Sdarrenr	 * ...and put the hash in the new one.
129967614Sdarrenr	 */
130067614Sdarrenr	hvm = hv % fr_statesize;
130167853Sdarrenr	is->is_hv = hvm;
130267614Sdarrenr	isp = &ips_table[hvm];
130367614Sdarrenr	if (*isp)
130467614Sdarrenr		(*isp)->is_phnext = &is->is_hnext;
130567614Sdarrenr	else
130667614Sdarrenr		ips_stats.iss_inuse++;
130767614Sdarrenr	is->is_phnext = isp;
130867614Sdarrenr	is->is_hnext = *isp;
130967614Sdarrenr	*isp = is;
131067614Sdarrenr}
131167614Sdarrenr
131267614Sdarrenr
131353642Sguido/*
131453642Sguido * Check if a packet has a registered state.
131553642Sguido */
/*
 * ip  - start of the IP header; the transport header is taken to begin
 *       fin->fin_hlen bytes after it.
 * fin - packet information.  On a hit fin_rule and fin_fr are filled in,
 *       plus fin_group/fin_icode when the state entry has a rule.
 *
 * Returns the rule stored in the matching state entry (is_rule) or, for
 * ICMP/ICMPv6, the rule returned by the ICMP-error lookup.  Returns NULL
 * when nothing matches, when fr_state_lock is set, when fin_off is non-zero
 * or when the packet is flagged FI_SHORT.
 */
131653642Sguidofrentry_t *fr_checkstate(ip, fin)
131753642Sguidoip_t *ip;
131853642Sguidofr_info_t *fin;
131953642Sguido{
132060854Sdarrenr	union i6addr dst, src;
132153642Sguido	register ipstate_t *is, **isp;
132253642Sguido	register u_char pr;
132360854Sdarrenr	u_int hv, hvm, hlen, tryagain, pass, v;
132453642Sguido	struct icmp *ic;
132553642Sguido	frentry_t *fr;
132653642Sguido	tcphdr_t *tcp;
132792685Sdarrenr	int rev;
132853642Sguido
132980482Sdarrenr	if (fr_state_lock || (fin->fin_off != 0) || (fin->fin_fl & FI_SHORT))
133053642Sguido		return NULL;
133153642Sguido
133253642Sguido	is = NULL;
133353642Sguido	hlen = fin->fin_hlen;
	/* tcp and ic are two views of the same transport header address */
133453642Sguido	tcp = (tcphdr_t *)((char *)ip + hlen);
133553642Sguido	ic = (struct icmp *)tcp;
	/* hash starts as protocol + first word of each address */
133660854Sdarrenr	hv = (pr = fin->fin_fi.fi_p);
133760854Sdarrenr	src = fin->fin_fi.fi_src;
133860854Sdarrenr	dst = fin->fin_fi.fi_dst;
133960854Sdarrenr	hv += src.in4.s_addr;
134060854Sdarrenr	hv += dst.in4.s_addr;
134153642Sguido
134253642Sguido	/*
134353642Sguido	 * Search the hash table for matching packet header info.
134492685Sdarrenr	 * At the bottom of this switch statement, the following is expected:
134592685Sdarrenr	 * is == NULL, no lock on ipf_state is held.
134692685Sdarrenr	 * is != NULL, a lock on ipf_state is held.
134753642Sguido	 */
134860854Sdarrenr	v = fin->fin_fi.fi_v;
134992685Sdarrenr#ifdef	USE_INET6
135092685Sdarrenr	if (v == 6) {
135192685Sdarrenr		hv += fin->fin_fi.fi_src.i6[1];
135292685Sdarrenr		hv += fin->fin_fi.fi_src.i6[2];
135392685Sdarrenr		hv += fin->fin_fi.fi_src.i6[3];
135492685Sdarrenr
		/*
		 * For ICMPv6 to a multicast destination the destination
		 * address is taken back out of the hash altogether.
		 */
135592685Sdarrenr		if ((fin->fin_p == IPPROTO_ICMPV6) &&
135692685Sdarrenr		    IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) {
135792685Sdarrenr			hv -= dst.in4.s_addr;
135892685Sdarrenr		} else {
135992685Sdarrenr			hv += fin->fin_fi.fi_dst.i6[1];
136092685Sdarrenr			hv += fin->fin_fi.fi_dst.i6[2];
136192685Sdarrenr			hv += fin->fin_fi.fi_dst.i6[3];
136292685Sdarrenr		}
136392685Sdarrenr	}
136492685Sdarrenr#endif
136592685Sdarrenr
136692685Sdarrenr	switch (fin->fin_p)
136753642Sguido	{
136860854Sdarrenr#ifdef	USE_INET6
136960854Sdarrenr	case IPPROTO_ICMPV6 :
137092685Sdarrenr		tcp = NULL;
137192685Sdarrenr		tryagain = 0;
137260854Sdarrenr		if (v == 6) {
137360854Sdarrenr			if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
137460854Sdarrenr			    (ic->icmp_type == ICMP6_ECHO_REPLY)) {
137560854Sdarrenr				hv += ic->icmp_id;
137660854Sdarrenr				hv += ic->icmp_seq;
137760854Sdarrenr			}
137860854Sdarrenr		}
		/*
		 * First pass runs under a read lock; the retry below jumps
		 * back to icmp6again holding a write lock instead.
		 */
137992685Sdarrenr		READ_ENTER(&ipf_state);
138092685Sdarrenricmp6again:
138192685Sdarrenr		hvm = hv % fr_statesize;
138292685Sdarrenr		for (isp = &ips_table[hvm]; (is = *isp); isp = &is->is_hnext)
138392685Sdarrenr			if ((is->is_p == pr) && (is->is_v == v) &&
138492685Sdarrenr			    fr_matchsrcdst(is, src, dst, fin, NULL) &&
138592685Sdarrenr			    fr_matchicmpqueryreply(v, is, ic)) {
138692685Sdarrenr				rev = fin->fin_rev;
138792685Sdarrenr				if (is->is_frage[rev] != 0)
138892685Sdarrenr					is->is_age = is->is_frage[rev];
138992685Sdarrenr				else if (fin->fin_rev)
139092685Sdarrenr					is->is_age = fr_icmpacktimeout;
139192685Sdarrenr				else
139292685Sdarrenr					is->is_age = fr_icmptimeout;
139392685Sdarrenr				break;
139492685Sdarrenr			}
139592685Sdarrenr
139692685Sdarrenr		if (is != NULL) {
			/*
			 * Found on the retry and the destination wildcard
			 * is no longer set: put the source address back
			 * into the hash and move the entry to that bucket.
			 */
139792685Sdarrenr			if (tryagain && !(is->is_flags & FI_W_DADDR)) {
139892685Sdarrenr				hv += fin->fin_fi.fi_src.i6[0];
139992685Sdarrenr				hv += fin->fin_fi.fi_src.i6[1];
140092685Sdarrenr				hv += fin->fin_fi.fi_src.i6[2];
140192685Sdarrenr				hv += fin->fin_fi.fi_src.i6[3];
140292685Sdarrenr				fr_ipsmove(isp, is, hv);
140392685Sdarrenr				MUTEX_DOWNGRADE(&ipf_state);
140492685Sdarrenr			}
140592685Sdarrenr			break;
140692685Sdarrenr		}
140792685Sdarrenr		RWLOCK_EXIT(&ipf_state);
140892685Sdarrenr
140992685Sdarrenr		/*
141092685Sdarrenr		 * No matching icmp state entry. Perhaps this is a
141192685Sdarrenr		 * response to another state entry.
141292685Sdarrenr		 */
141392685Sdarrenr		if ((ips_wild != 0) && (v == 6) && (tryagain == 0) &&
141492685Sdarrenr		    !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) {
			/* retry once with the source address left out */
141592685Sdarrenr			hv -= fin->fin_fi.fi_src.i6[0];
141692685Sdarrenr			hv -= fin->fin_fi.fi_src.i6[1];
141792685Sdarrenr			hv -= fin->fin_fi.fi_src.i6[2];
141892685Sdarrenr			hv -= fin->fin_fi.fi_src.i6[3];
141992685Sdarrenr			tryagain = 1;
142092685Sdarrenr			WRITE_ENTER(&ipf_state);
142192685Sdarrenr			goto icmp6again;
142292685Sdarrenr		}
142392685Sdarrenr
142492685Sdarrenr		fr = fr_checkicmp6matchingstate((ip6_t *)ip, fin);
142592685Sdarrenr		if (fr)
142692685Sdarrenr			return fr;
142792685Sdarrenr		break;
142860854Sdarrenr#endif
142953642Sguido	case IPPROTO_ICMP :
143092685Sdarrenr		tcp = NULL;
143160854Sdarrenr		if (v == 4) {
143255929Sguido			hv += ic->icmp_id;
143355929Sguido			hv += ic->icmp_seq;
143455929Sguido		}
143592685Sdarrenr		hvm = hv % fr_statesize;
143653642Sguido		READ_ENTER(&ipf_state);
143792685Sdarrenr		for (isp = &ips_table[hvm]; (is = *isp); isp = &is->is_hnext)
143860854Sdarrenr			if ((is->is_p == pr) && (is->is_v == v) &&
143960854Sdarrenr			    fr_matchsrcdst(is, src, dst, fin, NULL) &&
144060854Sdarrenr			    fr_matchicmpqueryreply(v, is, ic)) {
				/*
				 * A per-direction age stored in the entry
				 * takes precedence over the global timeouts.
				 */
144192685Sdarrenr				rev = fin->fin_rev;
144292685Sdarrenr				if (is->is_frage[rev] != 0)
144392685Sdarrenr					is->is_age = is->is_frage[rev];
144492685Sdarrenr				else if (fin->fin_rev)
144580482Sdarrenr					is->is_age = fr_icmpacktimeout;
144680482Sdarrenr				else
144780482Sdarrenr					is->is_age = fr_icmptimeout;
144853642Sguido				break;
144953642Sguido			}
145092685Sdarrenr
145153642Sguido		if (is != NULL)
145253642Sguido			break;
145353642Sguido		RWLOCK_EXIT(&ipf_state);
145453642Sguido		/*
145553642Sguido		 * No matching icmp state entry. Perhaps this is a
145653642Sguido		 * response to another state entry.
145753642Sguido		 */
145892685Sdarrenr		fr = fr_checkicmpmatchingstate(ip, fin);
145953642Sguido		if (fr)
146053642Sguido			return fr;
146153642Sguido		break;
146253642Sguido	case IPPROTO_TCP :
146364580Sdarrenr		/*
146464580Sdarrenr		 * Just plain ignore RST flag set with either FIN or SYN.
146564580Sdarrenr		 */
146692685Sdarrenr		if ((tcp->th_flags & TH_RST) &&
146792685Sdarrenr		    ((tcp->th_flags & (TH_FIN|TH_SYN|TH_RST)) != TH_RST))
146864580Sdarrenr			break;
		/* FALLTHROUGH - TCP shares the port-based lookup below */
146967614Sdarrenr	case IPPROTO_UDP :
147092685Sdarrenr	    {
147192685Sdarrenr		register u_short dport, sport;
147292685Sdarrenr
147367614Sdarrenr		dport = tcp->th_dport;
147467614Sdarrenr		sport = tcp->th_sport;
147553642Sguido		tryagain = 0;
147653642Sguido		hv += dport;
147753642Sguido		hv += sport;
147867614Sdarrenr		READ_ENTER(&ipf_state);
147967614Sdarrenrretry_tcpudp:
148053642Sguido		hvm = hv % fr_statesize;
148167614Sdarrenr		for (isp = &ips_table[hvm]; (is = *isp); isp = &is->is_hnext)
148260854Sdarrenr			if ((is->is_p == pr) && (is->is_v == v) &&
148353642Sguido			    fr_matchsrcdst(is, src, dst, fin, tcp)) {
148492685Sdarrenr				rev = fin->fin_rev;
148567614Sdarrenr				if ((pr == IPPROTO_TCP)) {
					/*
					 * Entry matched on addresses/ports
					 * but the segment failed the TCP
					 * window check: keep searching.
					 */
148667614Sdarrenr					if (!fr_tcpstate(is, fin, ip, tcp)) {
148767614Sdarrenr						continue;
148867614Sdarrenr					}
148980482Sdarrenr				} else if ((pr == IPPROTO_UDP)) {
149092685Sdarrenr					if (is->is_frage[rev] != 0)
149192685Sdarrenr						is->is_age = is->is_frage[rev];
149292685Sdarrenr					else if (fin->fin_rev)
149380482Sdarrenr						is->is_age = fr_udpacktimeout;
149480482Sdarrenr					else
149580482Sdarrenr						is->is_age = fr_udptimeout;
149667614Sdarrenr				}
149753642Sguido				break;
149853642Sguido			}
149967614Sdarrenr		if (is != NULL) {
			/*
			 * Found on the port-less retry and no wildcard
			 * flags remain: add the ports back into the hash
			 * and move the entry to that bucket.
			 */
150067614Sdarrenr			if (tryagain &&
150167614Sdarrenr			    !(is->is_flags & (FI_WILDP|FI_WILDA))) {
150267614Sdarrenr				hv += dport;
150367614Sdarrenr				hv += sport;
150467614Sdarrenr				fr_ipsmove(isp, is, hv);
150567614Sdarrenr				MUTEX_DOWNGRADE(&ipf_state);
150667614Sdarrenr			}
150753642Sguido			break;
150867614Sdarrenr		}
150953642Sguido		RWLOCK_EXIT(&ipf_state);
		/*
		 * Nothing found: if wildcard entries exist, retry once with
		 * the ports left out of the hash, under a write lock.
		 */
151067614Sdarrenr		if (!tryagain && ips_wild) {
151167614Sdarrenr			hv -= dport;
151267614Sdarrenr			hv -= sport;
151353642Sguido			tryagain = 1;
151467614Sdarrenr			WRITE_ENTER(&ipf_state);
151567614Sdarrenr			goto retry_tcpudp;
151653642Sguido		}
151753642Sguido		break;
151853642Sguido	    }
151953642Sguido	default :
152092685Sdarrenr		tcp = NULL;
152192685Sdarrenr		hv %= fr_statesize;
152292685Sdarrenr		READ_ENTER(&ipf_state);
152392685Sdarrenr		for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) {
152492685Sdarrenr			if ((is->is_p == pr) && (is->is_v == v) &&
152592685Sdarrenr			    fr_matchsrcdst(is, src, dst, fin, NULL)) {
				/* other protocols reuse the UDP timeout */
152692685Sdarrenr				rev = fin->fin_rev;
152792685Sdarrenr				if (is->is_frage[rev] != 0)
152892685Sdarrenr					is->is_age = is->is_frage[rev];
152992685Sdarrenr				else
153092685Sdarrenr					is->is_age = fr_udptimeout;
153192685Sdarrenr				break;
153292685Sdarrenr			}
153392685Sdarrenr		}
153492685Sdarrenr		if (is == NULL) {
153592685Sdarrenr			RWLOCK_EXIT(&ipf_state);
153692685Sdarrenr		}
153753642Sguido		break;
153853642Sguido	}
153992685Sdarrenr
154053642Sguido	if (is == NULL) {
154160854Sdarrenr		ATOMIC_INCL(ips_stats.iss_miss);
154253642Sguido		return NULL;
154353642Sguido	}
154492685Sdarrenr
	/*
	 * Hit: ipf_state is still held here.  Update the per-entry counters
	 * under is_lock and copy the rule details into fin before the
	 * ipf_state lock is dropped.
	 */
154560854Sdarrenr	MUTEX_ENTER(&is->is_lock);
154660854Sdarrenr	is->is_bytes += fin->fin_plen;
154753642Sguido	ips_stats.iss_hits++;
154853642Sguido	is->is_pkts++;
154960854Sdarrenr	MUTEX_EXIT(&is->is_lock);
155053642Sguido	fr = is->is_rule;
155192685Sdarrenr	fin->fin_rule = is->is_rulen;
155292685Sdarrenr	if (fr != NULL) {
155392685Sdarrenr		fin->fin_group = fr->fr_group;
155492685Sdarrenr		fin->fin_icode = fr->fr_icode;
155592685Sdarrenr	}
155653642Sguido	fin->fin_fr = fr;
155753642Sguido	pass = is->is_pass;
155892685Sdarrenr	RWLOCK_EXIT(&ipf_state);
155992685Sdarrenr	if ((fin->fin_fl & FI_FRAG) && (pass & FR_KEEPFRAG))
156092685Sdarrenr		ipfr_newfrag(ip, fin, pass ^ FR_KEEPSTATE);
	/*
	 * NOTE(review): in non-kernel builds the entry is deleted here after
	 * ipf_state has already been released - confirm this is acceptable
	 * for the userland build.
	 */
156160854Sdarrenr#ifndef	_KERNEL
156292685Sdarrenr	if ((tcp != NULL) && (tcp->th_flags & TCP_CLOSE))
156360854Sdarrenr		fr_delstate(is);
156460854Sdarrenr#endif
156553642Sguido	return fr;
156653642Sguido}
156753642Sguido
156853642Sguido
156992685Sdarrenr/*
157092685Sdarrenr * Sync. state entries.  If interfaces come or go or just change position,
157192685Sdarrenr * this is needed.
157292685Sdarrenr */
157360854Sdarrenrvoid ip_statesync(ifp)
157460854Sdarrenrvoid *ifp;
157560854Sdarrenr{
157660854Sdarrenr	register ipstate_t *is;
157792685Sdarrenr	int i;
157860854Sdarrenr
157960854Sdarrenr	WRITE_ENTER(&ipf_state);
158060854Sdarrenr	for (is = ips_list; is; is = is->is_next) {
158192685Sdarrenr		for (i = 0; i < 4; i++) {
158292685Sdarrenr			if (is->is_ifp[i] == ifp) {
158392685Sdarrenr				is->is_ifpin = GETUNIT(is->is_ifname[i],
158492685Sdarrenr						       is->is_v);
158592685Sdarrenr				if (!is->is_ifp[i])
158692685Sdarrenr					is->is_ifp[i] = (void *)-1;
158792685Sdarrenr			}
158860854Sdarrenr		}
158960854Sdarrenr	}
159060854Sdarrenr	RWLOCK_EXIT(&ipf_state);
159160854Sdarrenr}
159260854Sdarrenr
159360854Sdarrenr
159472006Sdarrenr/*
159572006Sdarrenr * Must always be called with fr_ipfstate held as a write lock.
159672006Sdarrenr */
159753642Sguidostatic void fr_delstate(is)
159853642Sguidoipstate_t *is;
159953642Sguido{
160053642Sguido	frentry_t *fr;
160153642Sguido
160267614Sdarrenr	if (is->is_flags & (FI_WILDP|FI_WILDA))
160367614Sdarrenr		ips_wild--;
160460854Sdarrenr	if (is->is_next)
160560854Sdarrenr		is->is_next->is_pnext = is->is_pnext;
160660854Sdarrenr	*is->is_pnext = is->is_next;
160760854Sdarrenr	if (is->is_hnext)
160860854Sdarrenr		is->is_hnext->is_phnext = is->is_phnext;
160960854Sdarrenr	*is->is_phnext = is->is_hnext;
161060854Sdarrenr	if (ips_table[is->is_hv] == NULL)
161160854Sdarrenr		ips_stats.iss_inuse--;
161292685Sdarrenr	if (is->is_me)
161392685Sdarrenr		*is->is_me = NULL;
161460854Sdarrenr
161553642Sguido	fr = is->is_rule;
161653642Sguido	if (fr != NULL) {
161772006Sdarrenr		fr->fr_ref--;
161872006Sdarrenr		if (fr->fr_ref == 0) {
161953642Sguido			KFREE(fr);
162072006Sdarrenr		}
162153642Sguido	}
162260854Sdarrenr#ifdef	_KERNEL
162360854Sdarrenr	MUTEX_DESTROY(&is->is_lock);
162460854Sdarrenr#endif
162553642Sguido	KFREE(is);
162660854Sdarrenr	ips_num--;
162753642Sguido}
162853642Sguido
162953642Sguido
163053642Sguido/*
163153642Sguido * Free memory in use by all state info. kept.
163253642Sguido */
163353642Sguidovoid fr_stateunload()
163453642Sguido{
163560854Sdarrenr	register ipstate_t *is;
163653642Sguido
163753642Sguido	WRITE_ENTER(&ipf_state);
163860854Sdarrenr	while ((is = ips_list))
163960854Sdarrenr		fr_delstate(is);
164053642Sguido	ips_stats.iss_inuse = 0;
164153642Sguido	ips_num = 0;
164253642Sguido	RWLOCK_EXIT(&ipf_state);
164380482Sdarrenr	if (ips_table)
164480482Sdarrenr		KFREES(ips_table, fr_statesize * sizeof(ipstate_t *));
164553642Sguido	ips_table = NULL;
164653642Sguido}
164753642Sguido
164853642Sguido
164953642Sguido/*
165053642Sguido * Slowly expire held state for thingslike UDP and ICMP.  Timeouts are set
165153642Sguido * in expectation of this being called twice per second.
165253642Sguido */
165353642Sguidovoid fr_timeoutstate()
165453642Sguido{
165553642Sguido	register ipstate_t *is, **isp;
165653642Sguido#if defined(_KERNEL) && !SOLARIS
165753642Sguido	int s;
165853642Sguido#endif
165953642Sguido
166053642Sguido	SPL_NET(s);
166153642Sguido	WRITE_ENTER(&ipf_state);
166260854Sdarrenr	for (isp = &ips_list; (is = *isp); )
166360854Sdarrenr		if (is->is_age && !--is->is_age) {
166460854Sdarrenr			if (is->is_p == IPPROTO_TCP)
166560854Sdarrenr				ips_stats.iss_fin++;
166660854Sdarrenr			else
166760854Sdarrenr				ips_stats.iss_expire++;
166853642Sguido#ifdef	IPFILTER_LOG
166960854Sdarrenr			ipstate_log(is, ISL_EXPIRE);
167053642Sguido#endif
167160854Sdarrenr			fr_delstate(is);
167260854Sdarrenr		} else
167360854Sdarrenr			isp = &is->is_next;
167455929Sguido	if (fr_state_doflush) {
167592685Sdarrenr		(void) fr_state_flush(2);
167655929Sguido		fr_state_doflush = 0;
167755929Sguido	}
167872006Sdarrenr	RWLOCK_EXIT(&ipf_state);
167972006Sdarrenr	SPL_X(s);
168053642Sguido}
168153642Sguido
168253642Sguido
168353642Sguido/*
168453642Sguido * Original idea freom Pradeep Krishnan for use primarily with NAT code.
168553642Sguido * (pkrishna@netcom.com)
168664580Sdarrenr *
168764580Sdarrenr * Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29:
168864580Sdarrenr *
168964580Sdarrenr * - (try to) base state transitions on real evidence only,
169064580Sdarrenr *   i.e. packets that are sent and have been received by ipfilter;
169164580Sdarrenr *   diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used.
169264580Sdarrenr *
169364580Sdarrenr * - deal with half-closed connections correctly;
169464580Sdarrenr *
169564580Sdarrenr * - store the state of the source in state[0] such that ipfstat
169664580Sdarrenr *   displays the state as source/dest instead of dest/source; the calls
169764580Sdarrenr *   to fr_tcp_age have been changed accordingly.
169864580Sdarrenr *
169964580Sdarrenr * Parameters:
170064580Sdarrenr *
170164580Sdarrenr *    state[0] = state of source (host that initiated connection)
170264580Sdarrenr *    state[1] = state of dest   (host that accepted the connection)
170364580Sdarrenr *
170464580Sdarrenr *    dir == 0 : a packet from source to dest
170564580Sdarrenr *    dir == 1 : a packet from dest to source
170664580Sdarrenr *
170753642Sguido */
170860854Sdarrenrvoid fr_tcp_age(age, state, fin, dir)
170953642Sguidou_long *age;
171053642Sguidou_char *state;
171153642Sguidofr_info_t *fin;
171253642Sguidoint dir;
171353642Sguido{
171453642Sguido	tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp;
171553642Sguido	u_char flags = tcp->th_flags;
171653642Sguido	int dlen, ostate;
171753642Sguido
171853642Sguido	ostate = state[1 - dir];
171953642Sguido
172060854Sdarrenr	dlen = fin->fin_plen - fin->fin_hlen - (tcp->th_off << 2);
172153642Sguido
172253642Sguido	if (flags & TH_RST) {
172353642Sguido		if (!(tcp->th_flags & TH_PUSH) && !dlen) {
172453642Sguido			*age = fr_tcpclosed;
172553642Sguido			state[dir] = TCPS_CLOSED;
172653642Sguido		} else {
172753642Sguido			*age = fr_tcpclosewait;
172853642Sguido			state[dir] = TCPS_CLOSE_WAIT;
172953642Sguido		}
173053642Sguido		return;
173153642Sguido	}
173253642Sguido
173364580Sdarrenr	*age = fr_tcptimeout; /* default 4 mins */
173453642Sguido
173553642Sguido	switch(state[dir])
173653642Sguido	{
173764580Sdarrenr	case TCPS_CLOSED: /* 0 */
173864580Sdarrenr		if ((flags & TH_OPENING) == TH_OPENING) {
173964580Sdarrenr			/*
174064580Sdarrenr			 * 'dir' received an S and sends SA in response,
174164580Sdarrenr			 * CLOSED -> SYN_RECEIVED
174264580Sdarrenr			 */
174364580Sdarrenr			state[dir] = TCPS_SYN_RECEIVED;
174464580Sdarrenr			*age = fr_tcptimeout;
174564580Sdarrenr		} else if ((flags & (TH_SYN|TH_ACK)) == TH_SYN) {
174664580Sdarrenr			/* 'dir' sent S, CLOSED -> SYN_SENT */
174764580Sdarrenr			state[dir] = TCPS_SYN_SENT;
174864580Sdarrenr			*age = fr_tcptimeout;
174964580Sdarrenr		}
175064580Sdarrenr		/*
175164580Sdarrenr		 * The next piece of code makes it possible to get
175264580Sdarrenr		 * already established connections into the state table
175364580Sdarrenr		 * after a restart or reload of the filter rules; this
175464580Sdarrenr		 * does not work when a strict 'flags S keep state' is
175564580Sdarrenr		 * used for tcp connections of course
175664580Sdarrenr		 */
175753642Sguido		if ((flags & (TH_FIN|TH_SYN|TH_RST|TH_ACK)) == TH_ACK) {
175864580Sdarrenr			/* we saw an A, guess 'dir' is in ESTABLISHED mode */
175992685Sdarrenr			if (state[1 - dir] == TCPS_CLOSED ||
176092685Sdarrenr			    state[1 - dir] == TCPS_ESTABLISHED) {
176192685Sdarrenr				state[dir] = TCPS_ESTABLISHED;
176292685Sdarrenr				*age = fr_tcpidletimeout;
176392685Sdarrenr			}
176453642Sguido		}
176564580Sdarrenr		/*
176664580Sdarrenr		 * TODO: besides regular ACK packets we can have other
176764580Sdarrenr		 * packets as well; it is yet to be determined how we
176864580Sdarrenr		 * should initialize the states in those cases
176964580Sdarrenr		 */
177064580Sdarrenr		break;
177164580Sdarrenr
177264580Sdarrenr	case TCPS_LISTEN: /* 1 */
177364580Sdarrenr		/* NOT USED */
177464580Sdarrenr		break;
177564580Sdarrenr
177664580Sdarrenr	case TCPS_SYN_SENT: /* 2 */
177764580Sdarrenr		if ((flags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
177864580Sdarrenr			/*
177964580Sdarrenr			 * We see an A from 'dir' which is in SYN_SENT
178064580Sdarrenr			 * state: 'dir' sent an A in response to an SA
178164580Sdarrenr			 * which it received, SYN_SENT -> ESTABLISHED
178264580Sdarrenr			 */
178364580Sdarrenr			state[dir] = TCPS_ESTABLISHED;
178464580Sdarrenr			*age = fr_tcpidletimeout;
178564580Sdarrenr		} else if (flags & TH_FIN) {
178664580Sdarrenr			/*
178764580Sdarrenr			 * We see an F from 'dir' which is in SYN_SENT
178864580Sdarrenr			 * state and wants to close its side of the
178964580Sdarrenr			 * connection; SYN_SENT -> FIN_WAIT_1
179064580Sdarrenr			 */
179164580Sdarrenr			state[dir] = TCPS_FIN_WAIT_1;
179264580Sdarrenr			*age = fr_tcpidletimeout; /* or fr_tcptimeout? */
179364580Sdarrenr		} else if ((flags & TH_OPENING) == TH_OPENING) {
179464580Sdarrenr			/*
179564580Sdarrenr			 * We see an SA from 'dir' which is already in
179664580Sdarrenr			 * SYN_SENT state, this means we have a
179764580Sdarrenr			 * simultaneous open; SYN_SENT -> SYN_RECEIVED
179864580Sdarrenr			 */
179953642Sguido			state[dir] = TCPS_SYN_RECEIVED;
180064580Sdarrenr			*age = fr_tcptimeout;
180164580Sdarrenr		}
180253642Sguido		break;
180364580Sdarrenr
180464580Sdarrenr	case TCPS_SYN_RECEIVED: /* 3 */
180564580Sdarrenr		if ((flags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
180664580Sdarrenr			/*
180764580Sdarrenr			 * We see an A from 'dir' which was in SYN_RECEIVED
180864580Sdarrenr			 * state so it must now be in established state,
180964580Sdarrenr			 * SYN_RECEIVED -> ESTABLISHED
181064580Sdarrenr			 */
181153642Sguido			state[dir] = TCPS_ESTABLISHED;
181253642Sguido			*age = fr_tcpidletimeout;
181364580Sdarrenr		} else if (flags & TH_FIN) {
181464580Sdarrenr			/*
181564580Sdarrenr			 * We see an F from 'dir' which is in SYN_RECEIVED
181664580Sdarrenr			 * state and wants to close its side of the connection;
181764580Sdarrenr			 * SYN_RECEIVED -> FIN_WAIT_1
181864580Sdarrenr			 */
181964580Sdarrenr			state[dir] = TCPS_FIN_WAIT_1;
182067614Sdarrenr			*age = fr_tcpidletimeout;
182153642Sguido		}
182253642Sguido		break;
182364580Sdarrenr
182464580Sdarrenr	case TCPS_ESTABLISHED: /* 4 */
182553642Sguido		if (flags & TH_FIN) {
182664580Sdarrenr			/*
182764580Sdarrenr			 * 'dir' closed its side of the connection; this
182864580Sdarrenr			 * gives us a half-closed connection;
182964580Sdarrenr			 * ESTABLISHED -> FIN_WAIT_1
183064580Sdarrenr			 */
183164580Sdarrenr			state[dir] = TCPS_FIN_WAIT_1;
183267614Sdarrenr			*age = fr_tcphalfclosed;
183364580Sdarrenr		} else if (flags & TH_ACK) {
183464580Sdarrenr			/* an ACK, should we exclude other flags here? */
183564580Sdarrenr			if (ostate == TCPS_FIN_WAIT_1) {
183664580Sdarrenr				/*
183764580Sdarrenr				 * We know the other side did an active close,
183864580Sdarrenr				 * so we are ACKing the recvd FIN packet (does
183964580Sdarrenr				 * the window matching code guarantee this?)
184064580Sdarrenr				 * and go into CLOSE_WAIT state; this gives us
184164580Sdarrenr				 * a half-closed connection
184264580Sdarrenr				 */
184364580Sdarrenr				state[dir] = TCPS_CLOSE_WAIT;
184467614Sdarrenr				*age = fr_tcphalfclosed;
184564580Sdarrenr			} else if (ostate < TCPS_CLOSE_WAIT)
184664580Sdarrenr				/*
184764580Sdarrenr				 * Still a fully established connection,
184864580Sdarrenr				 * reset timeout
184964580Sdarrenr				 */
185064580Sdarrenr				*age = fr_tcpidletimeout;
185153642Sguido		}
185253642Sguido		break;
185364580Sdarrenr
185464580Sdarrenr	case TCPS_CLOSE_WAIT: /* 5 */
185564580Sdarrenr		if (flags & TH_FIN) {
185664580Sdarrenr			/*
185764580Sdarrenr			 * Application closed and 'dir' sent a FIN, we're now
185864580Sdarrenr			 * going into LAST_ACK state
185964580Sdarrenr			 */
186053642Sguido			*age  = fr_tcplastack;
186153642Sguido			state[dir] = TCPS_LAST_ACK;
186264580Sdarrenr		} else {
186364580Sdarrenr			/*
186464580Sdarrenr			 * We remain in CLOSE_WAIT because the other side has
186564580Sdarrenr			 * closed already and we did not close our side yet;
186664580Sdarrenr			 * reset timeout
186764580Sdarrenr			 */
186867614Sdarrenr			*age  = fr_tcphalfclosed;
186964580Sdarrenr		}
187064580Sdarrenr		break;
187164580Sdarrenr
187264580Sdarrenr	case TCPS_FIN_WAIT_1: /* 6 */
187364580Sdarrenr		if ((flags & TH_ACK) && ostate > TCPS_CLOSE_WAIT) {
187464580Sdarrenr			/*
187564580Sdarrenr			 * If the other side is not active anymore it has sent
187664580Sdarrenr			 * us a FIN packet that we are ack'ing now with an ACK;
187764580Sdarrenr			 * this means both sides have now closed the connection
187864580Sdarrenr			 * and we go into TIME_WAIT
187964580Sdarrenr			 */
188064580Sdarrenr			/*
188164580Sdarrenr			 * XXX: how do we know we really are ACKing the FIN
188264580Sdarrenr			 * packet here? does the window code guarantee that?
188364580Sdarrenr			 */
188464580Sdarrenr			state[dir] = TCPS_TIME_WAIT;
188564580Sdarrenr			*age = fr_tcptimeout;
188653642Sguido		} else
188764580Sdarrenr			/*
188864580Sdarrenr			 * We closed our side of the connection already but the
188964580Sdarrenr			 * other side is still active (ESTABLISHED/CLOSE_WAIT);
189064580Sdarrenr			 * continue with this half-closed connection
189164580Sdarrenr			 */
189267614Sdarrenr			*age = fr_tcphalfclosed;
189353642Sguido		break;
189464580Sdarrenr
189564580Sdarrenr	case TCPS_CLOSING: /* 7 */
189664580Sdarrenr		/* NOT USED */
189764580Sdarrenr		break;
189864580Sdarrenr
189964580Sdarrenr	case TCPS_LAST_ACK: /* 8 */
190053642Sguido		if (flags & TH_ACK) {
190164580Sdarrenr			if ((flags & TH_PUSH) || dlen)
190264580Sdarrenr				/*
190364580Sdarrenr				 * There is still data to be delivered, reset
190464580Sdarrenr				 * timeout
190564580Sdarrenr				 */
190653642Sguido				*age  = fr_tcplastack;
190753642Sguido		}
190864580Sdarrenr		/*
190964580Sdarrenr		 * We cannot detect when we go out of LAST_ACK state to CLOSED
191064580Sdarrenr		 * because that is based on the reception of ACK packets;
191164580Sdarrenr		 * ipfilter can only detect that a packet has been sent by a
191264580Sdarrenr		 * host
191364580Sdarrenr		 */
191453642Sguido		break;
191564580Sdarrenr
191664580Sdarrenr	case TCPS_FIN_WAIT_2: /* 9 */
191764580Sdarrenr		/* NOT USED */
191864580Sdarrenr		break;
191964580Sdarrenr
192064580Sdarrenr	case TCPS_TIME_WAIT: /* 10 */
192164580Sdarrenr		/* we're in 2MSL timeout now */
192264580Sdarrenr		break;
192353642Sguido	}
192453642Sguido}
192553642Sguido
192653642Sguido
#ifdef	IPFILTER_LOG
/*
 * Build a log record describing a state entry and hand it to ipllog() on
 * the IPL_LOGSTATE device.
 *
 * is   - state entry being reported
 * type - event type stored in the record (isl_type)
 *
 * The result of ipllog() is ignored.
 */
void ipstate_log(is, type)
struct ipstate *is;
u_int type;
{
	struct	ipslog	ipsl;
	void *items[1];
	size_t sizes[1];
	int types[1];

	/* Fields common to every protocol. */
	ipsl.isl_type = type;
	ipsl.isl_pkts = is->is_pkts;
	ipsl.isl_bytes = is->is_bytes;
	ipsl.isl_src = is->is_src;
	ipsl.isl_dst = is->is_dst;
	ipsl.isl_p = is->is_p;
	ipsl.isl_v = is->is_v;
	ipsl.isl_flags = is->is_flags;

	/* Protocol specific part of the record. */
	switch (ipsl.isl_p)
	{
	case IPPROTO_TCP:
		ipsl.isl_state[0] = is->is_state[0];
		ipsl.isl_state[1] = is->is_state[1];
		/* FALLTHROUGH - TCP records carry the ports as well */
	case IPPROTO_UDP:
		ipsl.isl_sport = is->is_sport;
		ipsl.isl_dport = is->is_dport;
		break;

	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		ipsl.isl_itype = is->is_icmp.ics_type;
		break;

	default:
		ipsl.isl_ps.isl_filler[0] = 0;
		ipsl.isl_ps.isl_filler[1] = 0;
		break;
	}

	/* A single item: the record itself. */
	items[0] = &ipsl;
	sizes[0] = sizeof(ipsl);
	types[0] = 0;

	(void) ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1);
}
#endif
196757096Sguido
196857096Sguido
#ifdef	USE_INET6
/*
 * Decide whether an ICMPv6 error message belongs to a connection or query
 * for which state is being kept, by looking up the packet that is embedded
 * in the error.
 *
 * ip  - IPv6 header of the ICMPv6 packet; its ip6_plen is read in host
 *       order
 * fin - packet information for the ICMPv6 packet; fin_dp must point at the
 *       ICMPv6 header
 *
 * Returns the rule (is_rule) of the matching state entry, or NULL when the
 * packet is not an ICMPv6 error, is too short, embeds something other than
 * ICMPv6/TCP/UDP, or matches no state entry.
 *
 * ipf_state is taken as a read lock for the duration of the table walk and
 * is always released again before returning.
 */
frentry_t *fr_checkicmp6matchingstate(ip, fin)
ip6_t *ip;
fr_info_t *fin;
{
	register ipstate_t *is, **isp;
	register u_short sport, dport;
	register u_char	pr;
	struct icmp6_hdr *ic, *oic;
	union i6addr dst, src;
	u_short savelen;
	fr_info_t ofin;
	tcphdr_t *tcp;
	frentry_t *fr;
	ip6_t *oip;
	int type;
	u_int hv;

	/*
	 * Does it at least have the return (basic) IP header ?
	 * Only a basic IP header (no options) should be with
	 * an ICMP error header.
	 */
	if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN))
		return NULL;
	ic = (struct icmp6_hdr *)fin->fin_dp;
	type = ic->icmp6_type;
	/*
	 * If it's not an error type, then return
	 */
	if ((type != ICMP6_DST_UNREACH) && (type != ICMP6_PACKET_TOO_BIG) &&
	    (type != ICMP6_TIME_EXCEEDED) && (type != ICMP6_PARAM_PROB))
		return NULL;

	/* The offending packet's IPv6 header follows the ICMP header. */
	oip = (ip6_t *)((char *)ic + ICMPERR_ICMPHLEN);
	if (fin->fin_plen < sizeof(*oip))
		return NULL;

	if (oip->ip6_nxt == IPPROTO_ICMPV6) {
		oic = (struct icmp6_hdr *)(oip + 1);
		/*
		 * a ICMP error can only be generated as a result of an
		 * ICMP query, not as the response on an ICMP error
		 *
		 * XXX theoretically ICMP_ECHOREP and the other reply's are
		 * ICMP query's as well, but adding them here seems strange XXX
		 */
		if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK))
			return NULL;

		/*
		 * perform a lookup of the ICMP packet in the state table
		 */
		pr = oip->ip6_nxt;
		hv = pr;
		src.in6 = oip->ip6_src;
		hv += src.in4.s_addr;
		dst.in6 = oip->ip6_dst;
		hv += dst.in4.s_addr;
		hv += oic->icmp6_id;
		hv += oic->icmp6_seq;
		hv %= fr_statesize;

		/*
		 * fr_makefrip() is given the length in host order; the
		 * embedded header is put back into network order afterwards.
		 */
		oip->ip6_plen = ntohs(oip->ip6_plen);
		ofin.fin_v = 6;
		fr_makefrip(sizeof(*oip), (ip_t *)oip, &ofin);
		oip->ip6_plen = htons(oip->ip6_plen);
		ofin.fin_ifp = fin->fin_ifp;
		ofin.fin_out = !fin->fin_out;
		ofin.fin_mp = NULL; /* if dereferenced, panic XXX */

		READ_ENTER(&ipf_state);
		for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) {
			if ((is->is_p != pr) ||
			    (oic->icmp6_id != is->is_icmp.ics_id) ||
			    (oic->icmp6_seq != is->is_icmp.ics_seq) ||
			    !fr_matchsrcdst(is, src, dst, &ofin, NULL))
				continue;
			/*
			 * in the state table ICMP query's are stored
			 * with the type of the corresponding ICMP
			 * response. Correct here
			 */
			if (((is->is_type == ICMP6_ECHO_REPLY) &&
			     (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
			    (is->is_type - 1 == oic->icmp6_type)) {
				ips_stats.iss_hits++;
				is->is_pkts++;
				is->is_bytes += fin->fin_plen;
				/*
				 * Fetch the rule while the lock is still
				 * held, then release the lock; returning
				 * with it held would leak the read lock.
				 */
				fr = is->is_rule;
				RWLOCK_EXIT(&ipf_state);
				return fr;
			}
		}
		RWLOCK_EXIT(&ipf_state);

		return NULL;
	}

	if ((oip->ip6_nxt != IPPROTO_TCP) && (oip->ip6_nxt != IPPROTO_UDP))
		return NULL;
	/* Only the port fields are read here through the TCP header type. */
	tcp = (tcphdr_t *)(oip + 1);
	dport = tcp->th_dport;
	sport = tcp->th_sport;

	pr = oip->ip6_nxt;
	hv = pr;
	src.in6 = oip->ip6_src;
	hv += src.in4.s_addr;
	dst.in6 = oip->ip6_dst;
	hv += dst.in4.s_addr;
	hv += dport;
	hv += sport;
	hv %= fr_statesize;
	/*
	 * we make an fin entry to be able to feed it to
	 * matchsrcdst note that not all fields are necessary
	 * but this is the cleanest way. Note further we fill
	 * in fin_mp such that if someone uses it we'll get
	 * a kernel panic. fr_matchsrcdst does not use this.
	 *
	 * watch out here, as ip is in host order and oip in network
	 * order. Any change we make must be undone afterwards.
	 */
	savelen = oip->ip6_plen;
	oip->ip6_plen = ip->ip6_plen - sizeof(*ip) - ICMPERR_ICMPHLEN;
	ofin.fin_v = 6;
	fr_makefrip(sizeof(*oip), (ip_t *)oip, &ofin);
	oip->ip6_plen = savelen;
	ofin.fin_ifp = fin->fin_ifp;
	ofin.fin_out = !fin->fin_out;
	ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
	READ_ENTER(&ipf_state);
	for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) {
		/*
		 * Only allow this icmp though if the
		 * encapsulated packet was allowed through the
		 * other way around. Note that the minimal amount
		 * of info present does not allow for checking against
		 * tcp internals such as seq and ack numbers.
		 */
		if ((is->is_p == pr) && (is->is_v == 6) &&
		    fr_matchsrcdst(is, src, dst, &ofin, tcp)) {
			fr = is->is_rule;
			ips_stats.iss_hits++;
			is->is_pkts++;
			is->is_bytes += fin->fin_plen;
			/*
			 * we deliberately do not touch the timeouts
			 * for the accompanying state table entry.
			 * It remains to be seen if that is correct. XXX
			 */
			RWLOCK_EXIT(&ipf_state);
			return fr;
		}
	}
	RWLOCK_EXIT(&ipf_state);
	return NULL;
}
#endif
2124