ip_state.c revision 80482
153642Sguido/*
280482Sdarrenr * Copyright (C) 1995-2001 by Darren Reed.
353642Sguido *
480482Sdarrenr * See the IPFILTER.LICENCE file for details on licencing.
553642Sguido */
653642Sguido
753642Sguido#include <sys/errno.h>
853642Sguido#include <sys/types.h>
953642Sguido#include <sys/param.h>
1053642Sguido#include <sys/file.h>
1153642Sguido#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
1253642Sguido    defined(_KERNEL)
1353642Sguido# include "opt_ipfilter_log.h"
1453642Sguido#endif
1560854Sdarrenr#if defined(_KERNEL) && defined(__FreeBSD_version) && \
1660854Sdarrenr    (__FreeBSD_version >= 400000) && !defined(KLD_MODULE)
1760854Sdarrenr#include "opt_inet6.h"
1860854Sdarrenr#endif
1953642Sguido#if !defined(_KERNEL) && !defined(KERNEL) && !defined(__KERNEL__)
2053642Sguido# include <stdio.h>
2153642Sguido# include <stdlib.h>
2253642Sguido# include <string.h>
2353642Sguido#else
2453642Sguido# ifdef linux
2553642Sguido#  include <linux/kernel.h>
2653642Sguido#  include <linux/module.h>
2753642Sguido# endif
2853642Sguido#endif
2960854Sdarrenr#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
3053642Sguido# include <sys/filio.h>
3153642Sguido# include <sys/fcntl.h>
3253642Sguido# if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM)
3353642Sguido#  include "opt_ipfilter.h"
3453642Sguido# endif
3553642Sguido#else
3653642Sguido# include <sys/ioctl.h>
3753642Sguido#endif
3853642Sguido#include <sys/time.h>
3953642Sguido#include <sys/uio.h>
4053642Sguido#ifndef linux
4153642Sguido# include <sys/protosw.h>
4253642Sguido#endif
4353642Sguido#include <sys/socket.h>
4457096Sguido#if (defined(_KERNEL) || defined(KERNEL)) && !defined(linux)
4553642Sguido# include <sys/systm.h>
4653642Sguido#endif
4753642Sguido#if !defined(__SVR4) && !defined(__svr4__)
4853642Sguido# ifndef linux
4953642Sguido#  include <sys/mbuf.h>
5053642Sguido# endif
5153642Sguido#else
5253642Sguido# include <sys/filio.h>
5353642Sguido# include <sys/byteorder.h>
5453642Sguido# ifdef _KERNEL
5553642Sguido#  include <sys/dditypes.h>
5653642Sguido# endif
5753642Sguido# include <sys/stream.h>
5853642Sguido# include <sys/kmem.h>
5953642Sguido#endif
6053642Sguido
6153642Sguido#include <net/if.h>
6253642Sguido#ifdef sun
6353642Sguido# include <net/af.h>
6453642Sguido#endif
6553642Sguido#include <net/route.h>
6653642Sguido#include <netinet/in.h>
6753642Sguido#include <netinet/in_systm.h>
6853642Sguido#include <netinet/ip.h>
6953642Sguido#include <netinet/tcp.h>
7053642Sguido#ifndef linux
7153642Sguido# include <netinet/ip_var.h>
7253642Sguido# include <netinet/tcp_fsm.h>
7353642Sguido#endif
7453642Sguido#include <netinet/udp.h>
7553642Sguido#include <netinet/ip_icmp.h>
7653642Sguido#include "netinet/ip_compat.h"
7753642Sguido#include <netinet/tcpip.h>
7853642Sguido#include "netinet/ip_fil.h"
7953642Sguido#include "netinet/ip_nat.h"
8053642Sguido#include "netinet/ip_frag.h"
8153642Sguido#include "netinet/ip_proxy.h"
8253642Sguido#include "netinet/ip_state.h"
8360854Sdarrenr#ifdef	USE_INET6
8460854Sdarrenr#include <netinet/icmp6.h>
8560854Sdarrenr#endif
8653642Sguido#if (__FreeBSD_version >= 300000)
8753642Sguido# include <sys/malloc.h>
8853642Sguido# if (defined(_KERNEL) || defined(KERNEL)) && !defined(IPFILTER_LKM)
8953642Sguido#  include <sys/libkern.h>
9053642Sguido#  include <sys/systm.h>
9153642Sguido# endif
9253642Sguido#endif
9353642Sguido
9480482Sdarrenr#if !defined(lint)
9580482Sdarrenrstatic const char sccsid[] = "@(#)ip_state.c	1.8 6/5/96 (C) 1993-2000 Darren Reed";
9680482Sdarrenr/* static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.30.2.38 2001/07/23 13:49:46 darrenr Exp $"; */
9780482Sdarrenrstatic const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_state.c 80482 2001-07-28 11:58:26Z darrenr $";
9880482Sdarrenr#endif
9980482Sdarrenr
10053642Sguido#ifndef	MIN
10153642Sguido# define	MIN(a,b)	(((a)<(b))?(a):(b))
10253642Sguido#endif
10353642Sguido
10453642Sguido#define	TCP_CLOSE	(TH_FIN|TH_RST)
10553642Sguido
10660854Sdarrenrstatic ipstate_t **ips_table = NULL;
10760854Sdarrenrstatic ipstate_t *ips_list = NULL;
10860854Sdarrenrstatic int	ips_num = 0;
10967614Sdarrenrstatic int	ips_wild = 0;
11060854Sdarrenrstatic ips_stat_t ips_stats;
11153642Sguido#if	(SOLARIS || defined(__sgi)) && defined(_KERNEL)
11253642Sguidoextern	KRWLOCK_T	ipf_state, ipf_mutex;
11353642Sguidoextern	kmutex_t	ipf_rw;
11453642Sguido#endif
11553642Sguido
11660854Sdarrenr#ifdef	USE_INET6
11760854Sdarrenrstatic frentry_t *fr_checkicmp6matchingstate __P((ip6_t *, fr_info_t *));
11860854Sdarrenr#endif
11960854Sdarrenrstatic int fr_matchsrcdst __P((ipstate_t *, union i6addr, union i6addr,
12053642Sguido			       fr_info_t *, tcphdr_t *));
12153642Sguidostatic frentry_t *fr_checkicmpmatchingstate __P((ip_t *, fr_info_t *));
12260854Sdarrenrstatic int fr_matchicmpqueryreply __P((int, ipstate_t *, icmphdr_t *));
12353642Sguidostatic int fr_state_flush __P((int));
12453642Sguidostatic ips_stat_t *fr_statetstats __P((void));
12553642Sguidostatic void fr_delstate __P((ipstate_t *));
12660854Sdarrenrstatic int fr_state_remove __P((caddr_t));
12767614Sdarrenrstatic void fr_ipsmove __P((ipstate_t **, ipstate_t *, u_int));
12860854Sdarrenrint fr_stputent __P((caddr_t));
12960854Sdarrenrint fr_stgetent __P((caddr_t));
13060854Sdarrenrvoid fr_stinsert __P((ipstate_t *));
13153642Sguido
13253642Sguido
13353642Sguido#define	FIVE_DAYS	(2 * 5 * 86400)	/* 5 days: half closed session */
13453642Sguido
13553642Sguido#define	TCP_MSL	240			/* 2 minutes */
13653642Sguidou_long	fr_tcpidletimeout = FIVE_DAYS,
13753642Sguido	fr_tcpclosewait = 2 * TCP_MSL,
13853642Sguido	fr_tcplastack = 2 * TCP_MSL,
13953642Sguido	fr_tcptimeout = 2 * TCP_MSL,
14067614Sdarrenr	fr_tcpclosed = 120,
14167614Sdarrenr	fr_tcphalfclosed = 2 * 2 * 3600,    /* 2 hours */
14253642Sguido	fr_udptimeout = 240,
14380482Sdarrenr	fr_udpacktimeout = 24,
14480482Sdarrenr	fr_icmptimeout = 120,
14580482Sdarrenr	fr_icmpacktimeout = 12;
14653642Sguidoint	fr_statemax = IPSTATE_MAX,
14753642Sguido	fr_statesize = IPSTATE_SIZE;
14860854Sdarrenrint	fr_state_doflush = 0,
14960854Sdarrenr	fr_state_lock = 0;
15053642Sguido
15160854Sdarrenrstatic 	int icmpreplytype4[ICMP_MAXTYPE + 1];
15253642Sguido
15353642Sguidoint fr_stateinit()
15453642Sguido{
15560854Sdarrenr	int i;
15660854Sdarrenr
15753642Sguido	KMALLOCS(ips_table, ipstate_t **, fr_statesize * sizeof(ipstate_t *));
15853642Sguido	if (ips_table != NULL)
15953642Sguido		bzero((char *)ips_table, fr_statesize * sizeof(ipstate_t *));
16053642Sguido	else
16153642Sguido		return -1;
16260854Sdarrenr
16360854Sdarrenr	/* fill icmp reply type table */
16460854Sdarrenr	for (i = 0; i <= ICMP_MAXTYPE; i++)
16560854Sdarrenr		icmpreplytype4[i] = -1;
16660854Sdarrenr	icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY;
16760854Sdarrenr	icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY;
16860854Sdarrenr	icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY;
16960854Sdarrenr	icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY;
17060854Sdarrenr
17153642Sguido	return 0;
17253642Sguido}
17353642Sguido
17453642Sguido
17553642Sguidostatic ips_stat_t *fr_statetstats()
17653642Sguido{
17753642Sguido	ips_stats.iss_active = ips_num;
17853642Sguido	ips_stats.iss_table = ips_table;
17960854Sdarrenr	ips_stats.iss_list = ips_list;
18053642Sguido	return &ips_stats;
18153642Sguido}
18253642Sguido
18353642Sguido
18453642Sguido/*
18553642Sguido * flush state tables.  two actions currently defined:
18653642Sguido * which == 0 : flush all state table entries
18753642Sguido * which == 1 : flush TCP connections which have started to close but are
18864580Sdarrenr *	        stuck for some reason.
18953642Sguido */
19053642Sguidostatic int fr_state_flush(which)
19153642Sguidoint which;
19253642Sguido{
19353642Sguido	register ipstate_t *is, **isp;
19453642Sguido#if defined(_KERNEL) && !SOLARIS
19553642Sguido	int s;
19653642Sguido#endif
19753642Sguido	int delete, removed = 0;
19853642Sguido
19953642Sguido	SPL_NET(s);
20060854Sdarrenr	for (isp = &ips_list; (is = *isp); ) {
20160854Sdarrenr		delete = 0;
20253642Sguido
20360854Sdarrenr		switch (which)
20460854Sdarrenr		{
20560854Sdarrenr		case 0 :
20660854Sdarrenr			delete = 1;
20760854Sdarrenr			break;
20860854Sdarrenr		case 1 :
20960854Sdarrenr			if (is->is_p != IPPROTO_TCP)
21060854Sdarrenr				break;
21160854Sdarrenr			if ((is->is_state[0] != TCPS_ESTABLISHED) ||
21260854Sdarrenr			    (is->is_state[1] != TCPS_ESTABLISHED))
21353642Sguido				delete = 1;
21460854Sdarrenr			break;
21560854Sdarrenr		}
21653642Sguido
21760854Sdarrenr		if (delete) {
21860854Sdarrenr			if (is->is_p == IPPROTO_TCP)
21960854Sdarrenr				ips_stats.iss_fin++;
22060854Sdarrenr			else
22160854Sdarrenr				ips_stats.iss_expire++;
22253642Sguido#ifdef	IPFILTER_LOG
22360854Sdarrenr			ipstate_log(is, ISL_FLUSH);
22453642Sguido#endif
22560854Sdarrenr			fr_delstate(is);
22660854Sdarrenr			removed++;
22760854Sdarrenr		} else
22860854Sdarrenr			isp = &is->is_next;
22960854Sdarrenr	}
23053642Sguido	SPL_X(s);
23153642Sguido	return removed;
23253642Sguido}
23353642Sguido
23453642Sguido
23560854Sdarrenrstatic int fr_state_remove(data)
23660854Sdarrenrcaddr_t data;
23760854Sdarrenr{
23860854Sdarrenr	ipstate_t *sp, st;
23960854Sdarrenr	int error;
24060854Sdarrenr
24160854Sdarrenr	sp = &st;
24260854Sdarrenr	error = IRCOPYPTR(data, (caddr_t)&st, sizeof(st));
24360854Sdarrenr	if (error)
24460854Sdarrenr		return EFAULT;
24560854Sdarrenr
24680482Sdarrenr	WRITE_ENTER(&ipf_state);
24760854Sdarrenr	for (sp = ips_list; sp; sp = sp->is_next)
24860854Sdarrenr		if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) &&
24967614Sdarrenr		    !bcmp((char *)&sp->is_src, (char *)&st.is_src,
25067614Sdarrenr			  sizeof(st.is_src)) &&
25167614Sdarrenr		    !bcmp((char *)&sp->is_dst, (char *)&st.is_src,
25267614Sdarrenr			  sizeof(st.is_dst)) &&
25367614Sdarrenr		    !bcmp((char *)&sp->is_ps, (char *)&st.is_ps,
25467614Sdarrenr			  sizeof(st.is_ps))) {
25560854Sdarrenr#ifdef	IPFILTER_LOG
25660854Sdarrenr			ipstate_log(sp, ISL_REMOVE);
25760854Sdarrenr#endif
25860854Sdarrenr			fr_delstate(sp);
25960854Sdarrenr			RWLOCK_EXIT(&ipf_state);
26060854Sdarrenr			return 0;
26160854Sdarrenr		}
26280482Sdarrenr	RWLOCK_EXIT(&ipf_state);
26360854Sdarrenr	return ESRCH;
26460854Sdarrenr}
26560854Sdarrenr
26660854Sdarrenr
26753642Sguidoint fr_state_ioctl(data, cmd, mode)
26853642Sguidocaddr_t data;
26953642Sguido#if defined(__NetBSD__) || defined(__OpenBSD__)
27053642Sguidou_long cmd;
27153642Sguido#else
27253642Sguidoint cmd;
27353642Sguido#endif
27453642Sguidoint mode;
27553642Sguido{
27660854Sdarrenr	int arg, ret, error = 0;
27753642Sguido
27853642Sguido	switch (cmd)
27953642Sguido	{
28060854Sdarrenr	case SIOCDELST :
28160854Sdarrenr		error = fr_state_remove(data);
28260854Sdarrenr		break;
28353642Sguido	case SIOCIPFFL :
28460854Sdarrenr		error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
28560854Sdarrenr		if (error)
28660854Sdarrenr			break;
28753642Sguido		if (arg == 0 || arg == 1) {
28860854Sdarrenr			WRITE_ENTER(&ipf_state);
28953642Sguido			ret = fr_state_flush(arg);
29060854Sdarrenr			RWLOCK_EXIT(&ipf_state);
29160854Sdarrenr			error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
29253642Sguido		} else
29353642Sguido			error = EINVAL;
29453642Sguido		break;
29555929Sguido#ifdef	IPFILTER_LOG
29655929Sguido	case SIOCIPFFB :
29755929Sguido		if (!(mode & FWRITE))
29855929Sguido			error = EPERM;
29960854Sdarrenr		else {
30060854Sdarrenr			int tmp;
30160854Sdarrenr
30260854Sdarrenr			tmp = ipflog_clear(IPL_LOGSTATE);
30360854Sdarrenr			IWCOPY((char *)&tmp, data, sizeof(tmp));
30460854Sdarrenr		}
30555929Sguido		break;
30655929Sguido#endif
30760854Sdarrenr	case SIOCGETFS :
30860854Sdarrenr		error = IWCOPYPTR((caddr_t)fr_statetstats(), data,
30960854Sdarrenr				  sizeof(ips_stat_t));
31053642Sguido		break;
31153642Sguido	case FIONREAD :
31253642Sguido#ifdef	IPFILTER_LOG
31372006Sdarrenr		arg = (int)iplused[IPL_LOGSTATE];
31472006Sdarrenr		error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
31553642Sguido#endif
31653642Sguido		break;
31760854Sdarrenr	case SIOCSTLCK :
31860854Sdarrenr		error = fr_lock(data, &fr_state_lock);
31960854Sdarrenr		break;
32060854Sdarrenr	case SIOCSTPUT :
32160854Sdarrenr		if (!fr_state_lock) {
32260854Sdarrenr			error = EACCES;
32360854Sdarrenr			break;
32460854Sdarrenr		}
32560854Sdarrenr		error = fr_stputent(data);
32660854Sdarrenr		break;
32760854Sdarrenr	case SIOCSTGET :
32860854Sdarrenr		if (!fr_state_lock) {
32960854Sdarrenr			error = EACCES;
33060854Sdarrenr			break;
33160854Sdarrenr		}
33260854Sdarrenr		error = fr_stgetent(data);
33360854Sdarrenr		break;
33453642Sguido	default :
33553642Sguido		error = EINVAL;
33653642Sguido		break;
33753642Sguido	}
33853642Sguido	return error;
33953642Sguido}
34053642Sguido
34153642Sguido
34260854Sdarrenrint fr_stgetent(data)
34360854Sdarrenrcaddr_t data;
34460854Sdarrenr{
34560854Sdarrenr	register ipstate_t *is, *isn;
34660854Sdarrenr	ipstate_save_t ips, *ipsp;
34760854Sdarrenr	int error;
34860854Sdarrenr
34960854Sdarrenr	error = IRCOPY(data, (caddr_t)&ipsp, sizeof(ipsp));
35060854Sdarrenr	if (error)
35160854Sdarrenr		return EFAULT;
35260854Sdarrenr	error = IRCOPY((caddr_t)ipsp, (caddr_t)&ips, sizeof(ips));
35360854Sdarrenr	if (error)
35460854Sdarrenr		return EFAULT;
35560854Sdarrenr
35660854Sdarrenr	isn = ips.ips_next;
35760854Sdarrenr	if (!isn) {
35860854Sdarrenr		isn = ips_list;
35960854Sdarrenr		if (isn == NULL) {
36060854Sdarrenr			if (ips.ips_next == NULL)
36160854Sdarrenr				return ENOENT;
36260854Sdarrenr			return 0;
36360854Sdarrenr		}
36460854Sdarrenr	} else {
36560854Sdarrenr		/*
36660854Sdarrenr		 * Make sure the pointer we're copying from exists in the
36760854Sdarrenr		 * current list of entries.  Security precaution to prevent
36860854Sdarrenr		 * copying of random kernel data.
36960854Sdarrenr		 */
37060854Sdarrenr		for (is = ips_list; is; is = is->is_next)
37160854Sdarrenr			if (is == isn)
37260854Sdarrenr				break;
37360854Sdarrenr		if (!is)
37460854Sdarrenr			return ESRCH;
37560854Sdarrenr	}
37660854Sdarrenr	ips.ips_next = isn->is_next;
37760854Sdarrenr	bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is));
37860854Sdarrenr	if (isn->is_rule)
37960854Sdarrenr		bcopy((char *)isn->is_rule, (char *)&ips.ips_fr,
38060854Sdarrenr		      sizeof(ips.ips_fr));
38160854Sdarrenr	error = IWCOPY((caddr_t)&ips, ipsp, sizeof(ips));
38260854Sdarrenr	if (error)
38364580Sdarrenr		error = EFAULT;
38464580Sdarrenr	return error;
38560854Sdarrenr}
38660854Sdarrenr
38760854Sdarrenr
38860854Sdarrenrint fr_stputent(data)
38960854Sdarrenrcaddr_t data;
39060854Sdarrenr{
39160854Sdarrenr	register ipstate_t *is, *isn;
39260854Sdarrenr	ipstate_save_t ips, *ipsp;
39363523Sdarrenr	int error, out;
39460854Sdarrenr	frentry_t *fr;
39560854Sdarrenr
39660854Sdarrenr	error = IRCOPY(data, (caddr_t)&ipsp, sizeof(ipsp));
39760854Sdarrenr	if (error)
39860854Sdarrenr		return EFAULT;
39960854Sdarrenr	error = IRCOPY((caddr_t)ipsp, (caddr_t)&ips, sizeof(ips));
40060854Sdarrenr	if (error)
40160854Sdarrenr		return EFAULT;
40260854Sdarrenr
40360854Sdarrenr	KMALLOC(isn, ipstate_t *);
40460854Sdarrenr	if (isn == NULL)
40560854Sdarrenr		return ENOMEM;
40660854Sdarrenr
40760854Sdarrenr	bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn));
40860854Sdarrenr	fr = isn->is_rule;
40960854Sdarrenr	if (fr != NULL) {
41060854Sdarrenr		if (isn->is_flags & FI_NEWFR) {
41160854Sdarrenr			KMALLOC(fr, frentry_t *);
41260854Sdarrenr			if (fr == NULL) {
41360854Sdarrenr				KFREE(isn);
41460854Sdarrenr				return ENOMEM;
41560854Sdarrenr			}
41660854Sdarrenr			bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr));
41763523Sdarrenr			out = fr->fr_flags & FR_OUTQUE ? 1 : 0;
41860854Sdarrenr			isn->is_rule = fr;
41960854Sdarrenr			ips.ips_is.is_rule = fr;
42063523Sdarrenr			if (*fr->fr_ifname) {
42163523Sdarrenr				fr->fr_ifa = GETUNIT(fr->fr_ifname, fr->fr_v);
42263523Sdarrenr				if (fr->fr_ifa == NULL)
42363523Sdarrenr					fr->fr_ifa = (void *)-1;
42463523Sdarrenr#ifdef	_KERNEL
42563523Sdarrenr				else {
42663523Sdarrenr					strncpy(isn->is_ifname[out],
42763523Sdarrenr						IFNAME(fr->fr_ifa), IFNAMSIZ);
42863523Sdarrenr					isn->is_ifp[out] = fr->fr_ifa;
42963523Sdarrenr				}
43063523Sdarrenr#endif
43163523Sdarrenr			} else
43263523Sdarrenr				fr->fr_ifa = NULL;
43363523Sdarrenr			/*
43463523Sdarrenr			 * send a copy back to userland of what we ended up
43563523Sdarrenr			 * to allow for verification.
43663523Sdarrenr			 */
43760854Sdarrenr			error = IWCOPY((caddr_t)&ips, ipsp, sizeof(ips));
43860854Sdarrenr			if (error) {
43960854Sdarrenr				KFREE(isn);
44060854Sdarrenr				KFREE(fr);
44160854Sdarrenr				return EFAULT;
44260854Sdarrenr			}
44360854Sdarrenr		} else {
44460854Sdarrenr			for (is = ips_list; is; is = is->is_next)
44560854Sdarrenr				if (is->is_rule == fr)
44660854Sdarrenr					break;
44760854Sdarrenr			if (!is) {
44860854Sdarrenr				KFREE(isn);
44960854Sdarrenr				return ESRCH;
45060854Sdarrenr			}
45160854Sdarrenr		}
45260854Sdarrenr	}
45360854Sdarrenr	fr_stinsert(isn);
45460854Sdarrenr	return 0;
45560854Sdarrenr}
45660854Sdarrenr
45760854Sdarrenr
45860854Sdarrenrvoid fr_stinsert(is)
45960854Sdarrenrregister ipstate_t *is;
46060854Sdarrenr{
46160854Sdarrenr	register u_int hv = is->is_hv;
46260854Sdarrenr
46360854Sdarrenr	MUTEX_INIT(&is->is_lock, "ipf state entry", NULL);
46460854Sdarrenr
46560854Sdarrenr	is->is_ifname[0][sizeof(is->is_ifname[0]) - 1] = '\0';
46660854Sdarrenr	if (is->is_ifname[0][0] != '\0') {
46760854Sdarrenr		is->is_ifp[0] = GETUNIT(is->is_ifname[0], is->is_v);
46860854Sdarrenr	}
46960854Sdarrenr	is->is_ifname[1][sizeof(is->is_ifname[0]) - 1] = '\0';
47060854Sdarrenr	if (is->is_ifname[1][0] != '\0') {
47160854Sdarrenr		is->is_ifp[1] = GETUNIT(is->is_ifname[1], is->is_v);
47260854Sdarrenr	}
47360854Sdarrenr
47460854Sdarrenr	/*
47560854Sdarrenr	 * add into list table.
47660854Sdarrenr	 */
47760854Sdarrenr	if (ips_list)
47860854Sdarrenr		ips_list->is_pnext = &is->is_next;
47960854Sdarrenr	is->is_pnext = &ips_list;
48060854Sdarrenr	is->is_next = ips_list;
48160854Sdarrenr	ips_list = is;
48260854Sdarrenr	if (ips_table[hv])
48360854Sdarrenr		ips_table[hv]->is_phnext = &is->is_hnext;
48460854Sdarrenr	else
48560854Sdarrenr		ips_stats.iss_inuse++;
48660854Sdarrenr	is->is_phnext = ips_table + hv;
48760854Sdarrenr	is->is_hnext = ips_table[hv];
48860854Sdarrenr	ips_table[hv] = is;
48964580Sdarrenr	ips_num++;
49060854Sdarrenr}
49160854Sdarrenr
49260854Sdarrenr
49353642Sguido/*
49453642Sguido * Create a new ipstate structure and hang it off the hash table.
49553642Sguido */
49653642Sguidoipstate_t *fr_addstate(ip, fin, flags)
49753642Sguidoip_t *ip;
49853642Sguidofr_info_t *fin;
49953642Sguidou_int flags;
50053642Sguido{
50160854Sdarrenr	register tcphdr_t *tcp = NULL;
50253642Sguido	register ipstate_t *is;
50353642Sguido	register u_int hv;
50453642Sguido	ipstate_t ips;
50553642Sguido	u_int pass;
50660854Sdarrenr	int out;
50753642Sguido
50880482Sdarrenr	if (fr_state_lock || (fin->fin_off != 0) || (fin->fin_fl & FI_SHORT))
50953642Sguido		return NULL;
51053642Sguido	if (ips_num == fr_statemax) {
51153642Sguido		ips_stats.iss_max++;
51253642Sguido		fr_state_doflush = 1;
51353642Sguido		return NULL;
51453642Sguido	}
51560854Sdarrenr	out = fin->fin_out;
51653642Sguido	is = &ips;
51753642Sguido	bzero((char *)is, sizeof(*is));
51853642Sguido	ips.is_age = 1;
51953642Sguido	ips.is_state[0] = 0;
52053642Sguido	ips.is_state[1] = 0;
52153642Sguido	/*
52253642Sguido	 * Copy and calculate...
52353642Sguido	 */
52460854Sdarrenr	hv = (is->is_p = fin->fin_fi.fi_p);
52560854Sdarrenr	is->is_src = fin->fin_fi.fi_src;
52660854Sdarrenr	hv += is->is_saddr;
52760854Sdarrenr	is->is_dst = fin->fin_fi.fi_dst;
52860854Sdarrenr	hv += is->is_daddr;
52960854Sdarrenr#ifdef	USE_INET6
53060854Sdarrenr	if (fin->fin_v == 6) {
53160854Sdarrenr		if (is->is_p == IPPROTO_ICMPV6) {
53260854Sdarrenr			if (IN6_IS_ADDR_MULTICAST(&is->is_dst.in6))
53360854Sdarrenr				flags |= FI_W_DADDR;
53460854Sdarrenr			if (out)
53560854Sdarrenr				hv -= is->is_daddr;
53660854Sdarrenr			else
53760854Sdarrenr				hv -= is->is_saddr;
53860854Sdarrenr		}
53960854Sdarrenr	}
54060854Sdarrenr#endif
54153642Sguido
54260854Sdarrenr	switch (is->is_p)
54353642Sguido	{
54460854Sdarrenr#ifdef	USE_INET6
54560854Sdarrenr	case IPPROTO_ICMPV6 :
54660854Sdarrenr#endif
54753642Sguido	case IPPROTO_ICMP :
54853642Sguido	    {
54953642Sguido		struct icmp *ic = (struct icmp *)fin->fin_dp;
55053642Sguido
55160854Sdarrenr#ifdef	USE_INET6
55260854Sdarrenr		if ((is->is_p == IPPROTO_ICMPV6) &&
55360854Sdarrenr		    ((ic->icmp_type & ICMP6_INFOMSG_MASK) == 0))
55460854Sdarrenr			return NULL;
55560854Sdarrenr#endif
55653642Sguido		switch (ic->icmp_type)
55753642Sguido		{
55860854Sdarrenr#ifdef	USE_INET6
55960854Sdarrenr		case ICMP6_ECHO_REQUEST :
56060854Sdarrenr			is->is_icmp.ics_type = ICMP6_ECHO_REPLY;
56153642Sguido			hv += (is->is_icmp.ics_id = ic->icmp_id);
56253642Sguido			hv += (is->is_icmp.ics_seq = ic->icmp_seq);
56353642Sguido			break;
56460854Sdarrenr		case ICMP6_MEMBERSHIP_QUERY :
56560854Sdarrenr		case ND_ROUTER_SOLICIT :
56660854Sdarrenr		case ND_NEIGHBOR_SOLICIT :
56760854Sdarrenr			is->is_icmp.ics_type = ic->icmp_type + 1;
56860854Sdarrenr			break;
56960854Sdarrenr#endif
57060854Sdarrenr		case ICMP_ECHO :
57153642Sguido		case ICMP_TSTAMP :
57253642Sguido		case ICMP_IREQ :
57353642Sguido		case ICMP_MASKREQ :
57460854Sdarrenr			is->is_icmp.ics_type = ic->icmp_type;
57560854Sdarrenr			hv += (is->is_icmp.ics_id = ic->icmp_id);
57660854Sdarrenr			hv += (is->is_icmp.ics_seq = ic->icmp_seq);
57753642Sguido			break;
57853642Sguido		default :
57953642Sguido			return NULL;
58053642Sguido		}
58160854Sdarrenr		ATOMIC_INCL(ips_stats.iss_icmp);
58253642Sguido		is->is_age = fr_icmptimeout;
58353642Sguido		break;
58453642Sguido	    }
58553642Sguido	case IPPROTO_TCP :
58653642Sguido	    {
58760854Sdarrenr		tcp = (tcphdr_t *)fin->fin_dp;
58853642Sguido
58960854Sdarrenr		if (tcp->th_flags & TH_RST)
59060854Sdarrenr			return NULL;
59153642Sguido		/*
59253642Sguido		 * The endian of the ports doesn't matter, but the ack and
59353642Sguido		 * sequence numbers do as we do mathematics on them later.
59453642Sguido		 */
59553642Sguido		is->is_dport = tcp->th_dport;
59653642Sguido		is->is_sport = tcp->th_sport;
59753642Sguido		if ((flags & (FI_W_DPORT|FI_W_SPORT)) == 0) {
59853642Sguido			hv += tcp->th_dport;
59953642Sguido			hv += tcp->th_sport;
60053642Sguido		}
60167614Sdarrenr		is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen -
60267614Sdarrenr			      (tcp->th_off << 2) +
60360854Sdarrenr			      ((tcp->th_flags & TH_SYN) ? 1 : 0) +
60460854Sdarrenr			      ((tcp->th_flags & TH_FIN) ? 1 : 0);
60560854Sdarrenr		is->is_maxsend = is->is_send;
60653642Sguido		is->is_dend = 0;
60760854Sdarrenr		is->is_maxdwin = 1;
60853642Sguido		is->is_maxswin = ntohs(tcp->th_win);
60953642Sguido		if (is->is_maxswin == 0)
61053642Sguido			is->is_maxswin = 1;
61153642Sguido		/*
61253642Sguido		 * If we're creating state for a starting connection, start the
61353642Sguido		 * timer on it as we'll never see an error if it fails to
61453642Sguido		 * connect.
61553642Sguido		 */
61660854Sdarrenr		ATOMIC_INCL(ips_stats.iss_tcp);
61753642Sguido		break;
61853642Sguido	    }
61953642Sguido	case IPPROTO_UDP :
62053642Sguido	    {
62160854Sdarrenr		tcp = (tcphdr_t *)fin->fin_dp;
62253642Sguido
62355929Sguido		is->is_dport = tcp->th_dport;
62455929Sguido		is->is_sport = tcp->th_sport;
62553642Sguido		if ((flags & (FI_W_DPORT|FI_W_SPORT)) == 0) {
62655929Sguido			hv += tcp->th_dport;
62755929Sguido			hv += tcp->th_sport;
62853642Sguido		}
62960854Sdarrenr		ATOMIC_INCL(ips_stats.iss_udp);
63053642Sguido		is->is_age = fr_udptimeout;
63153642Sguido		break;
63253642Sguido	    }
63353642Sguido	default :
63453642Sguido		return NULL;
63553642Sguido	}
63653642Sguido
63753642Sguido	KMALLOC(is, ipstate_t *);
63853642Sguido	if (is == NULL) {
63960854Sdarrenr		ATOMIC_INCL(ips_stats.iss_nomem);
64053642Sguido		return NULL;
64153642Sguido	}
64253642Sguido	bcopy((char *)&ips, (char *)is, sizeof(*is));
64353642Sguido	hv %= fr_statesize;
64460854Sdarrenr	is->is_hv = hv;
64553642Sguido	is->is_rule = fin->fin_fr;
64653642Sguido	if (is->is_rule != NULL) {
64760854Sdarrenr		ATOMIC_INC32(is->is_rule->fr_ref);
64853642Sguido		pass = is->is_rule->fr_flags;
64953642Sguido	} else
65053642Sguido		pass = fr_flags;
65153642Sguido	WRITE_ENTER(&ipf_state);
65253642Sguido
65353642Sguido	is->is_pass = pass;
65453642Sguido	is->is_pkts = 1;
65560854Sdarrenr	is->is_bytes = fin->fin_dlen + fin->fin_hlen;
65653642Sguido	/*
65753642Sguido	 * We want to check everything that is a property of this packet,
65853642Sguido	 * but we don't (automatically) care about it's fragment status as
65953642Sguido	 * this may change.
66053642Sguido	 */
66160854Sdarrenr	is->is_v = fin->fin_fi.fi_v;
66253642Sguido	is->is_opt = fin->fin_fi.fi_optmsk;
66353642Sguido	is->is_optmsk = 0xffffffff;
66453642Sguido	is->is_sec = fin->fin_fi.fi_secmsk;
66553642Sguido	is->is_secmsk = 0xffff;
66653642Sguido	is->is_auth = fin->fin_fi.fi_auth;
66753642Sguido	is->is_authmsk = 0xffff;
66880482Sdarrenr	is->is_flags = fin->fin_fl & FI_CMP;
66953642Sguido	is->is_flags |= FI_CMP << 4;
67060854Sdarrenr	is->is_flags |= flags & (FI_WILDP|FI_WILDA);
67167614Sdarrenr	if (flags & (FI_WILDP|FI_WILDA))
67267614Sdarrenr		ips_wild++;
67360854Sdarrenr	is->is_ifp[1 - out] = NULL;
67460854Sdarrenr	is->is_ifp[out] = fin->fin_ifp;
67560854Sdarrenr#ifdef	_KERNEL
67660854Sdarrenr	strncpy(is->is_ifname[out], IFNAME(fin->fin_ifp), IFNAMSIZ);
67760854Sdarrenr#endif
67860854Sdarrenr	is->is_ifname[1 - out][0] = '\0';
67953642Sguido	if (pass & FR_LOGFIRST)
68053642Sguido		is->is_pass &= ~(FR_LOGFIRST|FR_LOG);
68160854Sdarrenr	fr_stinsert(is);
68260854Sdarrenr	if (is->is_p == IPPROTO_TCP) {
68360854Sdarrenr		MUTEX_ENTER(&is->is_lock);
68460854Sdarrenr		fr_tcp_age(&is->is_age, is->is_state, fin,
68564580Sdarrenr			   0); /* 0 = packet from the source */
68660854Sdarrenr		MUTEX_EXIT(&is->is_lock);
68760854Sdarrenr	}
68853642Sguido#ifdef	IPFILTER_LOG
68953642Sguido	ipstate_log(is, ISL_NEW);
69053642Sguido#endif
69153642Sguido	RWLOCK_EXIT(&ipf_state);
69260854Sdarrenr	fin->fin_rev = IP6NEQ(is->is_dst, fin->fin_fi.fi_dst);
69375262Sdarrenr	if ((fin->fin_fi.fi_fl & FI_FRAG) && (pass & FR_KEEPFRAG))
69453642Sguido		ipfr_newfrag(ip, fin, pass ^ FR_KEEPSTATE);
69553642Sguido	return is;
69653642Sguido}
69753642Sguido
69853642Sguido
69953642Sguido
70053642Sguido/*
70153642Sguido * check to see if a packet with TCP headers fits within the TCP window.
70253642Sguido * change timeout depending on whether new packet is a SYN-ACK returning for a
70353642Sguido * SYN or a RST or FIN which indicate time to close up shop.
70453642Sguido */
70553642Sguidoint fr_tcpstate(is, fin, ip, tcp)
70653642Sguidoregister ipstate_t *is;
70753642Sguidofr_info_t *fin;
70853642Sguidoip_t *ip;
70953642Sguidotcphdr_t *tcp;
71053642Sguido{
71153642Sguido	register tcp_seq seq, ack, end;
71253642Sguido	register int ackskew;
71353642Sguido	tcpdata_t  *fdata, *tdata;
71453642Sguido	u_short	win, maxwin;
71553642Sguido	int ret = 0;
71653642Sguido	int source;
71753642Sguido
71853642Sguido	/*
71953642Sguido	 * Find difference between last checked packet and this packet.
72053642Sguido	 */
72160854Sdarrenr	source = IP6EQ(fin->fin_fi.fi_src, is->is_src);
72280482Sdarrenr	if (source && (ntohs(is->is_sport) != fin->fin_data[0]))
72380482Sdarrenr		source = 0;
72453642Sguido	fdata = &is->is_tcp.ts_data[!source];
72553642Sguido	tdata = &is->is_tcp.ts_data[source];
72653642Sguido	seq = ntohl(tcp->th_seq);
72753642Sguido	ack = ntohl(tcp->th_ack);
72853642Sguido	win = ntohs(tcp->th_win);
72960854Sdarrenr	end = seq + fin->fin_dlen - (tcp->th_off << 2) +
73053642Sguido	       ((tcp->th_flags & TH_SYN) ? 1 : 0) +
73157096Sguido	       ((tcp->th_flags & TH_FIN) ? 1 : 0);
73253642Sguido
73367614Sdarrenr	MUTEX_ENTER(&is->is_lock);
73453642Sguido	if (fdata->td_end == 0) {
73553642Sguido		/*
73653642Sguido		 * Must be a (outgoing) SYN-ACK in reply to a SYN.
73753642Sguido		 */
73853642Sguido		fdata->td_end = end;
73953642Sguido		fdata->td_maxwin = 1;
74053642Sguido		fdata->td_maxend = end + 1;
74153642Sguido	}
74253642Sguido
74353642Sguido	if (!(tcp->th_flags & TH_ACK)) {  /* Pretend an ack was sent */
74453642Sguido		ack = tdata->td_end;
74553642Sguido	} else if (((tcp->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
74653642Sguido		   (ack == 0)) {
74753642Sguido		/* gross hack to get around certain broken tcp stacks */
74853642Sguido		ack = tdata->td_end;
74953642Sguido	}
75053642Sguido
75153642Sguido	if (seq == end)
75253642Sguido		seq = end = fdata->td_end;
75353642Sguido
75453642Sguido	maxwin = tdata->td_maxwin;
75553642Sguido	ackskew = tdata->td_end - ack;
75653642Sguido
75753642Sguido#define	SEQ_GE(a,b)	((int)((a) - (b)) >= 0)
75853642Sguido#define	SEQ_GT(a,b)	((int)((a) - (b)) > 0)
75953642Sguido	if ((SEQ_GE(fdata->td_maxend, end)) &&
76057096Sguido	    (SEQ_GE(seq, fdata->td_end - maxwin)) &&
76153642Sguido/* XXX what about big packets */
76253642Sguido#define MAXACKWINDOW 66000
76353642Sguido	    (ackskew >= -MAXACKWINDOW) &&
76453642Sguido	    (ackskew <= MAXACKWINDOW)) {
76553642Sguido		/* if ackskew < 0 then this should be due to fragented
76653642Sguido		 * packets. There is no way to know the length of the
76753642Sguido		 * total packet in advance.
76853642Sguido		 * We do know the total length from the fragment cache though.
76953642Sguido		 * Note however that there might be more sessions with
77053642Sguido		 * exactly the same source and destination paramters in the
77153642Sguido		 * state cache (and source and destination is the only stuff
77253642Sguido		 * that is saved in the fragment cache). Note further that
77353642Sguido		 * some TCP connections in the state cache are hashed with
77453642Sguido		 * sport and dport as well which makes it not worthwhile to
77553642Sguido		 * look for them.
77653642Sguido		 * Thus, when ackskew is negative but still seems to belong
77753642Sguido		 * to this session, we bump up the destinations end value.
77853642Sguido		 */
77953642Sguido		if (ackskew < 0)
78053642Sguido			tdata->td_end = ack;
78153642Sguido
78253642Sguido		/* update max window seen */
78353642Sguido		if (fdata->td_maxwin < win)
78453642Sguido			fdata->td_maxwin = win;
78553642Sguido		if (SEQ_GT(end, fdata->td_end))
78653642Sguido			fdata->td_end = end;
78753642Sguido		if (SEQ_GE(ack + win, tdata->td_maxend)) {
78853642Sguido			tdata->td_maxend = ack + win;
78953642Sguido			if (win == 0)
79053642Sguido				tdata->td_maxend++;
79153642Sguido		}
79253642Sguido
79360854Sdarrenr		ATOMIC_INCL(ips_stats.iss_hits);
79453642Sguido		/*
79553642Sguido		 * Nearing end of connection, start timeout.
79653642Sguido		 */
79764580Sdarrenr		/* source ? 0 : 1 -> !source */
79864580Sdarrenr		fr_tcp_age(&is->is_age, is->is_state, fin, !source);
79953642Sguido		ret = 1;
80053642Sguido	}
80167614Sdarrenr	MUTEX_EXIT(&is->is_lock);
80253642Sguido	return ret;
80353642Sguido}
80453642Sguido
80553642Sguido
80653642Sguidostatic int fr_matchsrcdst(is, src, dst, fin, tcp)
80753642Sguidoipstate_t *is;
80860854Sdarrenrunion i6addr src, dst;
80953642Sguidofr_info_t *fin;
81053642Sguidotcphdr_t *tcp;
81153642Sguido{
81253642Sguido	int ret = 0, rev, out, flags;
81353642Sguido	u_short sp, dp;
81453642Sguido	void *ifp;
81553642Sguido
81680482Sdarrenr	rev = IP6NEQ(is->is_dst, dst);
81753642Sguido	ifp = fin->fin_ifp;
81853642Sguido	out = fin->fin_out;
81953642Sguido
82053642Sguido	if (tcp != NULL) {
82153642Sguido		flags = is->is_flags;
82253642Sguido		sp = tcp->th_sport;
82353642Sguido		dp = tcp->th_dport;
82480482Sdarrenr		if (!rev) {
82580482Sdarrenr			if (!(flags & FI_W_SPORT) && (sp != is->is_sport))
82680482Sdarrenr				rev = 1;
82780482Sdarrenr			else if (!(flags & FI_W_DPORT) && (dp != is->is_dport))
82880482Sdarrenr				rev = 1;
82980482Sdarrenr		}
83053642Sguido	} else {
83160854Sdarrenr		flags = is->is_flags & FI_WILDA;
83253642Sguido		sp = 0;
83353642Sguido		dp = 0;
83453642Sguido	}
83553642Sguido
83653642Sguido	if (rev == 0) {
83753642Sguido		if (!out) {
83860854Sdarrenr			if (is->is_ifpin == NULL || is->is_ifpin == ifp)
83953642Sguido				ret = 1;
84053642Sguido		} else {
84153642Sguido			if (is->is_ifpout == NULL || is->is_ifpout == ifp)
84253642Sguido				ret = 1;
84353642Sguido		}
84453642Sguido	} else {
84553642Sguido		if (out) {
84660854Sdarrenr			if (is->is_ifpin == NULL || is->is_ifpin == ifp)
84753642Sguido				ret = 1;
84853642Sguido		} else {
84953642Sguido			if (is->is_ifpout == NULL || is->is_ifpout == ifp)
85053642Sguido				ret = 1;
85153642Sguido		}
85253642Sguido	}
85353642Sguido	if (ret == 0)
85453642Sguido		return 0;
85553642Sguido	ret = 0;
85653642Sguido
85753642Sguido	if (rev == 0) {
85860854Sdarrenr		if (
85960854Sdarrenr		    (IP6EQ(is->is_dst, dst) || (flags & FI_W_DADDR)) &&
86060854Sdarrenr		    (IP6EQ(is->is_src, src) || (flags & FI_W_SADDR)) &&
86153642Sguido		    (!tcp || ((sp == is->is_sport || flags & FI_W_SPORT) &&
86253642Sguido		     (dp == is->is_dport || flags & FI_W_DPORT)))) {
86353642Sguido			ret = 1;
86453642Sguido		}
86553642Sguido	} else {
86660854Sdarrenr		if (
86760854Sdarrenr		    (IP6EQ(is->is_dst, src) || (flags & FI_W_DADDR)) &&
86860854Sdarrenr		    (IP6EQ(is->is_src, dst) || (flags & FI_W_SADDR)) &&
86953642Sguido		    (!tcp || ((sp == is->is_dport || flags & FI_W_DPORT) &&
87053642Sguido		     (dp == is->is_sport || flags & FI_W_SPORT)))) {
87153642Sguido			ret = 1;
87253642Sguido		}
87353642Sguido	}
87453642Sguido	if (ret == 0)
87553642Sguido		return 0;
87653642Sguido
87753642Sguido	/*
87853642Sguido	 * Whether or not this should be here, is questionable, but the aim
87953642Sguido	 * is to get this out of the main line.
88053642Sguido	 */
88153642Sguido	if (tcp == NULL)
88253642Sguido		flags = is->is_flags & (FI_CMP|(FI_CMP<<4));
88353642Sguido
88480482Sdarrenr	if (((fin->fin_fl & (flags >> 4)) != (flags & FI_CMP)) ||
88580482Sdarrenr	    (fin->fin_fi.fi_optmsk != is->is_opt) ||
88680482Sdarrenr	    (fin->fin_fi.fi_secmsk != is->is_sec) ||
88780482Sdarrenr	    (fin->fin_fi.fi_auth != is->is_auth))
88853642Sguido		return 0;
88953642Sguido
89053642Sguido	if ((flags & (FI_W_SPORT|FI_W_DPORT))) {
89153642Sguido		if ((flags & FI_W_SPORT) != 0) {
89253642Sguido			if (rev == 0) {
89353642Sguido				is->is_sport = sp;
89453642Sguido				is->is_send = htonl(tcp->th_seq);
89553642Sguido			} else {
89653642Sguido				is->is_sport = dp;
89753642Sguido				is->is_send = htonl(tcp->th_ack);
89853642Sguido			}
89953642Sguido			is->is_maxsend = is->is_send + 1;
90053642Sguido		} else if ((flags & FI_W_DPORT) != 0) {
90153642Sguido			if (rev == 0) {
90253642Sguido				is->is_dport = dp;
90353642Sguido				is->is_dend = htonl(tcp->th_ack);
90453642Sguido			} else {
90553642Sguido				is->is_dport = sp;
90653642Sguido				is->is_dend = htonl(tcp->th_seq);
90753642Sguido			}
90853642Sguido			is->is_maxdend = is->is_dend + 1;
90953642Sguido		}
91053642Sguido		is->is_flags &= ~(FI_W_SPORT|FI_W_DPORT);
91167614Sdarrenr		ips_wild--;
91253642Sguido	}
91353642Sguido
91460854Sdarrenr	ret = -1;
91560854Sdarrenr
91653642Sguido	if (!rev) {
91760854Sdarrenr		if (out) {
91853642Sguido			if (!is->is_ifpout)
91960854Sdarrenr				ret = 1;
92053642Sguido		} else {
92153642Sguido			if (!is->is_ifpin)
92260854Sdarrenr				ret = 0;
92353642Sguido		}
92453642Sguido	} else {
92560854Sdarrenr		if (out) {
92653642Sguido			if (!is->is_ifpin)
92760854Sdarrenr				ret = 0;
92853642Sguido		} else {
92953642Sguido			if (!is->is_ifpout)
93060854Sdarrenr				ret = 1;
93153642Sguido		}
93253642Sguido	}
93360854Sdarrenr
93460854Sdarrenr	if (ret >= 0) {
93560854Sdarrenr		is->is_ifp[ret] = ifp;
93660854Sdarrenr#ifdef	_KERNEL
93780482Sdarrenr		strncpy(is->is_ifname[ret], IFNAME(fin->fin_ifp),
93880482Sdarrenr			sizeof(is->is_ifname[ret]));
93960854Sdarrenr#endif
94060854Sdarrenr	}
94180482Sdarrenr	fin->fin_rev = rev;
94253642Sguido	return 1;
94353642Sguido}
94453642Sguido
94560854Sdarrenrstatic int fr_matchicmpqueryreply(v, is, icmp)
94660854Sdarrenrint v;
94760854Sdarrenripstate_t *is;
94860854Sdarrenricmphdr_t *icmp;
94960854Sdarrenr{
95060854Sdarrenr	if (v == 4) {
95160854Sdarrenr		/*
95260854Sdarrenr		 * If we matched its type on the way in, then when going out
95360854Sdarrenr		 * it will still be the same type.
95460854Sdarrenr		 */
95560854Sdarrenr		if (((icmp->icmp_type == is->is_type) ||
95660854Sdarrenr		     (icmpreplytype4[is->is_type] == icmp->icmp_type)) &&
95760854Sdarrenr		    (icmp->icmp_id == is->is_icmp.ics_id) &&
95860854Sdarrenr		    (icmp->icmp_seq == is->is_icmp.ics_seq)) {
95960854Sdarrenr			return 1;
96060854Sdarrenr		};
96160854Sdarrenr	}
96260854Sdarrenr#ifdef	USE_INET6
96360854Sdarrenr	else if (is->is_v == 6) {
96460854Sdarrenr		if ((is->is_type == ICMP6_ECHO_REPLY) &&
96560854Sdarrenr		    (icmp->icmp_type == ICMP6_ECHO_REQUEST) &&
96660854Sdarrenr		    (icmp->icmp_id == is->is_icmp.ics_id) &&
96760854Sdarrenr		    (icmp->icmp_seq == is->is_icmp.ics_seq)) {
96860854Sdarrenr			return 1;
96960854Sdarrenr		};
97060854Sdarrenr	}
97160854Sdarrenr#endif
97260854Sdarrenr	return 0;
97360854Sdarrenr}
97460854Sdarrenr
97560854Sdarrenrstatic frentry_t *fr_checkicmpmatchingstate(ip, fin)
97653642Sguidoip_t *ip;
97753642Sguidofr_info_t *fin;
97853642Sguido{
97953642Sguido	register ipstate_t *is, **isp;
98053642Sguido	register u_short sport, dport;
98153642Sguido	register u_char	pr;
98260854Sdarrenr	union i6addr dst, src;
98353642Sguido	struct icmp *ic;
98455929Sguido	u_short savelen;
98564580Sdarrenr	icmphdr_t *icmp;
98653642Sguido	fr_info_t ofin;
98764580Sdarrenr	int type, len;
98853642Sguido	tcphdr_t *tcp;
98953642Sguido	frentry_t *fr;
99053642Sguido	ip_t *oip;
99155929Sguido	u_int hv;
99253642Sguido
99357096Sguido	/*
99457096Sguido	 * Does it at least have the return (basic) IP header ?
99553642Sguido	 * Only a basic IP header (no options) should be with
99653642Sguido	 * an ICMP error header.
99753642Sguido	 */
99867614Sdarrenr	if (((ip->ip_v != 4) || (ip->ip_hl != 5)) ||
99960854Sdarrenr	    (fin->fin_plen < ICMPERR_MINPKTLEN))
100053642Sguido		return NULL;
100160854Sdarrenr	ic = (struct icmp *)fin->fin_dp;
100253642Sguido	type = ic->icmp_type;
100353642Sguido	/*
100453642Sguido	 * If it's not an error type, then return
100553642Sguido	 */
100653642Sguido	if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
100753642Sguido    	    (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
100853642Sguido    	    (type != ICMP_PARAMPROB))
100953642Sguido		return NULL;
101053642Sguido
101160854Sdarrenr	oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
101260854Sdarrenr	if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((oip->ip_hl - 5) << 2))
101353642Sguido		return NULL;
101455929Sguido
101564580Sdarrenr	/*
101664580Sdarrenr	 * Sanity checks.
101764580Sdarrenr	 */
101864580Sdarrenr	len = fin->fin_dlen - ICMPERR_ICMPHLEN;
101964580Sdarrenr	if ((len <= 0) || ((oip->ip_hl << 2) > len))
102064580Sdarrenr		return NULL;
102164580Sdarrenr
102264580Sdarrenr	/*
102364580Sdarrenr	 * Is the buffer big enough for all of it ?  It's the size of the IP
102464580Sdarrenr	 * header claimed in the encapsulated part which is of concern.  It
102564580Sdarrenr	 * may be too big to be in this buffer but not so big that it's
102664580Sdarrenr	 * outside the ICMP packet, leading to TCP deref's causing problems.
102764580Sdarrenr	 * This is possible because we don't know how big oip_hl is when we
102864580Sdarrenr	 * do the pullup early in fr_check() and thus can't gaurantee it is
102964580Sdarrenr	 * all here now.
103064580Sdarrenr	 */
103164580Sdarrenr#ifdef  _KERNEL
103264580Sdarrenr	{
103364580Sdarrenr	mb_t *m;
103464580Sdarrenr
103564580Sdarrenr# if SOLARIS
103664580Sdarrenr	m = fin->fin_qfm;
103764580Sdarrenr	if ((char *)oip + len > (char *)m->b_wptr)
103864580Sdarrenr		return NULL;
103964580Sdarrenr# else
104064580Sdarrenr	m = *(mb_t **)fin->fin_mp;
104164580Sdarrenr	if ((char *)oip + len > (char *)ip + m->m_len)
104264580Sdarrenr		return NULL;
104364580Sdarrenr# endif
104464580Sdarrenr	}
104564580Sdarrenr#endif
104664580Sdarrenr
104764580Sdarrenr	/*
104864580Sdarrenr	 * in the IPv4 case we must zero the i6addr union otherwise
104964580Sdarrenr	 * the IP6EQ and IP6NEQ macros produce the wrong results because
105064580Sdarrenr	 * of the 'junk' in the unused part of the union
105164580Sdarrenr	 */
105267614Sdarrenr	bzero((char *)&src, sizeof(src));
105367614Sdarrenr	bzero((char *)&dst, sizeof(dst));
105464580Sdarrenr
105555929Sguido	if (oip->ip_p == IPPROTO_ICMP) {
105655929Sguido		icmp = (icmphdr_t *)((char *)oip + (oip->ip_hl << 2));
105755929Sguido
105855929Sguido		/*
105955929Sguido		 * a ICMP error can only be generated as a result of an
106055929Sguido		 * ICMP query, not as the response on an ICMP error
106155929Sguido		 *
106255929Sguido		 * XXX theoretically ICMP_ECHOREP and the other reply's are
106355929Sguido		 * ICMP query's as well, but adding them here seems strange XXX
106455929Sguido		 */
106555929Sguido		 if ((icmp->icmp_type != ICMP_ECHO) &&
106655929Sguido		     (icmp->icmp_type != ICMP_TSTAMP) &&
106755929Sguido		     (icmp->icmp_type != ICMP_IREQ) &&
106857096Sguido		     (icmp->icmp_type != ICMP_MASKREQ))
106955929Sguido		    	return NULL;
107055929Sguido
107157096Sguido		/*
107255929Sguido		 * perform a lookup of the ICMP packet in the state table
107355929Sguido		 */
107455929Sguido		hv = (pr = oip->ip_p);
107560854Sdarrenr		src.in4 = oip->ip_src;
107660854Sdarrenr		hv += src.in4.s_addr;
107760854Sdarrenr		dst.in4 = oip->ip_dst;
107860854Sdarrenr		hv += dst.in4.s_addr;
107960854Sdarrenr		hv += icmp->icmp_id;
108060854Sdarrenr		hv += icmp->icmp_seq;
108155929Sguido		hv %= fr_statesize;
108255929Sguido
108364580Sdarrenr		savelen = oip->ip_len;
108464580Sdarrenr		oip->ip_len = len;
108564580Sdarrenr		ofin.fin_v = 4;
108655929Sguido		fr_makefrip(oip->ip_hl << 2, oip, &ofin);
108764580Sdarrenr		oip->ip_len = savelen;
108855929Sguido		ofin.fin_ifp = fin->fin_ifp;
108955929Sguido		ofin.fin_out = !fin->fin_out;
109055929Sguido		ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
109155929Sguido
109255929Sguido		READ_ENTER(&ipf_state);
109360854Sdarrenr		for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext)
109460854Sdarrenr			if ((is->is_p == pr) && (is->is_v == 4) &&
109560854Sdarrenr			    fr_matchsrcdst(is, src, dst, &ofin, NULL) &&
109660854Sdarrenr			    fr_matchicmpqueryreply(is->is_v, is, icmp)) {
109760854Sdarrenr				ips_stats.iss_hits++;
109860854Sdarrenr				is->is_pkts++;
109960854Sdarrenr				is->is_bytes += ip->ip_len;
110060854Sdarrenr				fr = is->is_rule;
110160854Sdarrenr				RWLOCK_EXIT(&ipf_state);
110260854Sdarrenr				return fr;
110355929Sguido			}
110455929Sguido		RWLOCK_EXIT(&ipf_state);
110555929Sguido		return NULL;
110655929Sguido	};
110755929Sguido
110853642Sguido	if ((oip->ip_p != IPPROTO_TCP) && (oip->ip_p != IPPROTO_UDP))
110953642Sguido		return NULL;
111053642Sguido
111153642Sguido	tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
111253642Sguido	dport = tcp->th_dport;
111353642Sguido	sport = tcp->th_sport;
111453642Sguido
111553642Sguido	hv = (pr = oip->ip_p);
111660854Sdarrenr	src.in4 = oip->ip_src;
111760854Sdarrenr	hv += src.in4.s_addr;
111860854Sdarrenr	dst.in4 = oip->ip_dst;
111960854Sdarrenr	hv += dst.in4.s_addr;
112053642Sguido	hv += dport;
112153642Sguido	hv += sport;
112253642Sguido	hv %= fr_statesize;
112353642Sguido	/*
112453642Sguido	 * we make an fin entry to be able to feed it to
112553642Sguido	 * matchsrcdst note that not all fields are encessary
112653642Sguido	 * but this is the cleanest way. Note further we fill
112753642Sguido	 * in fin_mp such that if someone uses it we'll get
112853642Sguido	 * a kernel panic. fr_matchsrcdst does not use this.
112953642Sguido	 *
113053642Sguido	 * watch out here, as ip is in host order and oip in network
113153642Sguido	 * order. Any change we make must be undone afterwards.
113253642Sguido	 */
113355929Sguido	savelen = oip->ip_len;
113464580Sdarrenr	oip->ip_len = len;
113564580Sdarrenr	ofin.fin_v = 4;
113653642Sguido	fr_makefrip(oip->ip_hl << 2, oip, &ofin);
113755929Sguido	oip->ip_len = savelen;
113853642Sguido	ofin.fin_ifp = fin->fin_ifp;
113953642Sguido	ofin.fin_out = !fin->fin_out;
114053642Sguido	ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
114153642Sguido	READ_ENTER(&ipf_state);
114260854Sdarrenr	for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) {
114353642Sguido		/*
114453642Sguido		 * Only allow this icmp though if the
114553642Sguido		 * encapsulated packet was allowed through the
114653642Sguido		 * other way around. Note that the minimal amount
114753642Sguido		 * of info present does not allow for checking against
114853642Sguido		 * tcp internals such as seq and ack numbers.
114953642Sguido		 */
115060854Sdarrenr		if ((is->is_p == pr) && (is->is_v == 4) &&
115153642Sguido		    fr_matchsrcdst(is, src, dst, &ofin, tcp)) {
115253642Sguido			fr = is->is_rule;
115353642Sguido			ips_stats.iss_hits++;
115453642Sguido			is->is_pkts++;
115560854Sdarrenr			is->is_bytes += fin->fin_plen;
115653642Sguido			/*
115753642Sguido			 * we deliberately do not touch the timeouts
115853642Sguido			 * for the accompanying state table entry.
115953642Sguido			 * It remains to be seen if that is correct. XXX
116053642Sguido			 */
116153642Sguido			RWLOCK_EXIT(&ipf_state);
116253642Sguido			return fr;
116353642Sguido		}
116453642Sguido	}
116553642Sguido	RWLOCK_EXIT(&ipf_state);
116653642Sguido	return NULL;
116753642Sguido}
116853642Sguido
116967614Sdarrenr
117067614Sdarrenrstatic void fr_ipsmove(isp, is, hv)
117167614Sdarrenripstate_t **isp, *is;
117267614Sdarrenru_int hv;
117367614Sdarrenr{
117467614Sdarrenr	u_int hvm;
117567614Sdarrenr
117667614Sdarrenr	hvm = is->is_hv;
117767614Sdarrenr	/*
117867614Sdarrenr	 * Remove the hash from the old location...
117967614Sdarrenr	 */
118067614Sdarrenr	if (is->is_hnext)
118167614Sdarrenr		is->is_hnext->is_phnext = isp;
118267614Sdarrenr	*isp = is->is_hnext;
118367614Sdarrenr	if (ips_table[hvm] == NULL)
118467614Sdarrenr		ips_stats.iss_inuse--;
118567614Sdarrenr
118667614Sdarrenr	/*
118767614Sdarrenr	 * ...and put the hash in the new one.
118867614Sdarrenr	 */
118967614Sdarrenr	hvm = hv % fr_statesize;
119067853Sdarrenr	is->is_hv = hvm;
119167614Sdarrenr	isp = &ips_table[hvm];
119267614Sdarrenr	if (*isp)
119367614Sdarrenr		(*isp)->is_phnext = &is->is_hnext;
119467614Sdarrenr	else
119567614Sdarrenr		ips_stats.iss_inuse++;
119667614Sdarrenr	is->is_phnext = isp;
119767614Sdarrenr	is->is_hnext = *isp;
119867614Sdarrenr	*isp = is;
119967614Sdarrenr}
120067614Sdarrenr
120167614Sdarrenr
120253642Sguido/*
120353642Sguido * Check if a packet has a registered state.
120453642Sguido */
120553642Sguidofrentry_t *fr_checkstate(ip, fin)
120653642Sguidoip_t *ip;
120753642Sguidofr_info_t *fin;
120853642Sguido{
120960854Sdarrenr	union i6addr dst, src;
121053642Sguido	register ipstate_t *is, **isp;
121153642Sguido	register u_char pr;
121260854Sdarrenr	u_int hv, hvm, hlen, tryagain, pass, v;
121353642Sguido	struct icmp *ic;
121453642Sguido	frentry_t *fr;
121553642Sguido	tcphdr_t *tcp;
121653642Sguido
121780482Sdarrenr	if (fr_state_lock || (fin->fin_off != 0) || (fin->fin_fl & FI_SHORT))
121853642Sguido		return NULL;
121953642Sguido
122053642Sguido	is = NULL;
122153642Sguido	hlen = fin->fin_hlen;
122253642Sguido	tcp = (tcphdr_t *)((char *)ip + hlen);
122353642Sguido	ic = (struct icmp *)tcp;
122460854Sdarrenr	hv = (pr = fin->fin_fi.fi_p);
122560854Sdarrenr	src = fin->fin_fi.fi_src;
122660854Sdarrenr	dst = fin->fin_fi.fi_dst;
122760854Sdarrenr	hv += src.in4.s_addr;
122860854Sdarrenr	hv += dst.in4.s_addr;
122953642Sguido
123053642Sguido	/*
123153642Sguido	 * Search the hash table for matching packet header info.
123253642Sguido	 */
123360854Sdarrenr	v = fin->fin_fi.fi_v;
123460854Sdarrenr	switch (fin->fin_fi.fi_p)
123553642Sguido	{
123660854Sdarrenr#ifdef	USE_INET6
123760854Sdarrenr	case IPPROTO_ICMPV6 :
123860854Sdarrenr		if (v == 6) {
123960854Sdarrenr			if (fin->fin_out)
124060854Sdarrenr				hv -= dst.in4.s_addr;
124160854Sdarrenr			else
124260854Sdarrenr				hv -= src.in4.s_addr;
124360854Sdarrenr			if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
124460854Sdarrenr			    (ic->icmp_type == ICMP6_ECHO_REPLY)) {
124560854Sdarrenr				hv += ic->icmp_id;
124660854Sdarrenr				hv += ic->icmp_seq;
124760854Sdarrenr			}
124860854Sdarrenr		}
124960854Sdarrenr#endif
125053642Sguido	case IPPROTO_ICMP :
125160854Sdarrenr		if (v == 4) {
125255929Sguido			hv += ic->icmp_id;
125355929Sguido			hv += ic->icmp_seq;
125455929Sguido		}
125553642Sguido		hv %= fr_statesize;
125653642Sguido		READ_ENTER(&ipf_state);
125760854Sdarrenr		for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) {
125860854Sdarrenr			if ((is->is_p == pr) && (is->is_v == v) &&
125960854Sdarrenr			    fr_matchsrcdst(is, src, dst, fin, NULL) &&
126060854Sdarrenr			    fr_matchicmpqueryreply(v, is, ic)) {
126180482Sdarrenr				if (fin->fin_rev)
126280482Sdarrenr					is->is_age = fr_icmpacktimeout;
126380482Sdarrenr				else
126480482Sdarrenr					is->is_age = fr_icmptimeout;
126553642Sguido				break;
126653642Sguido			}
126760854Sdarrenr		}
126853642Sguido		if (is != NULL)
126953642Sguido			break;
127053642Sguido		RWLOCK_EXIT(&ipf_state);
127153642Sguido		/*
127253642Sguido		 * No matching icmp state entry. Perhaps this is a
127353642Sguido		 * response to another state entry.
127453642Sguido		 */
127560854Sdarrenr#ifdef	USE_INET6
127660854Sdarrenr		if (v == 6)
127760854Sdarrenr			fr = fr_checkicmp6matchingstate((ip6_t *)ip, fin);
127860854Sdarrenr		else
127960854Sdarrenr#endif
128060854Sdarrenr			fr = fr_checkicmpmatchingstate(ip, fin);
128153642Sguido		if (fr)
128253642Sguido			return fr;
128353642Sguido		break;
128453642Sguido	case IPPROTO_TCP :
128553642Sguido	    {
128667614Sdarrenr		register u_short dport, sport;
128764580Sdarrenr		register int i;
128853642Sguido
128964580Sdarrenr		i = tcp->th_flags;
129064580Sdarrenr		/*
129164580Sdarrenr		 * Just plain ignore RST flag set with either FIN or SYN.
129264580Sdarrenr		 */
129364580Sdarrenr		if ((i & TH_RST) &&
129464580Sdarrenr		    ((i & (TH_FIN|TH_SYN|TH_RST)) != TH_RST))
129564580Sdarrenr			break;
129667614Sdarrenr	case IPPROTO_UDP :
129767614Sdarrenr		dport = tcp->th_dport;
129867614Sdarrenr		sport = tcp->th_sport;
129953642Sguido		tryagain = 0;
130053642Sguido		hv += dport;
130153642Sguido		hv += sport;
130267614Sdarrenr		READ_ENTER(&ipf_state);
130367614Sdarrenrretry_tcpudp:
130453642Sguido		hvm = hv % fr_statesize;
130567614Sdarrenr		for (isp = &ips_table[hvm]; (is = *isp); isp = &is->is_hnext)
130660854Sdarrenr			if ((is->is_p == pr) && (is->is_v == v) &&
130753642Sguido			    fr_matchsrcdst(is, src, dst, fin, tcp)) {
130867614Sdarrenr				if ((pr == IPPROTO_TCP)) {
130967614Sdarrenr					if (!fr_tcpstate(is, fin, ip, tcp)) {
131067614Sdarrenr						continue;
131167614Sdarrenr					}
131280482Sdarrenr				} else if ((pr == IPPROTO_UDP)) {
131380482Sdarrenr					if (fin->fin_rev)
131480482Sdarrenr						is->is_age = fr_udpacktimeout;
131580482Sdarrenr					else
131680482Sdarrenr						is->is_age = fr_udptimeout;
131767614Sdarrenr				}
131853642Sguido				break;
131953642Sguido			}
132067614Sdarrenr		if (is != NULL) {
132167614Sdarrenr			if (tryagain &&
132267614Sdarrenr			    !(is->is_flags & (FI_WILDP|FI_WILDA))) {
132367614Sdarrenr				hv += dport;
132467614Sdarrenr				hv += sport;
132567614Sdarrenr				fr_ipsmove(isp, is, hv);
132667614Sdarrenr				MUTEX_DOWNGRADE(&ipf_state);
132767614Sdarrenr			}
132853642Sguido			break;
132967614Sdarrenr		}
133053642Sguido		RWLOCK_EXIT(&ipf_state);
133167614Sdarrenr		if (!tryagain && ips_wild) {
133267614Sdarrenr			hv -= dport;
133367614Sdarrenr			hv -= sport;
133453642Sguido			tryagain = 1;
133567614Sdarrenr			WRITE_ENTER(&ipf_state);
133667614Sdarrenr			goto retry_tcpudp;
133753642Sguido		}
133853642Sguido		break;
133953642Sguido	    }
134053642Sguido	default :
134153642Sguido		break;
134253642Sguido	}
134353642Sguido	if (is == NULL) {
134460854Sdarrenr		ATOMIC_INCL(ips_stats.iss_miss);
134553642Sguido		return NULL;
134653642Sguido	}
134760854Sdarrenr	MUTEX_ENTER(&is->is_lock);
134860854Sdarrenr	is->is_bytes += fin->fin_plen;
134953642Sguido	ips_stats.iss_hits++;
135053642Sguido	is->is_pkts++;
135160854Sdarrenr	MUTEX_EXIT(&is->is_lock);
135253642Sguido	fr = is->is_rule;
135353642Sguido	fin->fin_fr = fr;
135453642Sguido	pass = is->is_pass;
135560854Sdarrenr#ifndef	_KERNEL
135660854Sdarrenr	if (tcp->th_flags & TCP_CLOSE)
135760854Sdarrenr		fr_delstate(is);
135860854Sdarrenr#endif
135953642Sguido	RWLOCK_EXIT(&ipf_state);
136075262Sdarrenr	if ((fin->fin_fi.fi_fl & FI_FRAG) && (pass & FR_KEEPFRAG))
136153642Sguido		ipfr_newfrag(ip, fin, pass ^ FR_KEEPSTATE);
136253642Sguido	return fr;
136353642Sguido}
136453642Sguido
136553642Sguido
136660854Sdarrenrvoid ip_statesync(ifp)
136760854Sdarrenrvoid *ifp;
136860854Sdarrenr{
136960854Sdarrenr	register ipstate_t *is;
137060854Sdarrenr
137160854Sdarrenr	WRITE_ENTER(&ipf_state);
137260854Sdarrenr	for (is = ips_list; is; is = is->is_next) {
137360854Sdarrenr		if (is->is_ifpin == ifp) {
137460854Sdarrenr			is->is_ifpin = GETUNIT(is->is_ifname[0], is->is_v);
137560854Sdarrenr			if (!is->is_ifpin)
137660854Sdarrenr				is->is_ifpin = (void *)-1;
137760854Sdarrenr		}
137860854Sdarrenr		if (is->is_ifpout == ifp) {
137960854Sdarrenr			is->is_ifpout = GETUNIT(is->is_ifname[1], is->is_v);
138060854Sdarrenr			if (!is->is_ifpout)
138160854Sdarrenr				is->is_ifpout = (void *)-1;
138260854Sdarrenr		}
138360854Sdarrenr	}
138460854Sdarrenr	RWLOCK_EXIT(&ipf_state);
138560854Sdarrenr}
138660854Sdarrenr
138760854Sdarrenr
138872006Sdarrenr/*
138972006Sdarrenr * Must always be called with fr_ipfstate held as a write lock.
139072006Sdarrenr */
139153642Sguidostatic void fr_delstate(is)
139253642Sguidoipstate_t *is;
139353642Sguido{
139453642Sguido	frentry_t *fr;
139553642Sguido
139667614Sdarrenr	if (is->is_flags & (FI_WILDP|FI_WILDA))
139767614Sdarrenr		ips_wild--;
139860854Sdarrenr	if (is->is_next)
139960854Sdarrenr		is->is_next->is_pnext = is->is_pnext;
140060854Sdarrenr	*is->is_pnext = is->is_next;
140160854Sdarrenr	if (is->is_hnext)
140260854Sdarrenr		is->is_hnext->is_phnext = is->is_phnext;
140360854Sdarrenr	*is->is_phnext = is->is_hnext;
140460854Sdarrenr	if (ips_table[is->is_hv] == NULL)
140560854Sdarrenr		ips_stats.iss_inuse--;
140660854Sdarrenr
140753642Sguido	fr = is->is_rule;
140853642Sguido	if (fr != NULL) {
140972006Sdarrenr		fr->fr_ref--;
141072006Sdarrenr		if (fr->fr_ref == 0) {
141153642Sguido			KFREE(fr);
141272006Sdarrenr		}
141353642Sguido	}
141460854Sdarrenr#ifdef	_KERNEL
141560854Sdarrenr	MUTEX_DESTROY(&is->is_lock);
141660854Sdarrenr#endif
141753642Sguido	KFREE(is);
141860854Sdarrenr	ips_num--;
141953642Sguido}
142053642Sguido
142153642Sguido
142253642Sguido/*
142353642Sguido * Free memory in use by all state info. kept.
142453642Sguido */
142553642Sguidovoid fr_stateunload()
142653642Sguido{
142760854Sdarrenr	register ipstate_t *is;
142853642Sguido
142953642Sguido	WRITE_ENTER(&ipf_state);
143060854Sdarrenr	while ((is = ips_list))
143160854Sdarrenr		fr_delstate(is);
143253642Sguido	ips_stats.iss_inuse = 0;
143353642Sguido	ips_num = 0;
143453642Sguido	RWLOCK_EXIT(&ipf_state);
143580482Sdarrenr	if (ips_table)
143680482Sdarrenr		KFREES(ips_table, fr_statesize * sizeof(ipstate_t *));
143753642Sguido	ips_table = NULL;
143853642Sguido}
143953642Sguido
144053642Sguido
144153642Sguido/*
144253642Sguido * Slowly expire held state for thingslike UDP and ICMP.  Timeouts are set
144353642Sguido * in expectation of this being called twice per second.
144453642Sguido */
144553642Sguidovoid fr_timeoutstate()
144653642Sguido{
144753642Sguido	register ipstate_t *is, **isp;
144853642Sguido#if defined(_KERNEL) && !SOLARIS
144953642Sguido	int s;
145053642Sguido#endif
145153642Sguido
145253642Sguido	SPL_NET(s);
145353642Sguido	WRITE_ENTER(&ipf_state);
145460854Sdarrenr	for (isp = &ips_list; (is = *isp); )
145560854Sdarrenr		if (is->is_age && !--is->is_age) {
145660854Sdarrenr			if (is->is_p == IPPROTO_TCP)
145760854Sdarrenr				ips_stats.iss_fin++;
145860854Sdarrenr			else
145960854Sdarrenr				ips_stats.iss_expire++;
146053642Sguido#ifdef	IPFILTER_LOG
146160854Sdarrenr			ipstate_log(is, ISL_EXPIRE);
146253642Sguido#endif
146360854Sdarrenr			fr_delstate(is);
146460854Sdarrenr		} else
146560854Sdarrenr			isp = &is->is_next;
146655929Sguido	if (fr_state_doflush) {
146755929Sguido		(void) fr_state_flush(1);
146855929Sguido		fr_state_doflush = 0;
146955929Sguido	}
147072006Sdarrenr	RWLOCK_EXIT(&ipf_state);
147172006Sdarrenr	SPL_X(s);
147253642Sguido}
147353642Sguido
147453642Sguido
147553642Sguido/*
147653642Sguido * Original idea freom Pradeep Krishnan for use primarily with NAT code.
147753642Sguido * (pkrishna@netcom.com)
147864580Sdarrenr *
147964580Sdarrenr * Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29:
148064580Sdarrenr *
148164580Sdarrenr * - (try to) base state transitions on real evidence only,
148264580Sdarrenr *   i.e. packets that are sent and have been received by ipfilter;
148364580Sdarrenr *   diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used.
148464580Sdarrenr *
148564580Sdarrenr * - deal with half-closed connections correctly;
148664580Sdarrenr *
148764580Sdarrenr * - store the state of the source in state[0] such that ipfstat
148864580Sdarrenr *   displays the state as source/dest instead of dest/source; the calls
148964580Sdarrenr *   to fr_tcp_age have been changed accordingly.
149064580Sdarrenr *
149164580Sdarrenr * Parameters:
149264580Sdarrenr *
149364580Sdarrenr *    state[0] = state of source (host that initiated connection)
149464580Sdarrenr *    state[1] = state of dest   (host that accepted the connection)
149564580Sdarrenr *
149664580Sdarrenr *    dir == 0 : a packet from source to dest
149764580Sdarrenr *    dir == 1 : a packet from dest to source
149864580Sdarrenr *
149953642Sguido */
150060854Sdarrenrvoid fr_tcp_age(age, state, fin, dir)
150153642Sguidou_long *age;
150253642Sguidou_char *state;
150353642Sguidofr_info_t *fin;
150453642Sguidoint dir;
150553642Sguido{
150653642Sguido	tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp;
150753642Sguido	u_char flags = tcp->th_flags;
150853642Sguido	int dlen, ostate;
150953642Sguido
151053642Sguido	ostate = state[1 - dir];
151153642Sguido
151260854Sdarrenr	dlen = fin->fin_plen - fin->fin_hlen - (tcp->th_off << 2);
151353642Sguido
151453642Sguido	if (flags & TH_RST) {
151553642Sguido		if (!(tcp->th_flags & TH_PUSH) && !dlen) {
151653642Sguido			*age = fr_tcpclosed;
151753642Sguido			state[dir] = TCPS_CLOSED;
151853642Sguido		} else {
151953642Sguido			*age = fr_tcpclosewait;
152053642Sguido			state[dir] = TCPS_CLOSE_WAIT;
152153642Sguido		}
152253642Sguido		return;
152353642Sguido	}
152453642Sguido
152564580Sdarrenr	*age = fr_tcptimeout; /* default 4 mins */
152653642Sguido
152753642Sguido	switch(state[dir])
152853642Sguido	{
152964580Sdarrenr	case TCPS_CLOSED: /* 0 */
153064580Sdarrenr		if ((flags & TH_OPENING) == TH_OPENING) {
153164580Sdarrenr			/*
153264580Sdarrenr			 * 'dir' received an S and sends SA in response,
153364580Sdarrenr			 * CLOSED -> SYN_RECEIVED
153464580Sdarrenr			 */
153564580Sdarrenr			state[dir] = TCPS_SYN_RECEIVED;
153664580Sdarrenr			*age = fr_tcptimeout;
153764580Sdarrenr		} else if ((flags & (TH_SYN|TH_ACK)) == TH_SYN) {
153864580Sdarrenr			/* 'dir' sent S, CLOSED -> SYN_SENT */
153964580Sdarrenr			state[dir] = TCPS_SYN_SENT;
154064580Sdarrenr			*age = fr_tcptimeout;
154164580Sdarrenr		}
154264580Sdarrenr		/*
154364580Sdarrenr		 * The next piece of code makes it possible to get
154464580Sdarrenr		 * already established connections into the state table
154564580Sdarrenr		 * after a restart or reload of the filter rules; this
154664580Sdarrenr		 * does not work when a strict 'flags S keep state' is
154764580Sdarrenr		 * used for tcp connections of course
154864580Sdarrenr		 */
154953642Sguido		if ((flags & (TH_FIN|TH_SYN|TH_RST|TH_ACK)) == TH_ACK) {
155064580Sdarrenr			/* we saw an A, guess 'dir' is in ESTABLISHED mode */
155153642Sguido			state[dir] = TCPS_ESTABLISHED;
155253642Sguido			*age = fr_tcpidletimeout;
155353642Sguido		}
155464580Sdarrenr		/*
155564580Sdarrenr		 * TODO: besides regular ACK packets we can have other
155664580Sdarrenr		 * packets as well; it is yet to be determined how we
155764580Sdarrenr		 * should initialize the states in those cases
155864580Sdarrenr		 */
155964580Sdarrenr		break;
156064580Sdarrenr
156164580Sdarrenr	case TCPS_LISTEN: /* 1 */
156264580Sdarrenr		/* NOT USED */
156364580Sdarrenr		break;
156464580Sdarrenr
156564580Sdarrenr	case TCPS_SYN_SENT: /* 2 */
156664580Sdarrenr		if ((flags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
156764580Sdarrenr			/*
156864580Sdarrenr			 * We see an A from 'dir' which is in SYN_SENT
156964580Sdarrenr			 * state: 'dir' sent an A in response to an SA
157064580Sdarrenr			 * which it received, SYN_SENT -> ESTABLISHED
157164580Sdarrenr			 */
157264580Sdarrenr			state[dir] = TCPS_ESTABLISHED;
157364580Sdarrenr			*age = fr_tcpidletimeout;
157464580Sdarrenr		} else if (flags & TH_FIN) {
157564580Sdarrenr			/*
157664580Sdarrenr			 * We see an F from 'dir' which is in SYN_SENT
157764580Sdarrenr			 * state and wants to close its side of the
157864580Sdarrenr			 * connection; SYN_SENT -> FIN_WAIT_1
157964580Sdarrenr			 */
158064580Sdarrenr			state[dir] = TCPS_FIN_WAIT_1;
158164580Sdarrenr			*age = fr_tcpidletimeout; /* or fr_tcptimeout? */
158264580Sdarrenr		} else if ((flags & TH_OPENING) == TH_OPENING) {
158364580Sdarrenr			/*
158464580Sdarrenr			 * We see an SA from 'dir' which is already in
158564580Sdarrenr			 * SYN_SENT state, this means we have a
158664580Sdarrenr			 * simultaneous open; SYN_SENT -> SYN_RECEIVED
158764580Sdarrenr			 */
158853642Sguido			state[dir] = TCPS_SYN_RECEIVED;
158964580Sdarrenr			*age = fr_tcptimeout;
159064580Sdarrenr		}
159153642Sguido		break;
159264580Sdarrenr
159364580Sdarrenr	case TCPS_SYN_RECEIVED: /* 3 */
159464580Sdarrenr		if ((flags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
159564580Sdarrenr			/*
159664580Sdarrenr			 * We see an A from 'dir' which was in SYN_RECEIVED
159764580Sdarrenr			 * state so it must now be in established state,
159864580Sdarrenr			 * SYN_RECEIVED -> ESTABLISHED
159964580Sdarrenr			 */
160053642Sguido			state[dir] = TCPS_ESTABLISHED;
160153642Sguido			*age = fr_tcpidletimeout;
160264580Sdarrenr		} else if (flags & TH_FIN) {
160364580Sdarrenr			/*
160464580Sdarrenr			 * We see an F from 'dir' which is in SYN_RECEIVED
160564580Sdarrenr			 * state and wants to close its side of the connection;
160664580Sdarrenr			 * SYN_RECEIVED -> FIN_WAIT_1
160764580Sdarrenr			 */
160864580Sdarrenr			state[dir] = TCPS_FIN_WAIT_1;
160967614Sdarrenr			*age = fr_tcpidletimeout;
161053642Sguido		}
161153642Sguido		break;
161264580Sdarrenr
161364580Sdarrenr	case TCPS_ESTABLISHED: /* 4 */
161453642Sguido		if (flags & TH_FIN) {
161564580Sdarrenr			/*
161664580Sdarrenr			 * 'dir' closed its side of the connection; this
161764580Sdarrenr			 * gives us a half-closed connection;
161864580Sdarrenr			 * ESTABLISHED -> FIN_WAIT_1
161964580Sdarrenr			 */
162064580Sdarrenr			state[dir] = TCPS_FIN_WAIT_1;
162167614Sdarrenr			*age = fr_tcphalfclosed;
162264580Sdarrenr		} else if (flags & TH_ACK) {
162364580Sdarrenr			/* an ACK, should we exclude other flags here? */
162464580Sdarrenr			if (ostate == TCPS_FIN_WAIT_1) {
162564580Sdarrenr				/*
162664580Sdarrenr				 * We know the other side did an active close,
162764580Sdarrenr				 * so we are ACKing the recvd FIN packet (does
162864580Sdarrenr				 * the window matching code guarantee this?)
162964580Sdarrenr				 * and go into CLOSE_WAIT state; this gives us
163064580Sdarrenr				 * a half-closed connection
163164580Sdarrenr				 */
163264580Sdarrenr				state[dir] = TCPS_CLOSE_WAIT;
163367614Sdarrenr				*age = fr_tcphalfclosed;
163464580Sdarrenr			} else if (ostate < TCPS_CLOSE_WAIT)
163564580Sdarrenr				/*
163664580Sdarrenr				 * Still a fully established connection,
163764580Sdarrenr				 * reset timeout
163864580Sdarrenr				 */
163964580Sdarrenr				*age = fr_tcpidletimeout;
164053642Sguido		}
164153642Sguido		break;
164264580Sdarrenr
164364580Sdarrenr	case TCPS_CLOSE_WAIT: /* 5 */
164464580Sdarrenr		if (flags & TH_FIN) {
164564580Sdarrenr			/*
164664580Sdarrenr			 * Application closed and 'dir' sent a FIN, we're now
164764580Sdarrenr			 * going into LAST_ACK state
164864580Sdarrenr			 */
164953642Sguido			*age  = fr_tcplastack;
165053642Sguido			state[dir] = TCPS_LAST_ACK;
165164580Sdarrenr		} else {
165264580Sdarrenr			/*
165364580Sdarrenr			 * We remain in CLOSE_WAIT because the other side has
165464580Sdarrenr			 * closed already and we did not close our side yet;
165564580Sdarrenr			 * reset timeout
165664580Sdarrenr			 */
165767614Sdarrenr			*age  = fr_tcphalfclosed;
165864580Sdarrenr		}
165964580Sdarrenr		break;
166064580Sdarrenr
166164580Sdarrenr	case TCPS_FIN_WAIT_1: /* 6 */
166264580Sdarrenr		if ((flags & TH_ACK) && ostate > TCPS_CLOSE_WAIT) {
166364580Sdarrenr			/*
166464580Sdarrenr			 * If the other side is not active anymore it has sent
166564580Sdarrenr			 * us a FIN packet that we are ack'ing now with an ACK;
166664580Sdarrenr			 * this means both sides have now closed the connection
166764580Sdarrenr			 * and we go into TIME_WAIT
166864580Sdarrenr			 */
166964580Sdarrenr			/*
167064580Sdarrenr			 * XXX: how do we know we really are ACKing the FIN
167164580Sdarrenr			 * packet here? does the window code guarantee that?
167264580Sdarrenr			 */
167364580Sdarrenr			state[dir] = TCPS_TIME_WAIT;
167464580Sdarrenr			*age = fr_tcptimeout;
167553642Sguido		} else
167664580Sdarrenr			/*
167764580Sdarrenr			 * We closed our side of the connection already but the
167864580Sdarrenr			 * other side is still active (ESTABLISHED/CLOSE_WAIT);
167964580Sdarrenr			 * continue with this half-closed connection
168064580Sdarrenr			 */
168167614Sdarrenr			*age = fr_tcphalfclosed;
168253642Sguido		break;
168364580Sdarrenr
168464580Sdarrenr	case TCPS_CLOSING: /* 7 */
168564580Sdarrenr		/* NOT USED */
168664580Sdarrenr		break;
168764580Sdarrenr
168864580Sdarrenr	case TCPS_LAST_ACK: /* 8 */
168953642Sguido		if (flags & TH_ACK) {
169064580Sdarrenr			if ((flags & TH_PUSH) || dlen)
169164580Sdarrenr				/*
169264580Sdarrenr				 * There is still data to be delivered, reset
169364580Sdarrenr				 * timeout
169464580Sdarrenr				 */
169553642Sguido				*age  = fr_tcplastack;
169653642Sguido		}
169764580Sdarrenr		/*
169864580Sdarrenr		 * We cannot detect when we go out of LAST_ACK state to CLOSED
169964580Sdarrenr		 * because that is based on the reception of ACK packets;
170064580Sdarrenr		 * ipfilter can only detect that a packet has been sent by a
170164580Sdarrenr		 * host
170264580Sdarrenr		 */
170353642Sguido		break;
170464580Sdarrenr
170564580Sdarrenr	case TCPS_FIN_WAIT_2: /* 9 */
170664580Sdarrenr		/* NOT USED */
170764580Sdarrenr		break;
170864580Sdarrenr
170964580Sdarrenr	case TCPS_TIME_WAIT: /* 10 */
171064580Sdarrenr		/* we're in 2MSL timeout now */
171164580Sdarrenr		break;
171253642Sguido	}
171353642Sguido}
171453642Sguido
171553642Sguido
171653642Sguido#ifdef	IPFILTER_LOG
171753642Sguidovoid ipstate_log(is, type)
171853642Sguidostruct ipstate *is;
171953642Sguidou_int type;
172053642Sguido{
172153642Sguido	struct	ipslog	ipsl;
172253642Sguido	void *items[1];
172353642Sguido	size_t sizes[1];
172453642Sguido	int types[1];
172553642Sguido
172653642Sguido	ipsl.isl_type = type;
172753642Sguido	ipsl.isl_pkts = is->is_pkts;
172853642Sguido	ipsl.isl_bytes = is->is_bytes;
172953642Sguido	ipsl.isl_src = is->is_src;
173053642Sguido	ipsl.isl_dst = is->is_dst;
173153642Sguido	ipsl.isl_p = is->is_p;
173260854Sdarrenr	ipsl.isl_v = is->is_v;
173353642Sguido	ipsl.isl_flags = is->is_flags;
173453642Sguido	if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) {
173553642Sguido		ipsl.isl_sport = is->is_sport;
173653642Sguido		ipsl.isl_dport = is->is_dport;
173753642Sguido		if (ipsl.isl_p == IPPROTO_TCP) {
173853642Sguido			ipsl.isl_state[0] = is->is_state[0];
173953642Sguido			ipsl.isl_state[1] = is->is_state[1];
174053642Sguido		}
174153642Sguido	} else if (ipsl.isl_p == IPPROTO_ICMP)
174253642Sguido		ipsl.isl_itype = is->is_icmp.ics_type;
174353642Sguido	else {
174453642Sguido		ipsl.isl_ps.isl_filler[0] = 0;
174553642Sguido		ipsl.isl_ps.isl_filler[1] = 0;
174653642Sguido	}
174753642Sguido	items[0] = &ipsl;
174853642Sguido	sizes[0] = sizeof(ipsl);
174953642Sguido	types[0] = 0;
175053642Sguido
175153642Sguido	(void) ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1);
175253642Sguido}
175353642Sguido#endif
175457096Sguido
175557096Sguido
175660854Sdarrenr#ifdef	USE_INET6
175760854Sdarrenrfrentry_t *fr_checkicmp6matchingstate(ip, fin)
175860854Sdarrenrip6_t *ip;
175960854Sdarrenrfr_info_t *fin;
176057096Sguido{
176160854Sdarrenr	register ipstate_t *is, **isp;
176260854Sdarrenr	register u_short sport, dport;
176360854Sdarrenr	register u_char	pr;
176460854Sdarrenr	struct icmp6_hdr *ic, *oic;
176560854Sdarrenr	union i6addr dst, src;
176660854Sdarrenr	u_short savelen;
176760854Sdarrenr	fr_info_t ofin;
176860854Sdarrenr	tcphdr_t *tcp;
176960854Sdarrenr	frentry_t *fr;
177060854Sdarrenr	ip6_t *oip;
177160854Sdarrenr	int type;
177260854Sdarrenr	u_int hv;
177357096Sguido
177460854Sdarrenr	/*
177560854Sdarrenr	 * Does it at least have the return (basic) IP header ?
177660854Sdarrenr	 * Only a basic IP header (no options) should be with
177760854Sdarrenr	 * an ICMP error header.
177860854Sdarrenr	 */
177960854Sdarrenr	if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN))
178060854Sdarrenr		return NULL;
178160854Sdarrenr	ic = (struct icmp6_hdr *)fin->fin_dp;
178260854Sdarrenr	type = ic->icmp6_type;
178360854Sdarrenr	/*
178460854Sdarrenr	 * If it's not an error type, then return
178560854Sdarrenr	 */
178660854Sdarrenr	if ((type != ICMP6_DST_UNREACH) && (type != ICMP6_PACKET_TOO_BIG) &&
178760854Sdarrenr	    (type != ICMP6_TIME_EXCEEDED) && (type != ICMP6_PARAM_PROB))
178860854Sdarrenr		return NULL;
178960854Sdarrenr
179060854Sdarrenr	oip = (ip6_t *)((char *)ic + ICMPERR_ICMPHLEN);
179160854Sdarrenr	if (fin->fin_plen < sizeof(*oip))
179260854Sdarrenr		return NULL;
179360854Sdarrenr
179460854Sdarrenr	if (oip->ip6_nxt == IPPROTO_ICMPV6) {
179560854Sdarrenr		oic = (struct icmp6_hdr *)(oip + 1);
179660854Sdarrenr		/*
179760854Sdarrenr		 * a ICMP error can only be generated as a result of an
179860854Sdarrenr		 * ICMP query, not as the response on an ICMP error
179960854Sdarrenr		 *
180060854Sdarrenr		 * XXX theoretically ICMP_ECHOREP and the other reply's are
180160854Sdarrenr		 * ICMP query's as well, but adding them here seems strange XXX
180260854Sdarrenr		 */
180360854Sdarrenr		 if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK))
180460854Sdarrenr		    	return NULL;
180560854Sdarrenr
180660854Sdarrenr		/*
180760854Sdarrenr		 * perform a lookup of the ICMP packet in the state table
180860854Sdarrenr		 */
180960854Sdarrenr		hv = (pr = oip->ip6_nxt);
181060854Sdarrenr		src.in6 = oip->ip6_src;
181160854Sdarrenr		hv += src.in4.s_addr;
181260854Sdarrenr		dst.in6 = oip->ip6_dst;
181360854Sdarrenr		hv += dst.in4.s_addr;
181460854Sdarrenr		hv += oic->icmp6_id;
181560854Sdarrenr		hv += oic->icmp6_seq;
181660854Sdarrenr		hv %= fr_statesize;
181760854Sdarrenr
181860854Sdarrenr		oip->ip6_plen = ntohs(oip->ip6_plen);
181964580Sdarrenr		ofin.fin_v = 6;
182060854Sdarrenr		fr_makefrip(sizeof(*oip), (ip_t *)oip, &ofin);
182160854Sdarrenr		oip->ip6_plen = htons(oip->ip6_plen);
182260854Sdarrenr		ofin.fin_ifp = fin->fin_ifp;
182360854Sdarrenr		ofin.fin_out = !fin->fin_out;
182460854Sdarrenr		ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
182560854Sdarrenr
182660854Sdarrenr		READ_ENTER(&ipf_state);
182760854Sdarrenr		for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext)
182860854Sdarrenr			if ((is->is_p == pr) &&
182960854Sdarrenr			    (oic->icmp6_id == is->is_icmp.ics_id) &&
183060854Sdarrenr			    (oic->icmp6_seq == is->is_icmp.ics_seq) &&
183160854Sdarrenr			    fr_matchsrcdst(is, src, dst, &ofin, NULL)) {
183260854Sdarrenr			    	/*
183360854Sdarrenr			    	 * in the state table ICMP query's are stored
183460854Sdarrenr			    	 * with the type of the corresponding ICMP
183560854Sdarrenr			    	 * response. Correct here
183660854Sdarrenr			    	 */
183760854Sdarrenr				if (((is->is_type == ICMP6_ECHO_REPLY) &&
183860854Sdarrenr				     (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
183960854Sdarrenr				     (is->is_type - 1 == oic->icmp6_type )) {
184060854Sdarrenr				    	ips_stats.iss_hits++;
184163523Sdarrenr    					is->is_pkts++;
184263523Sdarrenr					is->is_bytes += fin->fin_plen;
184360854Sdarrenr					return is->is_rule;
184460854Sdarrenr				}
184560854Sdarrenr			}
184660854Sdarrenr		RWLOCK_EXIT(&ipf_state);
184760854Sdarrenr
184860854Sdarrenr		return NULL;
184960854Sdarrenr	};
185060854Sdarrenr
185160854Sdarrenr	if ((oip->ip6_nxt != IPPROTO_TCP) && (oip->ip6_nxt != IPPROTO_UDP))
185260854Sdarrenr		return NULL;
185360854Sdarrenr	tcp = (tcphdr_t *)(oip + 1);
185460854Sdarrenr	dport = tcp->th_dport;
185560854Sdarrenr	sport = tcp->th_sport;
185660854Sdarrenr
185760854Sdarrenr	hv = (pr = oip->ip6_nxt);
185860854Sdarrenr	src.in6 = oip->ip6_src;
185960854Sdarrenr	hv += src.in4.s_addr;
186060854Sdarrenr	dst.in6 = oip->ip6_dst;
186160854Sdarrenr	hv += dst.in4.s_addr;
186260854Sdarrenr	hv += dport;
186360854Sdarrenr	hv += sport;
186460854Sdarrenr	hv %= fr_statesize;
186560854Sdarrenr	/*
186660854Sdarrenr	 * we make an fin entry to be able to feed it to
186760854Sdarrenr	 * matchsrcdst note that not all fields are encessary
186860854Sdarrenr	 * but this is the cleanest way. Note further we fill
186960854Sdarrenr	 * in fin_mp such that if someone uses it we'll get
187060854Sdarrenr	 * a kernel panic. fr_matchsrcdst does not use this.
187160854Sdarrenr	 *
187260854Sdarrenr	 * watch out here, as ip is in host order and oip in network
187360854Sdarrenr	 * order. Any change we make must be undone afterwards.
187460854Sdarrenr	 */
187560854Sdarrenr	savelen = oip->ip6_plen;
187660854Sdarrenr	oip->ip6_plen = ip->ip6_plen - sizeof(*ip) - ICMPERR_ICMPHLEN;
187760854Sdarrenr	ofin.fin_v = 6;
187860854Sdarrenr	fr_makefrip(sizeof(*oip), (ip_t *)oip, &ofin);
187960854Sdarrenr	oip->ip6_plen = savelen;
188060854Sdarrenr	ofin.fin_ifp = fin->fin_ifp;
188160854Sdarrenr	ofin.fin_out = !fin->fin_out;
188260854Sdarrenr	ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
188360854Sdarrenr	READ_ENTER(&ipf_state);
188460854Sdarrenr	for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) {
188560854Sdarrenr		/*
188660854Sdarrenr		 * Only allow this icmp though if the
188760854Sdarrenr		 * encapsulated packet was allowed through the
188860854Sdarrenr		 * other way around. Note that the minimal amount
188960854Sdarrenr		 * of info present does not allow for checking against
189060854Sdarrenr		 * tcp internals such as seq and ack numbers.
189160854Sdarrenr		 */
189260854Sdarrenr		if ((is->is_p == pr) && (is->is_v == 6) &&
189360854Sdarrenr		    fr_matchsrcdst(is, src, dst, &ofin, tcp)) {
189460854Sdarrenr			fr = is->is_rule;
189560854Sdarrenr			ips_stats.iss_hits++;
189660854Sdarrenr			/*
189760854Sdarrenr			 * we must swap src and dst here because the icmp
189860854Sdarrenr			 * comes the other way around
189960854Sdarrenr			 */
190060854Sdarrenr			is->is_pkts++;
190160854Sdarrenr			is->is_bytes += fin->fin_plen;
190260854Sdarrenr			/*
190360854Sdarrenr			 * we deliberately do not touch the timeouts
190460854Sdarrenr			 * for the accompanying state table entry.
190560854Sdarrenr			 * It remains to be seen if that is correct. XXX
190660854Sdarrenr			 */
190760854Sdarrenr			RWLOCK_EXIT(&ipf_state);
190860854Sdarrenr			return fr;
190957096Sguido		}
191060854Sdarrenr	}
191157096Sguido	RWLOCK_EXIT(&ipf_state);
191260854Sdarrenr	return NULL;
191357096Sguido}
191460854Sdarrenr#endif
1915