ip_nat.c revision 80482
1/*
2 * Copyright (C) 1995-2001 by Darren Reed.
3 *
4 * See the IPFILTER.LICENCE file for details on licencing.
5 *
6 * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
7 */
8
9#if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL)
10#define _KERNEL
11#endif
12
13#include <sys/errno.h>
14#include <sys/types.h>
15#include <sys/param.h>
16#include <sys/time.h>
17#include <sys/file.h>
18#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
19    defined(_KERNEL)
20# include "opt_ipfilter_log.h"
21#endif
22#if !defined(_KERNEL) && !defined(KERNEL)
23# include <stdio.h>
24# include <string.h>
25# include <stdlib.h>
26#endif
27#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
28# include <sys/filio.h>
29# include <sys/fcntl.h>
30#else
31# include <sys/ioctl.h>
32#endif
33#include <sys/fcntl.h>
34#include <sys/uio.h>
35#ifndef linux
36# include <sys/protosw.h>
37#endif
38#include <sys/socket.h>
39#if defined(_KERNEL) && !defined(linux)
40# include <sys/systm.h>
41#endif
42#if !defined(__SVR4) && !defined(__svr4__)
43# ifndef linux
44#  include <sys/mbuf.h>
45# endif
46#else
47# include <sys/filio.h>
48# include <sys/byteorder.h>
49# ifdef _KERNEL
50#  include <sys/dditypes.h>
51# endif
52# include <sys/stream.h>
53# include <sys/kmem.h>
54#endif
55#if __FreeBSD_version >= 300000
56# include <sys/queue.h>
57#endif
58#include <net/if.h>
59#if __FreeBSD_version >= 300000
60# include <net/if_var.h>
61# if defined(_KERNEL) && !defined(IPFILTER_LKM)
62#  include "opt_ipfilter.h"
63# endif
64#endif
65#ifdef sun
66# include <net/af.h>
67#endif
68#include <net/route.h>
69#include <netinet/in.h>
70#include <netinet/in_systm.h>
71#include <netinet/ip.h>
72
73#ifdef __sgi
74# ifdef IFF_DRVRLOCK /* IRIX6 */
75#include <sys/hashing.h>
76#include <netinet/in_var.h>
77# endif
78#endif
79
80#ifdef RFC1825
81# include <vpn/md5.h>
82# include <vpn/ipsec.h>
83extern struct ifnet vpnif;
84#endif
85
86#ifndef linux
87# include <netinet/ip_var.h>
88# include <netinet/tcp_fsm.h>
89#endif
90#include <netinet/tcp.h>
91#include <netinet/udp.h>
92#include <netinet/ip_icmp.h>
93#include "netinet/ip_compat.h"
94#include <netinet/tcpip.h>
95#include "netinet/ip_fil.h"
96#include "netinet/ip_proxy.h"
97#include "netinet/ip_nat.h"
98#include "netinet/ip_frag.h"
99#include "netinet/ip_state.h"
100#if (__FreeBSD_version >= 300000)
101# include <sys/malloc.h>
102#endif
103#ifndef	MIN
104# define	MIN(a,b)	(((a)<(b))?(a):(b))
105#endif
106#undef	SOCKADDR_IN
107#define	SOCKADDR_IN	struct sockaddr_in
108
109#if !defined(lint)
110static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
111/* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.37.2.44 2001/07/21 07:17:22 darrenr Exp $"; */
112static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 80482 2001-07-28 11:58:26Z darrenr $";
113#endif
114
115nat_t	**nat_table[2] = { NULL, NULL },
116	*nat_instances = NULL;
117ipnat_t	*nat_list = NULL;
118u_int	ipf_nattable_sz = NAT_TABLE_SZ;
119u_int	ipf_natrules_sz = NAT_SIZE;
120u_int	ipf_rdrrules_sz = RDR_SIZE;
121u_int	ipf_hostmap_sz = HOSTMAP_SIZE;
122u_32_t	nat_masks = 0;
123u_32_t	rdr_masks = 0;
124ipnat_t	**nat_rules = NULL;
125ipnat_t	**rdr_rules = NULL;
126hostmap_t	**maptable  = NULL;
127
128u_long	fr_defnatage = DEF_NAT_AGE,
129	fr_defnaticmpage = 6;		/* 3 seconds */
130natstat_t nat_stats;
131int	fr_nat_lock = 0;
132#if	(SOLARIS || defined(__sgi)) && defined(_KERNEL)
133extern	kmutex_t	ipf_rw;
134extern	KRWLOCK_T	ipf_nat;
135#endif
136
137static	int	nat_flushtable __P((void));
138static	int	nat_clearlist __P((void));
139static	void	nat_addnat __P((struct ipnat *));
140static	void	nat_addrdr __P((struct ipnat *));
141static	void	nat_delete __P((struct nat *));
142static	void	nat_delrdr __P((struct ipnat *));
143static	void	nat_delnat __P((struct ipnat *));
144static	int	fr_natgetent __P((caddr_t));
145static	int	fr_natgetsz __P((caddr_t));
146static	int	fr_natputent __P((caddr_t));
147static	void	nat_tabmove __P((nat_t *, u_32_t));
148static	int	nat_match __P((fr_info_t *, ipnat_t *, ip_t *));
149static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
150				    struct in_addr));
151static	void	nat_hostmapdel __P((struct hostmap *));
152
153
154int nat_init()
155{
156	KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
157	if (nat_table[0] != NULL)
158		bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
159	else
160		return -1;
161
162	KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
163	if (nat_table[1] != NULL)
164		bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
165	else
166		return -1;
167
168	KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
169	if (nat_rules != NULL)
170		bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
171	else
172		return -1;
173
174	KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
175	if (rdr_rules != NULL)
176		bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
177	else
178		return -1;
179
180	KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
181	if (maptable != NULL)
182		bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
183	else
184		return -1;
185	return 0;
186}
187
188
189static void nat_addrdr(n)
190ipnat_t *n;
191{
192	ipnat_t **np;
193	u_32_t j;
194	u_int hv;
195	int k;
196
197	k = countbits(n->in_outmsk);
198	if ((k >= 0) && (k != 32))
199		rdr_masks |= 1 << k;
200	j = (n->in_outip & n->in_outmsk);
201	hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
202	np = rdr_rules + hv;
203	while (*np != NULL)
204		np = &(*np)->in_rnext;
205	n->in_rnext = NULL;
206	n->in_prnext = np;
207	*np = n;
208}
209
210
211static void nat_addnat(n)
212ipnat_t *n;
213{
214	ipnat_t **np;
215	u_32_t j;
216	u_int hv;
217	int k;
218
219	k = countbits(n->in_inmsk);
220	if ((k >= 0) && (k != 32))
221		nat_masks |= 1 << k;
222	j = (n->in_inip & n->in_inmsk);
223	hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
224	np = nat_rules + hv;
225	while (*np != NULL)
226		np = &(*np)->in_mnext;
227	n->in_mnext = NULL;
228	n->in_pmnext = np;
229	*np = n;
230}
231
232
233static void nat_delrdr(n)
234ipnat_t *n;
235{
236	if (n->in_rnext)
237		n->in_rnext->in_prnext = n->in_prnext;
238	*n->in_prnext = n->in_rnext;
239}
240
241
242static void nat_delnat(n)
243ipnat_t *n;
244{
245	if (n->in_mnext)
246		n->in_mnext->in_pmnext = n->in_pmnext;
247	*n->in_pmnext = n->in_mnext;
248}
249
250
251/*
252 * check if an ip address has already been allocated for a given mapping that
253 * is not doing port based translation.
254 *
255 * Must be called with ipf_nat held as a write lock.
256 */
257static struct hostmap *nat_hostmap(np, real, map)
258ipnat_t *np;
259struct in_addr real;
260struct in_addr map;
261{
262	hostmap_t *hm;
263	u_int hv;
264
265	hv = real.s_addr % HOSTMAP_SIZE;
266	for (hm = maptable[hv]; hm; hm = hm->hm_next)
267		if ((hm->hm_realip.s_addr == real.s_addr) &&
268		    (np == hm->hm_ipnat)) {
269			hm->hm_ref++;
270			return hm;
271		}
272
273	KMALLOC(hm, hostmap_t *);
274	if (hm) {
275		hm->hm_next = maptable[hv];
276		hm->hm_pnext = maptable + hv;
277		if (maptable[hv])
278			maptable[hv]->hm_pnext = &hm->hm_next;
279		maptable[hv] = hm;
280		hm->hm_ipnat = np;
281		hm->hm_realip = real;
282		hm->hm_mapip = map;
283		hm->hm_ref = 1;
284	}
285	return hm;
286}
287
288
289/*
290 * Must be called with ipf_nat held as a write lock.
291 */
292static void nat_hostmapdel(hm)
293struct hostmap *hm;
294{
295	ATOMIC_DEC32(hm->hm_ref);
296	if (hm->hm_ref == 0) {
297		if (hm->hm_next)
298			hm->hm_next->hm_pnext = hm->hm_pnext;
299		*hm->hm_pnext = hm->hm_next;
300		KFREE(hm);
301	}
302}
303
304
305void fix_outcksum(fin, sp, n)
306fr_info_t *fin;
307u_short *sp;
308u_32_t n;
309{
310	register u_short sumshort;
311	register u_32_t sum1;
312
313	if (!n)
314		return;
315	else if (n & NAT_HW_CKSUM) {
316		n &= 0xffff;
317		n += fin->fin_dlen;
318		n = (n & 0xffff) + (n >> 16);
319		*sp = n & 0xffff;
320		return;
321	}
322	sum1 = (~ntohs(*sp)) & 0xffff;
323	sum1 += (n);
324	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
325	/* Again */
326	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
327	sumshort = ~(u_short)sum1;
328	*(sp) = htons(sumshort);
329}
330
331
332void fix_incksum(fin, sp, n)
333fr_info_t *fin;
334u_short *sp;
335u_32_t n;
336{
337	register u_short sumshort;
338	register u_32_t sum1;
339
340	if (!n)
341		return;
342	else if (n & NAT_HW_CKSUM) {
343		n &= 0xffff;
344		n += fin->fin_dlen;
345		n = (n & 0xffff) + (n >> 16);
346		*sp = n & 0xffff;
347		return;
348	}
349#ifdef sparc
350	sum1 = (~(*sp)) & 0xffff;
351#else
352	sum1 = (~ntohs(*sp)) & 0xffff;
353#endif
354	sum1 += ~(n) & 0xffff;
355	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
356	/* Again */
357	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
358	sumshort = ~(u_short)sum1;
359	*(sp) = htons(sumshort);
360}
361
362
363/*
364 * fix_datacksum is used *only* for the adjustments of checksums in the data
365 * section of an IP packet.
366 *
367 * The only situation in which you need to do this is when NAT'ing an
368 * ICMP error message. Such a message, contains in its body the IP header
369 * of the original IP packet, that causes the error.
370 *
371 * You can't use fix_incksum or fix_outcksum in that case, because for the
372 * kernel the data section of the ICMP error is just data, and no special
373 * processing like hardware cksum or ntohs processing have been done by the
374 * kernel on the data section.
375 */
376void fix_datacksum(sp, n)
377u_short *sp;
378u_32_t n;
379{
380	register u_short sumshort;
381	register u_32_t sum1;
382
383	if (!n)
384		return;
385
386	sum1 = (~ntohs(*sp)) & 0xffff;
387	sum1 += (n);
388	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
389	/* Again */
390	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
391	sumshort = ~(u_short)sum1;
392	*(sp) = htons(sumshort);
393}
394
395/*
396 * How the NAT is organised and works.
397 *
398 * Inside (interface y) NAT       Outside (interface x)
399 * -------------------- -+- -------------------------------------
400 * Packet going          |   out, processed by ip_natout() for x
401 * ------------>         |   ------------>
402 * src=10.1.1.1          |   src=192.1.1.1
403 *                       |
404 *                       |   in, processed by ip_natin() for x
405 * <------------         |   <------------
406 * dst=10.1.1.1          |   dst=192.1.1.1
407 * -------------------- -+- -------------------------------------
408 * ip_natout() - changes ip_src and if required, sport
409 *             - creates a new mapping, if required.
410 * ip_natin()  - changes ip_dst and if required, dport
411 *
412 * In the NAT table, internal source is recorded as "in" and externally
413 * seen as "out".
414 */
415
416/*
417 * Handle ioctls which manipulate the NAT.
418 */
419int nat_ioctl(data, cmd, mode)
420#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
421u_long cmd;
422#else
423int cmd;
424#endif
425caddr_t data;
426int mode;
427{
428	register ipnat_t *nat, *nt, *n = NULL, **np = NULL;
429	int error = 0, ret, arg;
430	ipnat_t natd;
431	u_32_t i, j;
432
433#if (BSD >= 199306) && defined(_KERNEL)
434	if ((securelevel >= 2) && (mode & FWRITE))
435		return EPERM;
436#endif
437
438	nat = NULL;     /* XXX gcc -Wuninitialized */
439	KMALLOC(nt, ipnat_t *);
440	if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT))
441		error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
442	else if (cmd == SIOCIPFFL) {	/* SIOCFLNAT & SIOCCNATL */
443		error = IRCOPY(data, (char *)&arg, sizeof(arg));
444		if (error)
445			error = EFAULT;
446	}
447
448	if (error)
449		goto done;
450
451	/*
452	 * For add/delete, look to see if the NAT entry is already present
453	 */
454	WRITE_ENTER(&ipf_nat);
455	if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
456		nat = &natd;
457		nat->in_flags &= IPN_USERFLAGS;
458		if ((nat->in_redir & NAT_MAPBLK) == 0) {
459			if ((nat->in_flags & IPN_SPLIT) == 0)
460				nat->in_inip &= nat->in_inmsk;
461			if ((nat->in_flags & IPN_IPRANGE) == 0)
462				nat->in_outip &= nat->in_outmsk;
463		}
464		for (np = &nat_list; (n = *np); np = &n->in_next)
465			if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
466					IPN_CMPSIZ))
467				break;
468	}
469
470	switch (cmd)
471	{
472#ifdef  IPFILTER_LOG
473	case SIOCIPFFB :
474	{
475		int tmp;
476
477		if (!(mode & FWRITE))
478			error = EPERM;
479		else {
480			tmp = ipflog_clear(IPL_LOGNAT);
481			IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
482		}
483		break;
484	}
485#endif
486	case SIOCADNAT :
487		if (!(mode & FWRITE)) {
488			error = EPERM;
489			break;
490		}
491		if (n) {
492			error = EEXIST;
493			break;
494		}
495		if (nt == NULL) {
496			error = ENOMEM;
497			break;
498		}
499		n = nt;
500		nt = NULL;
501		bcopy((char *)nat, (char *)n, sizeof(*n));
502		n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
503		if (!n->in_ifp)
504			n->in_ifp = (void *)-1;
505		if (n->in_plabel[0] != '\0') {
506			n->in_apr = appr_match(n->in_p, n->in_plabel);
507			if (!n->in_apr) {
508				error = ENOENT;
509				break;
510			}
511		}
512		n->in_next = NULL;
513		*np = n;
514
515		if (n->in_redir & NAT_REDIRECT) {
516			n->in_flags &= ~IPN_NOTDST;
517			nat_addrdr(n);
518		}
519		if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
520			n->in_flags &= ~IPN_NOTSRC;
521			nat_addnat(n);
522		}
523
524		n->in_use = 0;
525		if (n->in_redir & NAT_MAPBLK)
526			n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
527		else if (n->in_flags & IPN_AUTOPORTMAP)
528			n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
529		else if (n->in_flags & IPN_IPRANGE)
530			n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
531		else if (n->in_flags & IPN_SPLIT)
532			n->in_space = 2;
533		else
534			n->in_space = ~ntohl(n->in_outmsk);
535		/*
536		 * Calculate the number of valid IP addresses in the output
537		 * mapping range.  In all cases, the range is inclusive of
538		 * the start and ending IP addresses.
539		 * If to a CIDR address, lose 2: broadcast + network address
540		 *			         (so subtract 1)
541		 * If to a range, add one.
542		 * If to a single IP address, set to 1.
543		 */
544		if (n->in_space) {
545			if ((n->in_flags & IPN_IPRANGE) != 0)
546				n->in_space += 1;
547			else
548				n->in_space -= 1;
549		} else
550			n->in_space = 1;
551		if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
552		    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
553			n->in_nip = ntohl(n->in_outip) + 1;
554		else if ((n->in_flags & IPN_SPLIT) &&
555			 (n->in_redir & NAT_REDIRECT))
556			n->in_nip = ntohl(n->in_inip);
557		else
558			n->in_nip = ntohl(n->in_outip);
559		if (n->in_redir & NAT_MAP) {
560			n->in_pnext = ntohs(n->in_pmin);
561			/*
562			 * Multiply by the number of ports made available.
563			 */
564			if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
565				n->in_space *= (ntohs(n->in_pmax) -
566						ntohs(n->in_pmin) + 1);
567				/*
568				 * Because two different sources can map to
569				 * different destinations but use the same
570				 * local IP#/port #.
571				 * If the result is smaller than in_space, then
572				 * we may have wrapped around 32bits.
573				 */
574				i = n->in_inmsk;
575				if ((i != 0) && (i != 0xffffffff)) {
576					j = n->in_space * (~ntohl(i) + 1);
577					if (j >= n->in_space)
578						n->in_space = j;
579					else
580						n->in_space = 0xffffffff;
581				}
582			}
583			/*
584			 * If no protocol is specified, multiple by 256.
585			 */
586			if ((n->in_flags & IPN_TCPUDP) == 0) {
587					j = n->in_space * 256;
588					if (j >= n->in_space)
589						n->in_space = j;
590					else
591						n->in_space = 0xffffffff;
592			}
593		}
594		/* Otherwise, these fields are preset */
595		n = NULL;
596		nat_stats.ns_rules++;
597		break;
598	case SIOCRMNAT :
599		if (!(mode & FWRITE)) {
600			error = EPERM;
601			n = NULL;
602			break;
603		}
604		if (!n) {
605			error = ESRCH;
606			break;
607		}
608		if (n->in_redir & NAT_REDIRECT)
609			nat_delrdr(n);
610		if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
611			nat_delnat(n);
612		if (nat_list == NULL) {
613			nat_masks = 0;
614			rdr_masks = 0;
615		}
616		*np = n->in_next;
617		if (!n->in_use) {
618			if (n->in_apr)
619				appr_free(n->in_apr);
620			KFREE(n);
621			nat_stats.ns_rules--;
622		} else {
623			n->in_flags |= IPN_DELETE;
624			n->in_next = NULL;
625		}
626		n = NULL;
627		break;
628	case SIOCGNATS :
629		MUTEX_DOWNGRADE(&ipf_nat);
630		nat_stats.ns_table[0] = nat_table[0];
631		nat_stats.ns_table[1] = nat_table[1];
632		nat_stats.ns_list = nat_list;
633		nat_stats.ns_maptable = maptable;
634		nat_stats.ns_nattab_sz = ipf_nattable_sz;
635		nat_stats.ns_rultab_sz = ipf_natrules_sz;
636		nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
637		nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
638		nat_stats.ns_instances = nat_instances;
639		nat_stats.ns_apslist = ap_sess_list;
640		error = IWCOPYPTR((char *)&nat_stats, (char *)data,
641				  sizeof(nat_stats));
642		break;
643	case SIOCGNATL :
644	    {
645		natlookup_t nl;
646
647		MUTEX_DOWNGRADE(&ipf_nat);
648		error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
649		if (error)
650			break;
651
652		if (nat_lookupredir(&nl)) {
653			error = IWCOPYPTR((char *)&nl, (char *)data,
654					  sizeof(nl));
655		} else
656			error = ESRCH;
657		break;
658	    }
659	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
660		if (!(mode & FWRITE)) {
661			error = EPERM;
662			break;
663		}
664		error = 0;
665		if (arg == 0)
666			ret = nat_flushtable();
667		else if (arg == 1)
668			ret = nat_clearlist();
669		else
670			error = EINVAL;
671		MUTEX_DOWNGRADE(&ipf_nat);
672		if (!error) {
673			error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
674			if (error)
675				error = EFAULT;
676		}
677		break;
678	case SIOCSTLCK :
679		error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
680		if (!error) {
681			error = IWCOPY((caddr_t)&fr_nat_lock, data,
682					sizeof(fr_nat_lock));
683			if (!error)
684				fr_nat_lock = arg;
685		} else
686			error = EFAULT;
687		break;
688	case SIOCSTPUT :
689		if (fr_nat_lock)
690			error = fr_natputent(data);
691		else
692			error = EACCES;
693		break;
694	case SIOCSTGSZ :
695		if (fr_nat_lock)
696			error = fr_natgetsz(data);
697		else
698			error = EACCES;
699		break;
700	case SIOCSTGET :
701		if (fr_nat_lock)
702			error = fr_natgetent(data);
703		else
704			error = EACCES;
705		break;
706	case FIONREAD :
707#ifdef	IPFILTER_LOG
708		arg = (int)iplused[IPL_LOGNAT];
709		MUTEX_DOWNGRADE(&ipf_nat);
710		error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
711		if (error)
712			error = EFAULT;
713#endif
714		break;
715	default :
716		error = EINVAL;
717		break;
718	}
719	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
720done:
721	if (nt)
722		KFREE(nt);
723	return error;
724}
725
726
727static int fr_natgetsz(data)
728caddr_t data;
729{
730	ap_session_t *aps;
731	nat_t *nat, *n;
732	int error = 0;
733	natget_t ng;
734
735	error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
736	if (error)
737		return EFAULT;
738
739	nat = ng.ng_ptr;
740	if (!nat) {
741		nat = nat_instances;
742		ng.ng_sz = 0;
743		if (nat == NULL) {
744			error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
745			if (error)
746				error = EFAULT;
747			return error;
748		}
749	} else {
750		/*
751		 * Make sure the pointer we're copying from exists in the
752		 * current list of entries.  Security precaution to prevent
753		 * copying of random kernel data.
754		 */
755		for (n = nat_instances; n; n = n->nat_next)
756			if (n == nat)
757				break;
758		if (!n)
759			return ESRCH;
760	}
761
762	ng.ng_sz = sizeof(nat_save_t);
763	aps = nat->nat_aps;
764	if ((aps != NULL) && (aps->aps_data != 0)) {
765		ng.ng_sz += sizeof(ap_session_t);
766		ng.ng_sz += aps->aps_psiz;
767	}
768
769	error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
770	if (error)
771		error = EFAULT;
772	return error;
773}
774
775
776static int fr_natgetent(data)
777caddr_t data;
778{
779	nat_save_t ipn, *ipnp, *ipnn = NULL;
780	register nat_t *n, *nat;
781	ap_session_t *aps;
782	int error;
783
784	error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
785	if (error)
786		return EFAULT;
787	error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
788	if (error)
789		return EFAULT;
790
791	nat = ipn.ipn_next;
792	if (!nat) {
793		nat = nat_instances;
794		if (nat == NULL) {
795			if (nat_instances == NULL)
796				return ENOENT;
797			return 0;
798		}
799	} else {
800		/*
801		 * Make sure the pointer we're copying from exists in the
802		 * current list of entries.  Security precaution to prevent
803		 * copying of random kernel data.
804		 */
805		for (n = nat_instances; n; n = n->nat_next)
806			if (n == nat)
807				break;
808		if (!n)
809			return ESRCH;
810	}
811
812	ipn.ipn_next = nat->nat_next;
813	ipn.ipn_dsize = 0;
814	bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
815	ipn.ipn_nat.nat_data = NULL;
816
817	if (nat->nat_ptr) {
818		bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
819		      sizeof(ipn.ipn_ipnat));
820	}
821
822	if (nat->nat_fr)
823		bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
824		      sizeof(ipn.ipn_rule));
825
826	if ((aps = nat->nat_aps)) {
827		ipn.ipn_dsize = sizeof(*aps);
828		if (aps->aps_data)
829			ipn.ipn_dsize += aps->aps_psiz;
830		KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
831		if (ipnn == NULL)
832			return ENOMEM;
833		bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
834
835		bcopy((char *)aps, ipnn->ipn_data, sizeof(*aps));
836		if (aps->aps_data) {
837			bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
838			      aps->aps_psiz);
839			ipnn->ipn_dsize += aps->aps_psiz;
840		}
841		error = IWCOPY((caddr_t)ipnn, ipnp,
842			       sizeof(ipn) + ipn.ipn_dsize);
843		if (error)
844			error = EFAULT;
845		KFREES(ipnn, sizeof(*ipnn) + ipn.ipn_dsize);
846	} else {
847		error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
848		if (error)
849			error = EFAULT;
850	}
851	return error;
852}
853
854
855static int fr_natputent(data)
856caddr_t data;
857{
858	nat_save_t ipn, *ipnp, *ipnn = NULL;
859	register nat_t *n, *nat;
860	ap_session_t *aps;
861	frentry_t *fr;
862	ipnat_t *in;
863
864	int error;
865
866	error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
867	if (error)
868		return EFAULT;
869	error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
870	if (error)
871		return EFAULT;
872	nat = NULL;
873	if (ipn.ipn_dsize) {
874		KMALLOCS(ipnn, nat_save_t *, sizeof(ipn) + ipn.ipn_dsize);
875		if (ipnn == NULL)
876			return ENOMEM;
877		bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
878		error = IRCOPY((caddr_t)ipnp, (caddr_t)ipn.ipn_data,
879			       ipn.ipn_dsize);
880		if (error) {
881			error = EFAULT;
882			goto junkput;
883		}
884	} else
885		ipnn = NULL;
886
887	KMALLOC(nat, nat_t *);
888	if (nat == NULL) {
889		error = EFAULT;
890		goto junkput;
891	}
892
893	bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
894	/*
895	 * Initialize all these so that nat_delete() doesn't cause a crash.
896	 */
897	nat->nat_phnext[0] = NULL;
898	nat->nat_phnext[1] = NULL;
899	fr = nat->nat_fr;
900	nat->nat_fr = NULL;
901	aps = nat->nat_aps;
902	nat->nat_aps = NULL;
903	in = nat->nat_ptr;
904	nat->nat_ptr = NULL;
905	nat->nat_data = NULL;
906
907	/*
908	 * Restore the rule associated with this nat session
909	 */
910	if (in) {
911		KMALLOC(in, ipnat_t *);
912		if (in == NULL) {
913			error = ENOMEM;
914			goto junkput;
915		}
916		nat->nat_ptr = in;
917		bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
918		in->in_use = 1;
919		in->in_flags |= IPN_DELETE;
920		in->in_next = NULL;
921		in->in_rnext = NULL;
922		in->in_prnext = NULL;
923		in->in_mnext = NULL;
924		in->in_pmnext = NULL;
925		in->in_ifp = GETUNIT(in->in_ifname, 4);
926		if (in->in_plabel[0] != '\0') {
927			in->in_apr = appr_match(in->in_p, in->in_plabel);
928		}
929	}
930
931	/*
932	 * Restore ap_session_t structure.  Include the private data allocated
933	 * if it was there.
934	 */
935	if (aps) {
936		KMALLOC(aps, ap_session_t *);
937		if (aps == NULL) {
938			error = ENOMEM;
939			goto junkput;
940		}
941		nat->nat_aps = aps;
942		aps->aps_next = ap_sess_list;
943		ap_sess_list = aps;
944		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
945		if (in)
946			aps->aps_apr = in->in_apr;
947		if (aps->aps_psiz) {
948			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
949			if (aps->aps_data == NULL) {
950				error = ENOMEM;
951				goto junkput;
952			}
953			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
954			      aps->aps_psiz);
955		} else {
956			aps->aps_psiz = 0;
957			aps->aps_data = NULL;
958		}
959	}
960
961	/*
962	 * If there was a filtering rule associated with this entry then
963	 * build up a new one.
964	 */
965	if (fr != NULL) {
966		if (nat->nat_flags & FI_NEWFR) {
967			KMALLOC(fr, frentry_t *);
968			nat->nat_fr = fr;
969			if (fr == NULL) {
970				error = ENOMEM;
971				goto junkput;
972			}
973			bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
974			ipn.ipn_nat.nat_fr = fr;
975			error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
976			if (error) {
977				error = EFAULT;
978				goto junkput;
979			}
980		} else {
981			for (n = nat_instances; n; n = n->nat_next)
982				if (n->nat_fr == fr)
983					break;
984			if (!n) {
985				error = ESRCH;
986				goto junkput;
987			}
988		}
989	}
990
991	if (ipnn)
992		KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
993	nat_insert(nat);
994	return 0;
995junkput:
996	if (ipnn)
997		KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
998	if (nat)
999		nat_delete(nat);
1000	return error;
1001}
1002
1003
1004/*
1005 * Delete a nat entry from the various lists and table.
1006 */
1007static void nat_delete(natd)
1008struct nat *natd;
1009{
1010	struct ipnat *ipn;
1011
1012	if (natd->nat_flags & FI_WILDP)
1013		nat_stats.ns_wilds--;
1014	if (natd->nat_hnext[0])
1015		natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0];
1016	*natd->nat_phnext[0] = natd->nat_hnext[0];
1017	if (natd->nat_hnext[1])
1018		natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1];
1019	*natd->nat_phnext[1] = natd->nat_hnext[1];
1020
1021	if (natd->nat_fr != NULL) {
1022		ATOMIC_DEC32(natd->nat_fr->fr_ref);
1023	}
1024
1025	if (natd->nat_hm != NULL)
1026		nat_hostmapdel(natd->nat_hm);
1027
1028	/*
1029	 * If there is an active reference from the nat entry to its parent
1030	 * rule, decrement the rule's reference count and free it too if no
1031	 * longer being used.
1032	 */
1033	ipn = natd->nat_ptr;
1034	if (ipn != NULL) {
1035		ipn->in_space++;
1036		ipn->in_use--;
1037		if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
1038			if (ipn->in_apr)
1039				appr_free(ipn->in_apr);
1040			KFREE(ipn);
1041			nat_stats.ns_rules--;
1042		}
1043	}
1044
1045	MUTEX_DESTROY(&natd->nat_lock);
1046	/*
1047	 * If there's a fragment table entry too for this nat entry, then
1048	 * dereference that as well.
1049	 */
1050	ipfr_forget((void *)natd);
1051	aps_free(natd->nat_aps);
1052	nat_stats.ns_inuse--;
1053	KFREE(natd);
1054}
1055
1056
1057/*
1058 * nat_flushtable - clear the NAT table of all mapping entries.
1059 */
1060static int nat_flushtable()
1061{
1062	register nat_t *nat, **natp;
1063	register int j = 0;
1064
1065	/*
1066	 * ALL NAT mappings deleted, so lets just make the deletions
1067	 * quicker.
1068	 */
1069	if (nat_table[0] != NULL)
1070		bzero((char *)nat_table[0],
1071		      sizeof(nat_table[0]) * ipf_nattable_sz);
1072	if (nat_table[1] != NULL)
1073		bzero((char *)nat_table[1],
1074		      sizeof(nat_table[1]) * ipf_nattable_sz);
1075
1076	for (natp = &nat_instances; (nat = *natp); ) {
1077		*natp = nat->nat_next;
1078#ifdef	IPFILTER_LOG
1079		nat_log(nat, NL_FLUSH);
1080#endif
1081		nat_delete(nat);
1082		j++;
1083	}
1084	nat_stats.ns_inuse = 0;
1085	return j;
1086}
1087
1088
1089/*
1090 * nat_clearlist - delete all rules in the active NAT mapping list.
1091 */
1092static int nat_clearlist()
1093{
1094	register ipnat_t *n, **np = &nat_list;
1095	int i = 0;
1096
1097	if (nat_rules != NULL)
1098		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1099	if (rdr_rules != NULL)
1100		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1101
1102	while ((n = *np)) {
1103		*np = n->in_next;
1104		if (!n->in_use) {
1105			if (n->in_apr)
1106				appr_free(n->in_apr);
1107			KFREE(n);
1108			nat_stats.ns_rules--;
1109		} else {
1110			n->in_flags |= IPN_DELETE;
1111			n->in_next = NULL;
1112		}
1113		i++;
1114	}
1115	nat_masks = 0;
1116	rdr_masks = 0;
1117	return i;
1118}
1119
1120
1121/*
1122 * Create a new NAT table entry.
1123 * NOTE: assumes write lock on ipf_nat has been obtained already.
1124 */
1125nat_t *nat_new(np, ip, fin, flags, direction)
1126ipnat_t *np;
1127ip_t *ip;
1128fr_info_t *fin;
1129u_int flags;
1130int direction;
1131{
1132	register u_32_t sum1, sum2, sumd, l;
1133	u_short port = 0, sport = 0, dport = 0, nport = 0;
1134	struct in_addr in, inb;
1135	tcphdr_t *tcp = NULL;
1136	hostmap_t *hm = NULL;
1137	nat_t *nat, *natl;
1138	u_short nflags;
1139#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1140	qif_t *qf = fin->fin_qif;
1141#endif
1142
1143	nflags = flags & np->in_flags;
1144	if (flags & IPN_TCPUDP) {
1145		tcp = (tcphdr_t *)fin->fin_dp;
1146		sport = tcp->th_sport;
1147		dport = tcp->th_dport;
1148	}
1149
1150	/* Give me a new nat */
1151	KMALLOC(nat, nat_t *);
1152	if (nat == NULL) {
1153		nat_stats.ns_memfail++;
1154		return NULL;
1155	}
1156
1157	bzero((char *)nat, sizeof(*nat));
1158	nat->nat_flags = flags;
1159	if (flags & FI_WILDP)
1160		nat_stats.ns_wilds++;
1161	/*
1162	 * Search the current table for a match.
1163	 */
1164	if (direction == NAT_OUTBOUND) {
1165		/*
1166		 * Values at which the search for a free resource starts.
1167		 */
1168		u_32_t st_ip;
1169		u_short st_port;
1170
1171		/*
1172		 * If it's an outbound packet which doesn't match any existing
1173		 * record, then create a new port
1174		 */
1175		l = 0;
1176		st_ip = np->in_nip;
1177		st_port = np->in_pnext;
1178
1179		do {
1180			port = 0;
1181			in.s_addr = htonl(np->in_nip);
1182			if (l == 0) {
1183				/*
1184				 * Check to see if there is an existing NAT
1185				 * setup for this IP address pair.
1186				 */
1187				hm = nat_hostmap(np, ip->ip_src, in);
1188				if (hm != NULL)
1189					in.s_addr = hm->hm_mapip.s_addr;
1190			} else if ((l == 1) && (hm != NULL)) {
1191				nat_hostmapdel(hm);
1192				hm = NULL;
1193			}
1194			in.s_addr = ntohl(in.s_addr);
1195
1196			nat->nat_hm = hm;
1197
1198			if ((np->in_outmsk == 0xffffffff) &&
1199			    (np->in_pnext == 0)) {
1200				if (l > 0)
1201					goto badnat;
1202			}
1203
1204			if (np->in_redir & NAT_MAPBLK) {
1205				if ((l >= np->in_ppip) || ((l > 0) &&
1206				     !(flags & IPN_TCPUDP)))
1207					goto badnat;
1208				/*
1209				 * map-block - Calculate destination address.
1210				 */
1211				in.s_addr = ntohl(ip->ip_src.s_addr);
1212				in.s_addr &= ntohl(~np->in_inmsk);
1213				inb.s_addr = in.s_addr;
1214				in.s_addr /= np->in_ippip;
1215				in.s_addr &= ntohl(~np->in_outmsk);
1216				in.s_addr += ntohl(np->in_outip);
1217				/*
1218				 * Calculate destination port.
1219				 */
1220				if ((flags & IPN_TCPUDP) &&
1221				    (np->in_ppip != 0)) {
1222					port = ntohs(sport) + l;
1223					port %= np->in_ppip;
1224					port += np->in_ppip *
1225						(inb.s_addr % np->in_ippip);
1226					port += MAPBLK_MINPORT;
1227					port = htons(port);
1228				}
1229			} else if (!np->in_outip &&
1230				   (np->in_outmsk == 0xffffffff)) {
1231				/*
1232				 * 0/32 - use the interface's IP address.
1233				 */
1234				if ((l > 0) ||
1235				    fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1236					goto badnat;
1237				in.s_addr = ntohl(in.s_addr);
1238			} else if (!np->in_outip && !np->in_outmsk) {
1239				/*
1240				 * 0/0 - use the original source address/port.
1241				 */
1242				if (l > 0)
1243					goto badnat;
1244				in.s_addr = ntohl(ip->ip_src.s_addr);
1245			} else if ((np->in_outmsk != 0xffffffff) &&
1246				   (np->in_pnext == 0) &&
1247				   ((l > 0) || (hm == NULL)))
1248				np->in_nip++;
1249			natl = NULL;
1250
1251			if ((nflags & IPN_TCPUDP) &&
1252			    ((np->in_redir & NAT_MAPBLK) == 0) &&
1253			    (np->in_flags & IPN_AUTOPORTMAP)) {
1254				if ((l > 0) && (l % np->in_ppip == 0)) {
1255					if (l > np->in_space) {
1256						goto badnat;
1257					} else if ((l > np->in_ppip) &&
1258						   np->in_outmsk != 0xffffffff)
1259						np->in_nip++;
1260				}
1261				if (np->in_ppip != 0) {
1262					port = ntohs(sport);
1263					port += (l % np->in_ppip);
1264					port %= np->in_ppip;
1265					port += np->in_ppip *
1266						(ntohl(ip->ip_src.s_addr) %
1267						 np->in_ippip);
1268					port += MAPBLK_MINPORT;
1269					port = htons(port);
1270				}
1271			} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1272				   (nflags & IPN_TCPUDP) &&
1273				   (np->in_pnext != 0)) {
1274				port = htons(np->in_pnext++);
1275				if (np->in_pnext > ntohs(np->in_pmax)) {
1276					np->in_pnext = ntohs(np->in_pmin);
1277					if (np->in_outmsk != 0xffffffff)
1278						np->in_nip++;
1279				}
1280			}
1281
1282			if (np->in_flags & IPN_IPRANGE) {
1283				if (np->in_nip > ntohl(np->in_outmsk))
1284					np->in_nip = ntohl(np->in_outip);
1285			} else {
1286				if ((np->in_outmsk != 0xffffffff) &&
1287				    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1288				    ntohl(np->in_outip))
1289					np->in_nip = ntohl(np->in_outip) + 1;
1290			}
1291
1292			if (!port && (flags & IPN_TCPUDP))
1293				port = sport;
1294
1295			/*
1296			 * Here we do a lookup of the connection as seen from
1297			 * the outside.  If an IP# pair already exists, try
1298			 * again.  So if you have A->B becomes C->B, you can
1299			 * also have D->E become C->E but not D->B causing
1300			 * another C->B.  Also take protocol and ports into
1301			 * account when determining whether a pre-existing
1302			 * NAT setup will cause an external conflict where
1303			 * this is appropriate.
1304			 */
1305			inb.s_addr = htonl(in.s_addr);
1306			natl = nat_inlookup(fin->fin_ifp, flags & ~FI_WILDP,
1307					    (u_int)ip->ip_p, ip->ip_dst, inb,
1308					    (port << 16) | dport, 1);
1309
1310			/*
1311			 * Has the search wrapped around and come back to the
1312			 * start ?
1313			 */
1314			if ((natl != NULL) &&
1315			    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1316			    (np->in_nip != 0) && (st_ip == np->in_nip))
1317				goto badnat;
1318			l++;
1319		} while (natl != NULL);
1320
1321		if (np->in_space > 0)
1322			np->in_space--;
1323
1324		/* Setup the NAT table */
1325		nat->nat_inip = ip->ip_src;
1326		nat->nat_outip.s_addr = htonl(in.s_addr);
1327		nat->nat_oip = ip->ip_dst;
1328		if (nat->nat_hm == NULL)
1329			nat->nat_hm = nat_hostmap(np, ip->ip_src,
1330						  nat->nat_outip);
1331
1332		sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr)) + ntohs(sport);
1333		sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1334
1335		if (flags & IPN_TCPUDP) {
1336			nat->nat_inport = sport;
1337			nat->nat_outport = port;	/* sport */
1338			nat->nat_oport = dport;
1339		}
1340	} else {
1341		/*
1342		 * Otherwise, it's an inbound packet. Most likely, we don't
1343		 * want to rewrite source ports and source addresses. Instead,
1344		 * we want to rewrite to a fixed internal address and fixed
1345		 * internal port.
1346		 */
1347		if (np->in_flags & IPN_SPLIT) {
1348			in.s_addr = np->in_nip;
1349			if (np->in_inip == htonl(in.s_addr))
1350				np->in_nip = ntohl(np->in_inmsk);
1351			else {
1352				np->in_nip = ntohl(np->in_inip);
1353				if (np->in_flags & IPN_ROUNDR) {
1354					nat_delrdr(np);
1355					nat_addrdr(np);
1356				}
1357			}
1358		} else {
1359			in.s_addr = ntohl(np->in_inip);
1360			if (np->in_flags & IPN_ROUNDR) {
1361				nat_delrdr(np);
1362				nat_addrdr(np);
1363			}
1364		}
1365		if (!np->in_pnext)
1366			nport = dport;
1367		else {
1368			/*
1369			 * Whilst not optimized for the case where
1370			 * pmin == pmax, the gain is not significant.
1371			 */
1372			nport = ntohs(dport) - ntohs(np->in_pmin) +
1373				ntohs(np->in_pnext);
1374			nport = htons(nport);
1375		}
1376
1377		/*
1378		 * When the redirect-to address is set to 0.0.0.0, just
1379		 * assume a blank `forwarding' of the packet.  We don't
1380		 * setup any translation for this either.
1381		 */
1382		if (in.s_addr == 0) {
1383			if (nport == dport)
1384				goto badnat;
1385			in.s_addr = ntohl(ip->ip_dst.s_addr);
1386		}
1387
1388		nat->nat_inip.s_addr = htonl(in.s_addr);
1389		nat->nat_outip = ip->ip_dst;
1390		nat->nat_oip = ip->ip_src;
1391
1392		sum1 = LONG_SUM(ntohl(ip->ip_dst.s_addr)) + ntohs(dport);
1393		sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1394
1395		if (flags & IPN_TCPUDP) {
1396			nat->nat_inport = nport;
1397			nat->nat_outport = dport;
1398			nat->nat_oport = sport;
1399		}
1400	}
1401
1402	CALC_SUMD(sum1, sum2, sumd);
1403	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1404#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1405	if ((flags & IPN_TCPUDP) && dohwcksum &&
1406	    (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1407		if (direction == NAT_OUTBOUND)
1408			sum1 = LONG_SUM(ntohl(in.s_addr));
1409		else
1410			sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
1411		sum1 += LONG_SUM(ntohl(ip->ip_dst.s_addr));
1412		sum1 += IPPROTO_TCP;
1413		sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1414		nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1415	} else
1416#endif
1417		nat->nat_sumd[1] = nat->nat_sumd[0];
1418
1419	if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1420		if (direction == NAT_OUTBOUND)
1421			sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
1422		else
1423			sum1 = LONG_SUM(ntohl(ip->ip_dst.s_addr));
1424
1425		sum2 = LONG_SUM(in.s_addr);
1426
1427		CALC_SUMD(sum1, sum2, sumd);
1428		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1429	} else
1430		nat->nat_ipsumd = nat->nat_sumd[0];
1431
1432	in.s_addr = htonl(in.s_addr);
1433
1434#ifdef  _KERNEL
1435	strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1436#endif
1437	nat_insert(nat);
1438
1439	nat->nat_dir = direction;
1440	nat->nat_ifp = fin->fin_ifp;
1441	nat->nat_ptr = np;
1442	nat->nat_p = ip->ip_p;
1443	nat->nat_bytes = 0;
1444	nat->nat_pkts = 0;
1445	nat->nat_fr = fin->fin_fr;
1446	if (nat->nat_fr != NULL) {
1447		ATOMIC_INC32(nat->nat_fr->fr_ref);
1448	}
1449	if (direction == NAT_OUTBOUND) {
1450		if (flags & IPN_TCPUDP)
1451			tcp->th_sport = port;
1452	} else {
1453		if (flags & IPN_TCPUDP)
1454			tcp->th_dport = nport;
1455	}
1456	np->in_use++;
1457#ifdef	IPFILTER_LOG
1458	nat_log(nat, (u_int)np->in_redir);
1459#endif
1460	return nat;
1461badnat:
1462	nat_stats.ns_badnat++;
1463	if ((hm = nat->nat_hm) != NULL)
1464		nat_hostmapdel(hm);
1465	KFREE(nat);
1466	return NULL;
1467}
1468
1469
1470void	nat_insert(nat)
1471nat_t	*nat;
1472{
1473	u_int hv1, hv2;
1474	nat_t **natp;
1475
1476	MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1477
1478	nat->nat_age = fr_defnatage;
1479	nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1480	if (nat->nat_ifname[0] !='\0') {
1481		nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1482	}
1483
1484	nat->nat_next = nat_instances;
1485	nat_instances = nat;
1486
1487	if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) {
1488		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1489				  0xffffffff);
1490		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
1491				  ipf_nattable_sz);
1492		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1493				  0xffffffff);
1494		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
1495				 ipf_nattable_sz);
1496	} else {
1497		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
1498		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
1499		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
1500		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
1501	}
1502
1503	natp = &nat_table[0][hv1];
1504	if (*natp)
1505		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
1506	nat->nat_phnext[0] = natp;
1507	nat->nat_hnext[0] = *natp;
1508	*natp = nat;
1509
1510	natp = &nat_table[1][hv2];
1511	if (*natp)
1512		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
1513	nat->nat_phnext[1] = natp;
1514	nat->nat_hnext[1] = *natp;
1515	*natp = nat;
1516
1517	nat_stats.ns_added++;
1518	nat_stats.ns_inuse++;
1519}
1520
1521
1522nat_t *nat_icmplookup(ip, fin, dir)
1523ip_t *ip;
1524fr_info_t *fin;
1525int dir;
1526{
1527	icmphdr_t *icmp;
1528	tcphdr_t *tcp = NULL;
1529	ip_t *oip;
1530	int flags = 0, type, minlen;
1531
1532	icmp = (icmphdr_t *)fin->fin_dp;
1533	/*
1534	 * Does it at least have the return (basic) IP header ?
1535	 * Only a basic IP header (no options) should be with an ICMP error
1536	 * header.
1537	 */
1538	if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1539		return NULL;
1540	type = icmp->icmp_type;
1541	/*
1542	 * If it's not an error type, then return.
1543	 */
1544	if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1545	    (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1546	    (type != ICMP_PARAMPROB))
1547		return NULL;
1548
1549	oip = (ip_t *)((char *)fin->fin_dp + 8);
1550	minlen = (oip->ip_hl << 2);
1551	if (minlen < sizeof(ip_t))
1552		return NULL;
1553	if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1554		return NULL;
1555	/*
1556	 * Is the buffer big enough for all of it ?  It's the size of the IP
1557	 * header claimed in the encapsulated part which is of concern.  It
1558	 * may be too big to be in this buffer but not so big that it's
1559	 * outside the ICMP packet, leading to TCP deref's causing problems.
1560	 * This is possible because we don't know how big oip_hl is when we
1561	 * do the pullup early in fr_check() and thus can't gaurantee it is
1562	 * all here now.
1563	 */
1564#ifdef  _KERNEL
1565	{
1566	mb_t *m;
1567
1568# if SOLARIS
1569	m = fin->fin_qfm;
1570	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
1571		return NULL;
1572# else
1573	m = *(mb_t **)fin->fin_mp;
1574	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
1575	    (char *)ip + m->m_len)
1576		return NULL;
1577# endif
1578	}
1579#endif
1580
1581	if (oip->ip_p == IPPROTO_TCP)
1582		flags = IPN_TCP;
1583	else if (oip->ip_p == IPPROTO_UDP)
1584		flags = IPN_UDP;
1585	if (flags & IPN_TCPUDP) {
1586		minlen += 8;		/* + 64bits of data to get ports */
1587		if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1588			return NULL;
1589		tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1590		if (dir == NAT_INBOUND)
1591			return nat_inlookup(fin->fin_ifp, flags,
1592				(u_int)oip->ip_p, oip->ip_dst, oip->ip_src,
1593				(tcp->th_sport << 16) | tcp->th_dport, 0);
1594		else
1595			return nat_outlookup(fin->fin_ifp, flags,
1596				(u_int)oip->ip_p, oip->ip_dst, oip->ip_src,
1597				(tcp->th_sport << 16) | tcp->th_dport, 0);
1598	}
1599	if (dir == NAT_INBOUND)
1600		return nat_inlookup(fin->fin_ifp, 0, (u_int)oip->ip_p,
1601			oip->ip_dst, oip->ip_src, 0, 0);
1602	else
1603		return nat_outlookup(fin->fin_ifp, 0, (u_int)oip->ip_p,
1604			oip->ip_dst, oip->ip_src, 0, 0);
1605}
1606
1607
1608/*
1609 * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP
1610 * packet gets correctly recognised.
1611 */
1612nat_t *nat_icmp(ip, fin, nflags, dir)
1613ip_t *ip;
1614fr_info_t *fin;
1615u_int *nflags;
1616int dir;
1617{
1618	u_32_t sum1, sum2, sumd, sumd2 = 0;
1619	struct in_addr in;
1620	icmphdr_t *icmp;
1621	udphdr_t *udp;
1622	nat_t *nat;
1623	ip_t *oip;
1624	int flags = 0;
1625
1626	if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0))
1627		return NULL;
1628	/*
1629	 * nat_icmplookup() will return NULL for `defective' packets.
1630	 */
1631	if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1632		return NULL;
1633	*nflags = IPN_ICMPERR;
1634	icmp = (icmphdr_t *)fin->fin_dp;
1635	oip = (ip_t *)&icmp->icmp_ip;
1636	if (oip->ip_p == IPPROTO_TCP)
1637		flags = IPN_TCP;
1638	else if (oip->ip_p == IPPROTO_UDP)
1639		flags = IPN_UDP;
1640	udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1641	/*
1642	 * Need to adjust ICMP header to include the real IP#'s and
1643	 * port #'s.  Only apply a checksum change relative to the
1644	 * IP address change as it will be modified again in ip_natout
1645	 * for both address and port.  Two checksum changes are
1646	 * necessary for the two header address changes.  Be careful
1647	 * to only modify the checksum once for the port # and twice
1648	 * for the IP#.
1649	 */
1650
1651	/*
1652	 * Step 1
1653	 * Fix the IP addresses in the offending IP packet. You also need
1654	 * to adjust the IP header checksum of that offending IP packet
1655	 * and the ICMP checksum of the ICMP error message itself.
1656	 *
1657	 * Unfortunately, for UDP and TCP, the IP addresses are also contained
1658	 * in the pseudo header that is used to compute the UDP resp. TCP
1659	 * checksum. So, we must compensate that as well. Even worse, the
1660	 * change in the UDP and TCP checksums require yet another
1661	 * adjustment of the ICMP checksum of the ICMP error message.
1662	 *
1663	 * For the moment we forget about TCP, because that checksum is not
1664	 * in the first 8 bytes, so it will not be available in most cases.
1665	 */
1666
1667	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
1668		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1669		in = nat->nat_inip;
1670		oip->ip_src = in;
1671	} else {
1672		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1673		in = nat->nat_outip;
1674		oip->ip_dst = in;
1675	}
1676
1677	sum2 = LONG_SUM(ntohl(in.s_addr));
1678
1679	CALC_SUMD(sum1, sum2, sumd);
1680
1681	if (nat->nat_dir == NAT_OUTBOUND) {
1682		/*
1683		 * Fix IP checksum of the offending IP packet to adjust for
1684		 * the change in the IP address.
1685		 *
1686		 * Normally, you would expect that the ICMP checksum of the
1687		 * ICMP error message needs to be adjusted as well for the
1688		 * IP address change in oip.
1689		 * However, this is a NOP, because the ICMP checksum is
1690		 * calculated over the complete ICMP packet, which includes the
1691		 * changed oip IP addresses and oip->ip_sum. However, these
1692		 * two changes cancel each other out (if the delta for
1693		 * the IP address is x, then the delta for ip_sum is minus x),
1694		 * so no change in the icmp_cksum is necessary.
1695		 *
1696		 * Be careful that nat_dir refers to the direction of the
1697		 * offending IP packet (oip), not to its ICMP response (icmp)
1698		 */
1699		fix_datacksum(&oip->ip_sum, sumd);
1700
1701		/*
1702		 * Fix UDP pseudo header checksum to compensate for the
1703		 * IP address change.
1704		 */
1705		if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1706			/*
1707			 * The UDP checksum is optional, only adjust it
1708			 * if it has been set.
1709			 */
1710			sum1 = ntohs(udp->uh_sum);
1711			fix_datacksum(&udp->uh_sum, sumd);
1712			sum2 = ntohs(udp->uh_sum);
1713
1714			/*
1715			 * Fix ICMP checksum to compensate the UDP
1716			 * checksum adjustment.
1717			 */
1718			CALC_SUMD(sum1, sum2, sumd);
1719			sumd2 = sumd;
1720		}
1721
1722#if 0
1723		/*
1724		 * Fix TCP pseudo header checksum to compensate for the
1725		 * IP address change. Before we can do the change, we
1726		 * must make sure that oip is sufficient large to hold
1727		 * the TCP checksum (normally it does not!).
1728		 */
1729		if (oip->ip_p == IPPROTO_TCP) {
1730
1731		}
1732#endif
1733	} else {
1734
1735		/*
1736		 * Fix IP checksum of the offending IP packet to adjust for
1737		 * the change in the IP address.
1738		 *
1739		 * Normally, you would expect that the ICMP checksum of the
1740		 * ICMP error message needs to be adjusted as well for the
1741		 * IP address change in oip.
1742		 * However, this is a NOP, because the ICMP checksum is
1743		 * calculated over the complete ICMP packet, which includes the
1744		 * changed oip IP addresses and oip->ip_sum. However, these
1745		 * two changes cancel each other out (if the delta for
1746		 * the IP address is x, then the delta for ip_sum is minus x),
1747		 * so no change in the icmp_cksum is necessary.
1748		 *
1749		 * Be careful that nat_dir refers to the direction of the
1750		 * offending IP packet (oip), not to its ICMP response (icmp)
1751		 */
1752		fix_datacksum(&oip->ip_sum, sumd);
1753
1754/* XXX FV : without having looked at Solaris source code, it seems unlikely
1755 * that SOLARIS would compensate this in the kernel (a body of an IP packet
1756 * in the data section of an ICMP packet). I have the feeling that this should
1757 * be unconditional, but I'm not in a position to check.
1758 */
1759#if !SOLARIS && !defined(__sgi)
1760		/*
1761		 * Fix UDP pseudo header checksum to compensate for the
1762		 * IP address change.
1763		 */
1764		if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1765			/*
1766			 * The UDP checksum is optional, only adjust it
1767			 * if it has been set
1768			 */
1769			sum1 = ntohs(udp->uh_sum);
1770			fix_datacksum(&udp->uh_sum, sumd);
1771			sum2 = ntohs(udp->uh_sum);
1772
1773			/*
1774			 * Fix ICMP checksum to compensate the UDP
1775			 * checksum adjustment.
1776			 */
1777			CALC_SUMD(sum1, sum2, sumd);
1778			sumd2 = sumd;
1779		}
1780
1781#if 0
1782		/*
1783		 * Fix TCP pseudo header checksum to compensate for the
1784		 * IP address change. Before we can do the change, we
1785		 * must make sure that oip is sufficient large to hold
1786		 * the TCP checksum (normally it does not!).
1787		 */
1788		if (oip->ip_p == IPPROTO_TCP) {
1789
1790		};
1791#endif
1792
1793#endif
1794	}
1795
1796	if ((flags & IPN_TCPUDP) != 0) {
1797		tcphdr_t *tcp;
1798
1799		/*
1800		 * XXX - what if this is bogus hl and we go off the end ?
1801		 * In this case, nat_icmpinlookup() will have returned NULL.
1802		 */
1803		tcp = (tcphdr_t *)udp;
1804
1805		/*
1806		 * Step 2 :
1807		 * For offending TCP/UDP IP packets, translate the ports as
1808		 * well, based on the NAT specification. Of course such
1809		 * a change must be reflected in the ICMP checksum as well.
1810		 *
1811		 * Advance notice : Now it becomes complicated :-)
1812		 *
1813		 * Since the port fields are part of the TCP/UDP checksum
1814		 * of the offending IP packet, you need to adjust that checksum
1815		 * as well... but, if you change, you must change the icmp
1816		 * checksum *again*, to reflect that change.
1817		 *
1818		 * To further complicate: the TCP checksum is not in the first
1819		 * 8 bytes of the offending ip packet, so it most likely is not
1820		 * available (we might have to fix that if the encounter a
1821		 * device that returns more than 8 data bytes on icmp error)
1822		 */
1823
1824		if (nat->nat_oport == tcp->th_dport) {
1825			if (tcp->th_sport != nat->nat_inport) {
1826				/*
1827				 * Fix ICMP checksum to compensate port
1828				 * adjustment.
1829				 */
1830				sum1 = ntohs(tcp->th_sport);
1831				sum2 = ntohs(nat->nat_inport);
1832				CALC_SUMD(sum1, sum2, sumd);
1833				sumd2 += sumd;
1834				tcp->th_sport = nat->nat_inport;
1835
1836				/*
1837				 * Fix udp checksum to compensate port
1838				 * adjustment.  NOTE : the offending IP packet
1839				 * flows the other direction compared to the
1840				 * ICMP message.
1841				 *
1842				 * The UDP checksum is optional, only adjust
1843				 * it if it has been set.
1844				 */
1845				if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1846
1847					sum1 = ntohs(udp->uh_sum);
1848					fix_datacksum(&udp->uh_sum, sumd);
1849					sum2 = ntohs(udp->uh_sum);
1850
1851					/*
1852					 * Fix ICMP checksum to
1853					 * compensate UDP checksum
1854					 * adjustment.
1855					 */
1856					CALC_SUMD(sum1, sum2, sumd);
1857					sumd2 += sumd;
1858				}
1859			}
1860		} else {
1861			if (tcp->th_dport != nat->nat_outport) {
1862				/*
1863				 * Fix ICMP checksum to compensate port
1864				 * adjustment.
1865				 */
1866				sum1 = ntohs(tcp->th_dport);
1867				sum2 = ntohs(nat->nat_outport);
1868				CALC_SUMD(sum1, sum2, sumd);
1869				sumd2 += sumd;
1870				tcp->th_dport = nat->nat_outport;
1871
1872				/*
1873				 * Fix udp checksum to compensate port
1874				 * adjustment.   NOTE : the offending IP
1875				 * packet flows the other direction compared
1876				 * to the ICMP message.
1877				 *
1878				 * The UDP checksum is optional, only adjust
1879				 * it if it has been set.
1880				 */
1881				if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1882
1883					sum1 = ntohs(udp->uh_sum);
1884					fix_datacksum(&udp->uh_sum, sumd);
1885					sum2 = ntohs(udp->uh_sum);
1886
1887					/*
1888					 * Fix ICMP checksum to compensate
1889					 * UDP checksum adjustment.
1890					 */
1891					CALC_SUMD(sum1, sum2, sumd);
1892					sumd2 += sumd;
1893				}
1894			}
1895		}
1896		if (sumd2) {
1897			sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1898			sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1899			if (nat->nat_dir == NAT_OUTBOUND) {
1900				fix_outcksum(fin, &icmp->icmp_cksum, sumd2);
1901			} else {
1902				fix_incksum(fin, &icmp->icmp_cksum, sumd2);
1903			}
1904		}
1905	}
1906	if (oip->ip_p == IPPROTO_ICMP)
1907		nat->nat_age = fr_defnaticmpage;
1908	return nat;
1909}
1910
1911
/*
 * NB: these lookups do not lock access to the NAT table themselves; they
 * assume the caller has already taken the lock.
 */
1916/*
1917 * Lookup a nat entry based on the mapped destination ip address/port and
1918 * real source address/port.  We use this lookup when receiving a packet,
1919 * we're looking for a table entry, based on the destination address.
1920 * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
1921 */
1922nat_t *nat_inlookup(ifp, flags, p, src, mapdst, ports, rw)
1923void *ifp;
1924register u_int flags, p;
1925struct in_addr src , mapdst;
1926u_32_t ports;
1927int rw;
1928{
1929	register u_short sport, dport;
1930	register nat_t *nat;
1931	register int nflags;
1932	register u_32_t dst;
1933	u_int hv;
1934
1935	dst = mapdst.s_addr;
1936	dport = ports >> 16;
1937	sport = ports & 0xffff;
1938	flags &= IPN_TCPUDP;
1939
1940	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
1941	hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
1942	nat = nat_table[1][hv];
1943	for (; nat; nat = nat->nat_hnext[1]) {
1944		nflags = nat->nat_flags;
1945		if ((!ifp || ifp == nat->nat_ifp) &&
1946		    nat->nat_oip.s_addr == src.s_addr &&
1947		    nat->nat_outip.s_addr == dst &&
1948		    (((p == 0) && (flags == (nat->nat_flags & IPN_TCPUDP)))
1949		     || (p == nat->nat_p)) && (!flags ||
1950		     (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
1951		      ((nat->nat_outport == dport) || (nflags & FI_W_SPORT)))))
1952			return nat;
1953	}
1954	if (!nat_stats.ns_wilds || !(flags & IPN_TCPUDP))
1955		return NULL;
1956	if (!rw) {
1957		RWLOCK_EXIT(&ipf_nat);
1958	}
1959	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
1960	hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
1961	if (!rw) {
1962		WRITE_ENTER(&ipf_nat);
1963	}
1964	nat = nat_table[1][hv];
1965	for (; nat; nat = nat->nat_hnext[1]) {
1966		nflags = nat->nat_flags;
1967		if (ifp && ifp != nat->nat_ifp)
1968			continue;
1969		if (!(nflags & IPN_TCPUDP))
1970			continue;
1971		if (!(nflags & FI_WILDP))
1972			continue;
1973		if (nat->nat_oip.s_addr != src.s_addr ||
1974		    nat->nat_outip.s_addr != dst)
1975			continue;
1976		if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
1977		    ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) {
1978			nat_tabmove(nat, ports);
1979			break;
1980		}
1981	}
1982	if (!rw) {
1983		MUTEX_DOWNGRADE(&ipf_nat);
1984	}
1985	return nat;
1986}
1987
1988
1989/*
1990 * This function is only called for TCP/UDP NAT table entries where the
1991 * original was placed in the table without hashing on the ports and we now
1992 * want to include hashing on port numbers.
1993 */
1994static void nat_tabmove(nat, ports)
1995nat_t *nat;
1996u_32_t ports;
1997{
1998	register u_short sport, dport;
1999	nat_t **natp;
2000	u_int hv;
2001
2002	dport = ports >> 16;
2003	sport = ports & 0xffff;
2004
2005	if (nat->nat_oport == dport) {
2006		nat->nat_inport = sport;
2007		nat->nat_outport = sport;
2008	}
2009
2010	/*
2011	 * Remove the NAT entry from the old location
2012	 */
2013	if (nat->nat_hnext[0])
2014		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
2015	*nat->nat_phnext[0] = nat->nat_hnext[0];
2016
2017	if (nat->nat_hnext[1])
2018		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
2019	*nat->nat_phnext[1] = nat->nat_hnext[1];
2020
2021	/*
2022	 * Add into the NAT table in the new position
2023	 */
2024	hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, 0xffffffff);
2025	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2026	natp = &nat_table[0][hv];
2027	if (*natp)
2028		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2029	nat->nat_phnext[0] = natp;
2030	nat->nat_hnext[0] = *natp;
2031	*natp = nat;
2032
2033	hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, 0xffffffff);
2034	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2035	natp = &nat_table[1][hv];
2036	if (*natp)
2037		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2038	nat->nat_phnext[1] = natp;
2039	nat->nat_hnext[1] = *natp;
2040	*natp = nat;
2041}
2042
2043
2044/*
2045 * Lookup a nat entry based on the source 'real' ip address/port and
2046 * destination address/port.  We use this lookup when sending a packet out,
2047 * we're looking for a table entry, based on the source address.
2048 * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2049 */
2050nat_t *nat_outlookup(ifp, flags, p, src, dst, ports, rw)
2051void *ifp;
2052register u_int flags, p;
2053struct in_addr src , dst;
2054u_32_t ports;
2055int rw;
2056{
2057	register u_short sport, dport;
2058	register nat_t *nat;
2059	register int nflags;
2060	u_32_t srcip;
2061	u_int hv;
2062
2063	sport = ports & 0xffff;
2064	dport = ports >> 16;
2065	flags &= IPN_TCPUDP;
2066	srcip = src.s_addr;
2067
2068	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
2069	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
2070	nat = nat_table[0][hv];
2071	for (; nat; nat = nat->nat_hnext[0]) {
2072		nflags = nat->nat_flags;
2073
2074		if ((!ifp || ifp == nat->nat_ifp) &&
2075		    nat->nat_inip.s_addr == srcip &&
2076		    nat->nat_oip.s_addr == dst.s_addr &&
2077		    (((p == 0) && (flags == (nflags & IPN_TCPUDP)))
2078		     || (p == nat->nat_p)) && (!flags ||
2079		     ((nat->nat_inport == sport || nflags & FI_W_SPORT) &&
2080		      (nat->nat_oport == dport || nflags & FI_W_DPORT))))
2081			return nat;
2082	}
2083	if (!nat_stats.ns_wilds || !(flags & IPN_TCPUDP))
2084		return NULL;
2085	if (!rw) {
2086		RWLOCK_EXIT(&ipf_nat);
2087	}
2088	hv = NAT_HASH_FN(srcip, 0, ipf_nattable_sz);
2089	hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
2090	if (!rw) {
2091		WRITE_ENTER(&ipf_nat);
2092	}
2093	nat = nat_table[0][hv];
2094	for (; nat; nat = nat->nat_hnext[0]) {
2095		nflags = nat->nat_flags;
2096		if (ifp && ifp != nat->nat_ifp)
2097			continue;
2098		if (!(nflags & IPN_TCPUDP))
2099			continue;
2100		if (!(nflags & FI_WILDP))
2101			continue;
2102		if ((nat->nat_inip.s_addr != srcip) ||
2103		    (nat->nat_oip.s_addr != dst.s_addr))
2104			continue;
2105		if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
2106		    ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
2107			nat_tabmove(nat, ports);
2108			break;
2109		}
2110	}
2111	if (!rw) {
2112		MUTEX_DOWNGRADE(&ipf_nat);
2113	}
2114	return nat;
2115}
2116
2117
2118/*
2119 * Lookup the NAT tables to search for a matching redirect
2120 */
2121nat_t *nat_lookupredir(np)
2122register natlookup_t *np;
2123{
2124	u_32_t ports;
2125	nat_t *nat;
2126
2127	ports = (np->nl_outport << 16) | np->nl_inport;
2128	/*
2129	 * If nl_inip is non null, this is a lookup based on the real
2130	 * ip address. Else, we use the fake.
2131	 */
2132	if ((nat = nat_outlookup(NULL, np->nl_flags, 0, np->nl_inip,
2133				 np->nl_outip, ports, 0))) {
2134		np->nl_realip = nat->nat_outip;
2135		np->nl_realport = nat->nat_outport;
2136	}
2137	return nat;
2138}
2139
2140
2141static int nat_match(fin, np, ip)
2142fr_info_t *fin;
2143ipnat_t *np;
2144ip_t *ip;
2145{
2146	frtuc_t *ft;
2147
2148	if (ip->ip_v != 4)
2149		return 0;
2150
2151	if (np->in_p && ip->ip_p != np->in_p)
2152		return 0;
2153	if (fin->fin_out) {
2154		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2155			return 0;
2156		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
2157		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
2158			return 0;
2159		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
2160		    ^ ((np->in_flags & IPN_NOTDST) != 0))
2161			return 0;
2162	} else {
2163		if (!(np->in_redir & NAT_REDIRECT))
2164			return 0;
2165		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
2166		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
2167			return 0;
2168		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
2169		    ^ ((np->in_flags & IPN_NOTDST) != 0))
2170			return 0;
2171	}
2172
2173	ft = &np->in_tuc;
2174	if (!(fin->fin_fl & FI_TCPUDP) ||
2175	    (fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) {
2176		if (ft->ftu_scmp || ft->ftu_dcmp)
2177			return 0;
2178		return 1;
2179	}
2180
2181	return fr_tcpudpchk(ft, fin);
2182}
2183
2184
2185/*
2186 * Packets going out on the external interface go through this.
2187 * Here, the source address requires alteration, if anything.
2188 */
2189int ip_natout(ip, fin)
2190ip_t *ip;
2191fr_info_t *fin;
2192{
2193	register ipnat_t *np = NULL;
2194	register u_32_t ipa;
2195	tcphdr_t *tcp = NULL;
2196	u_short sport = 0, dport = 0, *csump = NULL;
2197	int natadd = 1, i, icmpset = 1;
2198	u_int nflags = 0, hv, msk;
2199	struct ifnet *ifp;
2200	frentry_t *fr;
2201	u_32_t iph;
2202	nat_t *nat;
2203
2204	if (nat_list == NULL || (fr_nat_lock))
2205		return 0;
2206
2207	if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
2208	    fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1)
2209		ifp = fr->fr_tif.fd_ifp;
2210	else
2211		ifp = fin->fin_ifp;
2212
2213	if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2214		if (ip->ip_p == IPPROTO_TCP)
2215			nflags = IPN_TCP;
2216		else if (ip->ip_p == IPPROTO_UDP)
2217			nflags = IPN_UDP;
2218		if ((nflags & IPN_TCPUDP)) {
2219			tcp = (tcphdr_t *)fin->fin_dp;
2220			sport = tcp->th_sport;
2221			dport = tcp->th_dport;
2222		}
2223	}
2224
2225	ipa = ip->ip_src.s_addr;
2226
2227	READ_ENTER(&ipf_nat);
2228
2229	if ((ip->ip_p == IPPROTO_ICMP) &&
2230	    (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
2231		icmpset = 1;
2232	else if ((fin->fin_fl & FI_FRAG) &&
2233	    (nat = ipfr_nat_knownfrag(ip, fin)))
2234		natadd = 0;
2235	else if ((nat = nat_outlookup(ifp, nflags, (u_int)ip->ip_p,
2236				      ip->ip_src, ip->ip_dst,
2237				      (dport << 16) | sport, 0))) {
2238		nflags = nat->nat_flags;
2239		if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2240			if ((nflags & FI_W_SPORT) &&
2241			    (nat->nat_inport != sport))
2242				nat->nat_inport = sport;
2243			else if ((nflags & FI_W_DPORT) &&
2244				 (nat->nat_oport != dport))
2245				nat->nat_oport = dport;
2246			if (nat->nat_outport == 0)
2247				nat->nat_outport = sport;
2248			nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
2249			nflags = nat->nat_flags;
2250			nat_stats.ns_wilds--;
2251		}
2252	} else {
2253		RWLOCK_EXIT(&ipf_nat);
2254		WRITE_ENTER(&ipf_nat);
2255		/*
2256		 * If there is no current entry in the nat table for this IP#,
2257		 * create one for it (if there is a matching rule).
2258		 */
2259		msk = 0xffffffff;
2260		i = 32;
2261maskloop:
2262		iph = ipa & htonl(msk);
2263		hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
2264		for (np = nat_rules[hv]; np; np = np->in_mnext)
2265		{
2266			if (np->in_ifp && (np->in_ifp != ifp))
2267				continue;
2268			if ((np->in_flags & IPN_RF) &&
2269			    !(np->in_flags & nflags))
2270				continue;
2271			if (np->in_flags & IPN_FILTER) {
2272				if (!nat_match(fin, np, ip))
2273					continue;
2274			} else if ((ipa & np->in_inmsk) != np->in_inip)
2275				continue;
2276			if (np->in_redir & (NAT_MAP|NAT_MAPBLK)) {
2277				if (*np->in_plabel && !appr_ok(ip, tcp, np))
2278					continue;
2279				/*
2280				 * If it's a redirection, then we don't want to
2281				 * create new outgoing port stuff.
2282				 * Redirections are only for incoming
2283				 * connections.
2284				 */
2285				if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2286					continue;
2287				if ((nat = nat_new(np, ip, fin, (u_int)nflags,
2288						    NAT_OUTBOUND))) {
2289					np->in_hits++;
2290					break;
2291				}
2292			}
2293		}
2294		if ((np == NULL) && (i > 0)) {
2295			do {
2296				i--;
2297				msk <<= 1;
2298			} while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
2299			if (i >= 0)
2300				goto maskloop;
2301		}
2302		MUTEX_DOWNGRADE(&ipf_nat);
2303	}
2304
2305	/*
2306	 * NOTE: ipf_nat must now only be held as a read lock
2307	 */
2308	if (nat) {
2309		np = nat->nat_ptr;
2310		if (natadd && (fin->fin_fl & FI_FRAG) && np)
2311			ipfr_nat_newfrag(ip, fin, 0, nat);
2312		MUTEX_ENTER(&nat->nat_lock);
2313		nat->nat_age = fr_defnatage;
2314		nat->nat_bytes += ip->ip_len;
2315		nat->nat_pkts++;
2316		MUTEX_EXIT(&nat->nat_lock);
2317
2318		/*
2319		 * Fix up checksums, not by recalculating them, but
2320		 * simply computing adjustments.
2321		 */
2322		if (nflags == IPN_ICMPERR) {
2323			u_32_t s1, s2, sumd;
2324
2325			s1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
2326			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
2327			CALC_SUMD(s1, s2, sumd);
2328
2329			if (nat->nat_dir == NAT_OUTBOUND)
2330				fix_incksum(fin, &ip->ip_sum, sumd);
2331			else
2332				fix_outcksum(fin, &ip->ip_sum, sumd);
2333		}
2334#if SOLARIS || defined(__sgi)
2335		else {
2336			if (nat->nat_dir == NAT_OUTBOUND)
2337				fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2338			else
2339				fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2340		}
2341#endif
2342		ip->ip_src = nat->nat_outip;
2343
2344		if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2345
2346			if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
2347				tcp->th_sport = nat->nat_outport;
2348				fin->fin_data[0] = ntohs(tcp->th_sport);
2349			}
2350
2351			if (ip->ip_p == IPPROTO_TCP) {
2352				csump = &tcp->th_sum;
2353				MUTEX_ENTER(&nat->nat_lock);
2354				fr_tcp_age(&nat->nat_age,
2355					   nat->nat_tcpstate, fin, 1);
2356				if (nat->nat_age < fr_defnaticmpage)
2357					nat->nat_age = fr_defnaticmpage;
2358#ifdef LARGE_NAT
2359				else if (nat->nat_age > fr_defnatage)
2360					nat->nat_age = fr_defnatage;
2361#endif
2362				/*
2363				 * Increase this because we may have
2364				 * "keep state" following this too and
2365				 * packet storms can occur if this is
2366				 * removed too quickly.
2367				 */
2368				if (nat->nat_age == fr_tcpclosed)
2369					nat->nat_age = fr_tcplastack;
2370				MUTEX_EXIT(&nat->nat_lock);
2371			} else if (ip->ip_p == IPPROTO_UDP) {
2372				udphdr_t *udp = (udphdr_t *)tcp;
2373
2374				if (udp->uh_sum)
2375					csump = &udp->uh_sum;
2376			} else if (ip->ip_p == IPPROTO_ICMP) {
2377				if (!icmpset)
2378					nat->nat_age = fr_defnaticmpage;
2379			}
2380
2381			if (csump) {
2382				if (nat->nat_dir == NAT_OUTBOUND)
2383					fix_outcksum(fin, csump, nat->nat_sumd[1]);
2384				else
2385					fix_incksum(fin, csump, nat->nat_sumd[1]);
2386			}
2387		}
2388
2389		if ((np->in_apr != NULL) && (np->in_dport == 0 ||
2390		     (tcp != NULL && dport == np->in_dport))) {
2391			i = appr_check(ip, fin, nat);
2392			if (i == 0)
2393				i = 1;
2394		} else
2395			i = 1;
2396		ATOMIC_INCL(nat_stats.ns_mapped[1]);
2397		RWLOCK_EXIT(&ipf_nat);	/* READ */
2398		return i;
2399	}
2400	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
2401	return 0;
2402}
2403
2404
2405/*
2406 * Packets coming in from the external interface go through this.
2407 * Here, the destination address requires alteration, if anything.
2408 */
2409int ip_natin(ip, fin)
2410ip_t *ip;
2411fr_info_t *fin;
2412{
2413	register struct in_addr src;
2414	register struct in_addr in;
2415	register ipnat_t *np;
2416	u_short sport = 0, dport = 0, *csump = NULL;
2417	u_int nflags = 0, natadd = 1, hv, msk;
2418	struct ifnet *ifp = fin->fin_ifp;
2419	tcphdr_t *tcp = NULL;
2420	int i, icmpset = 0;
2421	nat_t *nat;
2422	u_32_t iph;
2423
2424	if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
2425		return 0;
2426
2427	if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2428		if (ip->ip_p == IPPROTO_TCP)
2429			nflags = IPN_TCP;
2430		else if (ip->ip_p == IPPROTO_UDP)
2431			nflags = IPN_UDP;
2432		if ((nflags & IPN_TCPUDP)) {
2433			tcp = (tcphdr_t *)fin->fin_dp;
2434			dport = tcp->th_dport;
2435			sport = tcp->th_sport;
2436		}
2437	}
2438
2439	in = ip->ip_dst;
2440	/* make sure the source address is to be redirected */
2441	src = ip->ip_src;
2442
2443	READ_ENTER(&ipf_nat);
2444
2445	if ((ip->ip_p == IPPROTO_ICMP) &&
2446	    (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
2447		icmpset = 1;
2448	else if ((fin->fin_fl & FI_FRAG) &&
2449		 (nat = ipfr_nat_knownfrag(ip, fin)))
2450		natadd = 0;
2451	else if ((nat = nat_inlookup(fin->fin_ifp, nflags, (u_int)ip->ip_p,
2452				     ip->ip_src, in, (dport << 16) | sport,
2453				     0))) {
2454		nflags = nat->nat_flags;
2455		if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2456			if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
2457				nat->nat_oport = sport;
2458			else if ((nat->nat_outport != dport) &&
2459				 (nflags & FI_W_SPORT))
2460				nat->nat_outport = dport;
2461			nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2462			nflags = nat->nat_flags;
2463			nat_stats.ns_wilds--;
2464		}
2465	} else {
2466		RWLOCK_EXIT(&ipf_nat);
2467		WRITE_ENTER(&ipf_nat);
2468		/*
2469		 * If there is no current entry in the nat table for this IP#,
2470		 * create one for it (if there is a matching rule).
2471		 */
2472		msk = 0xffffffff;
2473		i = 32;
2474maskloop:
2475		iph = in.s_addr & htonl(msk);
2476		hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2477		for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2478			if ((np->in_ifp && (np->in_ifp != ifp)) ||
2479			    (np->in_p && (np->in_p != ip->ip_p)) ||
2480			    (np->in_flags && !(nflags & np->in_flags)))
2481				continue;
2482			if (np->in_flags & IPN_FILTER) {
2483				if (!nat_match(fin, np, ip))
2484					continue;
2485			} else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2486				continue;
2487			if ((np->in_redir & NAT_REDIRECT) &&
2488			    (!np->in_pmin || (np->in_flags & IPN_FILTER) ||
2489			     ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2490			      (ntohs(dport) >= ntohs(np->in_pmin)))))
2491				if ((nat = nat_new(np, ip, fin, nflags,
2492						    NAT_INBOUND))) {
2493					np->in_hits++;
2494					break;
2495				}
2496		}
2497
2498		if ((np == NULL) && (i > 0)) {
2499			do {
2500				i--;
2501				msk <<= 1;
2502			} while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2503			if (i >= 0)
2504				goto maskloop;
2505		}
2506		MUTEX_DOWNGRADE(&ipf_nat);
2507	}
2508
2509	/*
2510	 * NOTE: ipf_nat must now only be held as a read lock
2511	 */
2512	if (nat) {
2513		np = nat->nat_ptr;
2514		fin->fin_fr = nat->nat_fr;
2515		if (natadd && (fin->fin_fl & FI_FRAG) && np)
2516			ipfr_nat_newfrag(ip, fin, 0, nat);
2517		if ((np->in_apr != NULL) && (np->in_dport == 0 ||
2518		    (tcp != NULL && sport == np->in_dport))) {
2519			i = appr_check(ip, fin, nat);
2520			if (i == -1) {
2521				RWLOCK_EXIT(&ipf_nat);
2522				return i;
2523			}
2524		}
2525
2526		MUTEX_ENTER(&nat->nat_lock);
2527		if (nflags != IPN_ICMPERR)
2528			nat->nat_age = fr_defnatage;
2529
2530		nat->nat_bytes += ip->ip_len;
2531		nat->nat_pkts++;
2532		MUTEX_EXIT(&nat->nat_lock);
2533		ip->ip_dst = nat->nat_inip;
2534		fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2535
2536		/*
2537		 * Fix up checksums, not by recalculating them, but
2538		 * simply computing adjustments.
2539		 */
2540#if SOLARIS || defined(__sgi)
2541		if (nat->nat_dir == NAT_OUTBOUND)
2542			fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2543		else
2544			fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2545#endif
2546		if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2547
2548			if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
2549				tcp->th_dport = nat->nat_inport;
2550				fin->fin_data[1] = ntohs(tcp->th_dport);
2551			}
2552
2553			if (ip->ip_p == IPPROTO_TCP) {
2554				csump = &tcp->th_sum;
2555				MUTEX_ENTER(&nat->nat_lock);
2556				fr_tcp_age(&nat->nat_age,
2557					   nat->nat_tcpstate, fin, 0);
2558				if (nat->nat_age < fr_defnaticmpage)
2559					nat->nat_age = fr_defnaticmpage;
2560#ifdef LARGE_NAT
2561				else if (nat->nat_age > fr_defnatage)
2562					nat->nat_age = fr_defnatage;
2563#endif
2564				/*
2565				 * Increase this because we may have
2566				 * "keep state" following this too and
2567				 * packet storms can occur if this is
2568				 * removed too quickly.
2569				 */
2570				if (nat->nat_age == fr_tcpclosed)
2571					nat->nat_age = fr_tcplastack;
2572				MUTEX_EXIT(&nat->nat_lock);
2573			} else if (ip->ip_p == IPPROTO_UDP) {
2574				udphdr_t *udp = (udphdr_t *)tcp;
2575
2576				if (udp->uh_sum)
2577					csump = &udp->uh_sum;
2578			} else if (ip->ip_p == IPPROTO_ICMP) {
2579				if (!icmpset)
2580					nat->nat_age = fr_defnaticmpage;
2581			}
2582
2583			if (csump) {
2584				if (nat->nat_dir == NAT_OUTBOUND)
2585					fix_incksum(fin, csump, nat->nat_sumd[0]);
2586				else
2587					fix_outcksum(fin, csump, nat->nat_sumd[0]);
2588			}
2589		}
2590		ATOMIC_INCL(nat_stats.ns_mapped[0]);
2591		RWLOCK_EXIT(&ipf_nat);			/* READ */
2592		return 1;
2593	}
2594	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
2595	return 0;
2596}
2597
2598
2599/*
2600 * Free all memory used by NAT structures allocated at runtime.
2601 */
2602void ip_natunload()
2603{
2604	WRITE_ENTER(&ipf_nat);
2605	(void) nat_clearlist();
2606	(void) nat_flushtable();
2607	RWLOCK_EXIT(&ipf_nat);
2608
2609	if (nat_table[0] != NULL) {
2610		KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2611		nat_table[0] = NULL;
2612	}
2613	if (nat_table[1] != NULL) {
2614		KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2615		nat_table[1] = NULL;
2616	}
2617	if (nat_rules != NULL) {
2618		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2619		nat_rules = NULL;
2620	}
2621	if (rdr_rules != NULL) {
2622		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2623		rdr_rules = NULL;
2624	}
2625	if (maptable != NULL) {
2626		KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2627		maptable = NULL;
2628	}
2629}
2630
2631
2632/*
2633 * Slowly expire held state for NAT entries.  Timeouts are set in
2634 * expectation of this being called twice per second.
2635 */
2636void ip_natexpire()
2637{
2638	register struct nat *nat, **natp;
2639#if defined(_KERNEL) && !SOLARIS
2640	int s;
2641#endif
2642
2643	SPL_NET(s);
2644	WRITE_ENTER(&ipf_nat);
2645	for (natp = &nat_instances; (nat = *natp); ) {
2646		nat->nat_age--;
2647		if (nat->nat_age) {
2648			natp = &nat->nat_next;
2649			continue;
2650		}
2651		*natp = nat->nat_next;
2652#ifdef	IPFILTER_LOG
2653		nat_log(nat, NL_EXPIRE);
2654#endif
2655		nat_delete(nat);
2656		nat_stats.ns_expire++;
2657	}
2658	RWLOCK_EXIT(&ipf_nat);
2659	SPL_X(s);
2660}
2661
2662
2663/*
2664 */
2665void ip_natsync(ifp)
2666void *ifp;
2667{
2668	register ipnat_t *n;
2669	register nat_t *nat;
2670	register u_32_t sum1, sum2, sumd;
2671	struct in_addr in;
2672	ipnat_t *np;
2673	void *ifp2;
2674#if defined(_KERNEL) && !SOLARIS
2675	int s;
2676#endif
2677
2678	/*
2679	 * Change IP addresses for NAT sessions for any protocol except TCP
2680	 * since it will break the TCP connection anyway.
2681	 */
2682	SPL_NET(s);
2683	WRITE_ENTER(&ipf_nat);
2684	for (nat = nat_instances; nat; nat = nat->nat_next)
2685		if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2686		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2687		    (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2688			ifp2 = nat->nat_ifp;
2689			/*
2690			 * Change the map-to address to be the same as the
2691			 * new one.
2692			 */
2693			sum1 = nat->nat_outip.s_addr;
2694			if (fr_ifpaddr(4, ifp2, &in) != -1)
2695				nat->nat_outip = in;
2696			sum2 = nat->nat_outip.s_addr;
2697
2698			if (sum1 == sum2)
2699				continue;
2700			/*
2701			 * Readjust the checksum adjustment to take into
2702			 * account the new IP#.
2703			 */
2704			CALC_SUMD(sum1, sum2, sumd);
2705			/* XXX - dont change for TCP when solaris does
2706			 * hardware checksumming.
2707			 */
2708			sumd += nat->nat_sumd[0];
2709			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2710			nat->nat_sumd[1] = nat->nat_sumd[0];
2711		}
2712
2713	for (n = nat_list; (n != NULL); n = n->in_next)
2714		if (n->in_ifp == ifp) {
2715			n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2716			if (!n->in_ifp)
2717				n->in_ifp = (void *)-1;
2718		}
2719	RWLOCK_EXIT(&ipf_nat);
2720	SPL_X(s);
2721}
2722
2723
#ifdef	IPFILTER_LOG
/*
 * Build a natlog record describing a NAT session and pass it to
 * ipllog() on the IPL_LOGNAT device.
 *
 * nat  - session to describe
 * type - event type stored in nl_type
 *
 * nl_rule is the position of the session's rule in nat_list, or -1 when
 * the session has no rule, the rule is not on the list, or LARGE_NAT is
 * defined (in which case the list is not searched).
 */
void nat_log(nat, type)
struct nat *nat;
u_int type;
{
	struct natlog rec;
	struct ipnat *rule;
	size_t sizes[1];
	void *items[1];
	int types[1], idx;

	/* Addresses and ports of the session. */
	rec.nl_inip = nat->nat_inip;
	rec.nl_outip = nat->nat_outip;
	rec.nl_origip = nat->nat_oip;
	rec.nl_origport = nat->nat_oport;
	rec.nl_inport = nat->nat_inport;
	rec.nl_outport = nat->nat_outport;

	/* Traffic counters, protocol and event type. */
	rec.nl_bytes = nat->nat_bytes;
	rec.nl_pkts = nat->nat_pkts;
	rec.nl_p = nat->nat_p;
	rec.nl_type = type;
	rec.nl_rule = -1;
#ifndef LARGE_NAT
	if (nat->nat_ptr != NULL) {
		/* Walk the rule list counting until the rule is found. */
		idx = 0;
		rule = nat_list;
		while ((rule != NULL) && (rule != nat->nat_ptr)) {
			rule = rule->in_next;
			idx++;
		}
		if (rule != NULL)
			rec.nl_rule = idx;
	}
#endif
	items[0] = &rec;
	sizes[0] = sizeof(rec);
	types[0] = 0;

	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
}
#endif
2762