ip_nat.c revision 60853
1/*
2 * Copyright (C) 1995-2000 by Darren Reed.
3 *
4 * Redistribution and use in source and binary forms are permitted
5 * provided that this notice is preserved and due credit is given
6 * to the original author and the contributors.
7 *
8 * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
9 */
10#if !defined(lint)
11static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
12/*static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.2.2.12 2000/01/24 12:43:40 darrenr Exp $";*/
13static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 60853 2000-05-24 04:01:49Z darrenr $";
14#endif
15
16#if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL)
17#define _KERNEL
18#endif
19
20#include <sys/errno.h>
21#include <sys/types.h>
22#include <sys/param.h>
23#include <sys/time.h>
24#include <sys/file.h>
25#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
26    defined(_KERNEL)
27# include "opt_ipfilter_log.h"
28#endif
29#if !defined(_KERNEL) && !defined(KERNEL)
30# include <stdio.h>
31# include <string.h>
32# include <stdlib.h>
33#endif
34#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
35# include <sys/filio.h>
36# include <sys/fcntl.h>
37#else
38# include <sys/ioctl.h>
39#endif
40#include <sys/fcntl.h>
41#include <sys/uio.h>
42#ifndef linux
43# include <sys/protosw.h>
44#endif
45#include <sys/socket.h>
46#if defined(_KERNEL) && !defined(linux)
47# include <sys/systm.h>
48#endif
49#if !defined(__SVR4) && !defined(__svr4__)
50# ifndef linux
51#  include <sys/mbuf.h>
52# endif
53#else
54# include <sys/filio.h>
55# include <sys/byteorder.h>
56# ifdef _KERNEL
57#  include <sys/dditypes.h>
58# endif
59# include <sys/stream.h>
60# include <sys/kmem.h>
61#endif
62#if __FreeBSD_version >= 300000
63# include <sys/queue.h>
64#endif
65#include <net/if.h>
66#if __FreeBSD_version >= 300000
67# include <net/if_var.h>
68# if defined(_KERNEL) && !defined(IPFILTER_LKM)
69#  include "opt_ipfilter.h"
70# endif
71#endif
72#ifdef sun
73# include <net/af.h>
74#endif
75#include <net/route.h>
76#include <netinet/in.h>
77#include <netinet/in_systm.h>
78#include <netinet/ip.h>
79
80#ifdef __sgi
81# ifdef IFF_DRVRLOCK /* IRIX6 */
82#include <sys/hashing.h>
83#include <netinet/in_var.h>
84# endif
85#endif
86
87#ifdef RFC1825
88# include <vpn/md5.h>
89# include <vpn/ipsec.h>
90extern struct ifnet vpnif;
91#endif
92
93#ifndef linux
94# include <netinet/ip_var.h>
95#endif
96#include <netinet/tcp.h>
97#include <netinet/udp.h>
98#include <netinet/ip_icmp.h>
99#include "netinet/ip_compat.h"
100#include <netinet/tcpip.h>
101#include "netinet/ip_fil.h"
102#include "netinet/ip_proxy.h"
103#include "netinet/ip_nat.h"
104#include "netinet/ip_frag.h"
105#include "netinet/ip_state.h"
106#if (__FreeBSD_version >= 300000)
107# include <sys/malloc.h>
108#endif
109#ifndef	MIN
110# define	MIN(a,b)	(((a)<(b))?(a):(b))
111#endif
112#undef	SOCKADDR_IN
113#define	SOCKADDR_IN	struct sockaddr_in
114
115nat_t	**nat_table[2] = { NULL, NULL },
116	*nat_instances = NULL;
117ipnat_t	*nat_list = NULL;
118u_int	ipf_nattable_sz = NAT_TABLE_SZ;
119u_int	ipf_natrules_sz = NAT_SIZE;
120u_int	ipf_rdrrules_sz = RDR_SIZE;
121u_int	ipf_hostmap_sz = HOSTMAP_SIZE;
122u_32_t	nat_masks = 0;
123u_32_t	rdr_masks = 0;
124ipnat_t	**nat_rules = NULL;
125ipnat_t	**rdr_rules = NULL;
126hostmap_t	**maptable  = NULL;
127
128u_long	fr_defnatage = DEF_NAT_AGE,
129	fr_defnaticmpage = 6;		/* 3 seconds */
130static natstat_t nat_stats;
131int	fr_nat_lock = 0;
132#if	(SOLARIS || defined(__sgi)) && defined(_KERNEL)
133extern	kmutex_t	ipf_rw, ipf_hostmap;
134extern	KRWLOCK_T	ipf_nat;
135#endif
136
137static	int	nat_flushtable __P((void));
138static	int	nat_clearlist __P((void));
139static	void	nat_addnat __P((struct ipnat *));
140static	void	nat_addrdr __P((struct ipnat *));
141static	void	nat_delrdr __P((struct ipnat *));
142static	void	nat_delnat __P((struct ipnat *));
143static	int	fr_natgetent __P((caddr_t));
144static	int	fr_natgetsz __P((caddr_t));
145static	int	fr_natputent __P((caddr_t));
146static	int	nat_match __P((fr_info_t *, ipnat_t *, ip_t *));
147static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
148				    struct in_addr));
149static	void	nat_hostmapdel __P((struct hostmap *));
150
151
152int nat_init()
153{
154	KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
155	if (nat_table[0] != NULL)
156		bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
157	else
158		return -1;
159
160	KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
161	if (nat_table[1] != NULL)
162		bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
163	else
164		return -1;
165
166	KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
167	if (nat_rules != NULL)
168		bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
169	else
170		return -1;
171
172	KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
173	if (rdr_rules != NULL)
174		bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
175	else
176		return -1;
177
178	KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
179	if (maptable != NULL)
180		bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
181	else
182		return -1;
183	return 0;
184}
185
186
187static void nat_addrdr(n)
188ipnat_t *n;
189{
190	ipnat_t **np;
191	u_32_t j;
192	u_int hv;
193	int k;
194
195	k = countbits(n->in_outmsk);
196	if ((k >= 0) && (k != 32))
197		rdr_masks |= 1 << k;
198	j = (n->in_outip & n->in_outmsk);
199	hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
200	np = rdr_rules + hv;
201	while (*np != NULL)
202		np = &(*np)->in_rnext;
203	n->in_rnext = NULL;
204	n->in_prnext = np;
205	*np = n;
206}
207
208
209static void nat_addnat(n)
210ipnat_t *n;
211{
212	ipnat_t **np;
213	u_32_t j;
214	u_int hv;
215	int k;
216
217	k = countbits(n->in_inmsk);
218	if ((k >= 0) && (k != 32))
219		nat_masks |= 1 << k;
220	j = (n->in_inip & n->in_inmsk);
221	hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
222	np = nat_rules + hv;
223	while (*np != NULL)
224		np = &(*np)->in_mnext;
225	n->in_mnext = NULL;
226	n->in_pmnext = np;
227	*np = n;
228}
229
230
231static void nat_delrdr(n)
232ipnat_t *n;
233{
234	if (n->in_rnext)
235		n->in_rnext->in_prnext = n->in_prnext;
236	*n->in_prnext = n->in_rnext;
237}
238
239
240static void nat_delnat(n)
241ipnat_t *n;
242{
243	if (n->in_mnext)
244		n->in_mnext->in_pmnext = n->in_pmnext;
245	*n->in_pmnext = n->in_mnext;
246}
247
248
249/*
250 * check if an ip address has already been allocated for a given mapping that
251 * is not doing port based translation.
252 */
253static struct hostmap *nat_hostmap(np, real, map)
254ipnat_t *np;
255struct in_addr real;
256struct in_addr map;
257{
258	hostmap_t *hm;
259	u_int hv;
260
261	MUTEX_ENTER(&ipf_hostmap);
262	hv = real.s_addr % HOSTMAP_SIZE;
263	for (hm = maptable[hv]; hm; hm = hm->hm_next)
264		if ((hm->hm_realip.s_addr == real.s_addr) &&
265		    (np == hm->hm_ipnat)) {
266			hm->hm_ref++;
267			MUTEX_EXIT(&ipf_hostmap);
268			return hm;
269		}
270
271	KMALLOC(hm, hostmap_t *);
272	if (hm) {
273		hm->hm_next = maptable[hv];
274		hm->hm_pnext = maptable + hv;
275		if (maptable[hv])
276			maptable[hv]->hm_pnext = &hm->hm_next;
277		maptable[hv] = hm;
278		hm->hm_ipnat = np;
279		hm->hm_realip = real;
280		hm->hm_mapip = map;
281		hm->hm_ref = 1;
282	}
283	MUTEX_EXIT(&ipf_hostmap);
284	return hm;
285}
286
287
288static void nat_hostmapdel(hm)
289struct hostmap *hm;
290{
291	MUTEX_ENTER(&ipf_hostmap);
292	ATOMIC_DEC32(hm->hm_ref);
293	if (hm->hm_ref == 0) {
294		if (hm->hm_next)
295			hm->hm_next->hm_pnext = hm->hm_pnext;
296		*hm->hm_pnext = hm->hm_next;
297		KFREE(hm);
298	}
299	MUTEX_EXIT(&ipf_hostmap);
300}
301
302
/*
 * Adjust the 16 bit checksum at *sp (stored in network byte order) by
 * adding n to its one's complement, folding the carries back in twice.
 * n == 0 leaves the checksum untouched.  len is not used.
 */
void fix_outcksum(sp, n , len)
u_short *sp;
u_32_t n;
int len;
{
	register u_32_t acc;
	register u_short folded;
	int pass;

	if (n == 0)
		return;
#if SOLARIS2 >= 6
	if (n & NAT_HW_CKSUM) {
		*sp = n & 0xffff;
		return;
	}
#endif
	acc = (~ntohs(*sp)) & 0xffff;
	acc += n;
	/* two rounds are enough to absorb every carry out of 16 bits */
	for (pass = 0; pass < 2; pass++)
		acc = (acc >> 16) + (acc & 0xffff);
	folded = (u_short)(~acc & 0xffff);
	*sp = htons(folded);
}
327
328
/*
 * Adjust the 16 bit checksum at *sp by adding the low 16 bits of ~n to
 * its one's complement, folding the carries back in twice.  The result
 * is stored in network byte order.  n == 0 leaves the checksum untouched.
 * len is not used.
 */
void fix_incksum(sp, n , len)
u_short *sp;
u_32_t n;
int len;
{
	register u_32_t acc;
	register u_short folded;
	int pass;

	if (n == 0)
		return;
#if SOLARIS2 >= 6
	if (n & NAT_HW_CKSUM) {
		*sp = n & 0xffff;
		return;
	}
#endif
#ifdef sparc
	acc = (~(*sp)) & 0xffff;
#else
	acc = (~ntohs(*sp)) & 0xffff;
#endif
	acc += ~(n) & 0xffff;
	/* two rounds are enough to absorb every carry out of 16 bits */
	for (pass = 0; pass < 2; pass++)
		acc = (acc >> 16) + (acc & 0xffff);
	folded = (u_short)(~acc & 0xffff);
	*sp = htons(folded);
}
357
358
359/*
360 * How the NAT is organised and works.
361 *
362 * Inside (interface y) NAT       Outside (interface x)
363 * -------------------- -+- -------------------------------------
 * Packet going          |   out, processed by ip_natout() for x
365 * ------------>         |   ------------>
366 * src=10.1.1.1          |   src=192.1.1.1
367 *                       |
368 *                       |   in, processed by ip_natin() for x
369 * <------------         |   <------------
370 * dst=10.1.1.1          |   dst=192.1.1.1
371 * -------------------- -+- -------------------------------------
372 * ip_natout() - changes ip_src and if required, sport
373 *             - creates a new mapping, if required.
374 * ip_natin()  - changes ip_dst and if required, dport
375 *
376 * In the NAT table, internal source is recorded as "in" and externally
377 * seen as "out".
378 */
379
380/*
381 * Handle ioctls which manipulate the NAT.
382 */
383int nat_ioctl(data, cmd, mode)
384#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
385u_long cmd;
386#else
387int cmd;
388#endif
389caddr_t data;
390int mode;
391{
392	register ipnat_t *nat, *nt, *n = NULL, **np = NULL;
393	int error = 0, ret, arg;
394	ipnat_t natd;
395	u_32_t i, j;
396
397#if (BSD >= 199306) && defined(_KERNEL)
398	if ((securelevel >= 2) && (mode & FWRITE))
399		return EPERM;
400#endif
401
402	nat = NULL;     /* XXX gcc -Wuninitialized */
403	KMALLOC(nt, ipnat_t *);
404	if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT))
405		error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
406	else if (cmd == SIOCIPFFL)	/* SIOCFLNAT & SIOCCNATL */
407		error = IRCOPY(data, (char *)&arg, sizeof(arg));
408
409	if (error)
410		goto done;
411
412	/*
413	 * For add/delete, look to see if the NAT entry is already present
414	 */
415	WRITE_ENTER(&ipf_nat);
416	if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
417		nat = &natd;
418		nat->in_flags &= IPN_USERFLAGS;
419		if ((nat->in_redir & NAT_MAPBLK) == 0) {
420			if ((nat->in_flags & IPN_SPLIT) == 0)
421				nat->in_inip &= nat->in_inmsk;
422			if ((nat->in_flags & IPN_IPRANGE) == 0)
423				nat->in_outip &= nat->in_outmsk;
424		}
425		for (np = &nat_list; (n = *np); np = &n->in_next)
426			if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
427					IPN_CMPSIZ))
428				break;
429	}
430
431	switch (cmd)
432	{
433#ifdef  IPFILTER_LOG
434	case SIOCIPFFB :
435	{
436		int tmp;
437
438		if (!(mode & FWRITE))
439			error = EPERM;
440		else {
441			tmp = ipflog_clear(IPL_LOGNAT);
442			IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
443		}
444		break;
445	}
446#endif
447	case SIOCADNAT :
448		if (!(mode & FWRITE)) {
449			error = EPERM;
450			break;
451		}
452		if (n) {
453			error = EEXIST;
454			break;
455		}
456		if (nt == NULL) {
457			error = ENOMEM;
458			break;
459		}
460		n = nt;
461		nt = NULL;
462		bcopy((char *)nat, (char *)n, sizeof(*n));
463		n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
464		if (!n->in_ifp)
465			n->in_ifp = (void *)-1;
466		if (n->in_plabel[0] != '\0') {
467			n->in_apr = appr_match(n->in_p, n->in_plabel);
468			if (!n->in_apr) {
469				error = ENOENT;
470				break;
471			}
472		}
473		n->in_next = NULL;
474		*np = n;
475
476		if (n->in_redir & NAT_REDIRECT)
477			nat_addrdr(n);
478		if (n->in_redir & (NAT_MAP|NAT_MAPBLK))
479			nat_addnat(n);
480
481		n->in_use = 0;
482		if (n->in_redir & NAT_MAPBLK)
483			n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
484		else if (n->in_flags & IPN_AUTOPORTMAP)
485			n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
486		else if (n->in_flags & IPN_IPRANGE)
487			n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
488		else if (n->in_flags & IPN_SPLIT)
489			n->in_space = 2;
490		else
491			n->in_space = ~ntohl(n->in_outmsk);
492		/*
493		 * Calculate the number of valid IP addresses in the output
494		 * mapping range.  In all cases, the range is inclusive of
495		 * the start and ending IP addresses.
496		 * If to a CIDR address, lose 2: broadcast + network address
497		 *                               (so subtract 1)
498		 * If to a range, add one.
499		 * If to a single IP address, set to 1.
500		 */
501		if (n->in_space) {
502			if ((n->in_flags & IPN_IPRANGE) != 0)
503				n->in_space += 1;
504			else
505				n->in_space -= 1;
506		} else
507			n->in_space = 1;
508		if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
509		    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
510			n->in_nip = ntohl(n->in_outip) + 1;
511		else if ((n->in_flags & IPN_SPLIT) &&
512			 (n->in_redir & NAT_REDIRECT))
513			n->in_nip = ntohl(n->in_inip);
514		else
515			n->in_nip = ntohl(n->in_outip);
516		if (n->in_redir & NAT_MAP) {
517			n->in_pnext = ntohs(n->in_pmin);
518			/*
519			 * Multiply by the number of ports made available.
520			 */
521			if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
522				n->in_space *= (ntohs(n->in_pmax) -
523						ntohs(n->in_pmin) + 1);
524				/*
525				 * Because two different sources can map to
526				 * different destinations but use the same
527				 * local IP#/port #.
528				 * If the result is smaller than in_space, then
529				 * we may have wrapped around 32bits.
530				 */
531				i = n->in_inmsk;
532				if ((i != 0) && (i != 0xffffffff)) {
533					j = n->in_space * (~ntohl(i) + 1);
534					if (j >= n->in_space)
535						n->in_space = j;
536					else
537						n->in_space = 0xffffffff;
538				}
539			}
540			/*
541			 * If no protocol is specified, multiple by 256.
542			 */
543			if ((n->in_flags & IPN_TCPUDP) == 0) {
544					j = n->in_space * 256;
545					if (j >= n->in_space)
546						n->in_space = j;
547					else
548						n->in_space = 0xffffffff;
549			}
550		}
551		/* Otherwise, these fields are preset */
552		n = NULL;
553		nat_stats.ns_rules++;
554		break;
555	case SIOCRMNAT :
556		if (!(mode & FWRITE)) {
557			error = EPERM;
558			n = NULL;
559			break;
560		}
561		if (!n) {
562			error = ESRCH;
563			break;
564		}
565		if (n->in_redir & NAT_REDIRECT)
566			nat_delrdr(n);
567		if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
568			nat_delnat(n);
569		if (nat_list == NULL) {
570			nat_masks = 0;
571			rdr_masks = 0;
572		}
573		*np = n->in_next;
574		if (!n->in_use) {
575			if (n->in_apr)
576				appr_free(n->in_apr);
577			KFREE(n);
578			nat_stats.ns_rules--;
579		} else {
580			n->in_flags |= IPN_DELETE;
581			n->in_next = NULL;
582		}
583		n = NULL;
584		break;
585	case SIOCGNATS :
586		MUTEX_DOWNGRADE(&ipf_nat);
587		nat_stats.ns_table[0] = nat_table[0];
588		nat_stats.ns_table[1] = nat_table[1];
589		nat_stats.ns_list = nat_list;
590		nat_stats.ns_nattab_sz = ipf_nattable_sz;
591		nat_stats.ns_rultab_sz = ipf_natrules_sz;
592		nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
593		nat_stats.ns_instances = nat_instances;
594		nat_stats.ns_apslist = ap_sess_list;
595		error = IWCOPYPTR((char *)&nat_stats, (char *)data,
596				  sizeof(nat_stats));
597		break;
598	case SIOCGNATL :
599	    {
600		natlookup_t nl;
601
602		MUTEX_DOWNGRADE(&ipf_nat);
603		error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
604		if (error)
605			break;
606
607		if (nat_lookupredir(&nl)) {
608			error = IWCOPYPTR((char *)&nl, (char *)data,
609					  sizeof(nl));
610		} else
611			error = ESRCH;
612		break;
613	    }
614	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
615		if (!(mode & FWRITE)) {
616			error = EPERM;
617			break;
618		}
619		error = 0;
620		if (arg == 0)
621			ret = nat_flushtable();
622		else if (arg == 1)
623			ret = nat_clearlist();
624		else
625			error = EINVAL;
626		MUTEX_DOWNGRADE(&ipf_nat);
627		if (!error) {
628			error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
629			if (error)
630				error = EFAULT;
631		}
632		break;
633	case SIOCSTLCK :
634		error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
635		if (!error) {
636			error = IWCOPY((caddr_t)&fr_nat_lock, data,
637					sizeof(fr_nat_lock));
638			if (!error)
639				fr_nat_lock = arg;
640		}
641		break;
642	case SIOCSTPUT :
643		if (fr_nat_lock)
644			error = fr_natputent(data);
645		else
646			error = EACCES;
647		break;
648	case SIOCSTGSZ :
649		if (fr_nat_lock)
650			error = fr_natgetsz(data);
651		else
652			error = EACCES;
653		break;
654	case SIOCSTGET :
655		if (fr_nat_lock)
656			error = fr_natgetent(data);
657		else
658			error = EACCES;
659		break;
660	case FIONREAD :
661#ifdef	IPFILTER_LOG
662		MUTEX_DOWNGRADE(&ipf_nat);
663		error = IWCOPY((caddr_t)&iplused[IPL_LOGNAT], (caddr_t)data,
664			       sizeof(iplused[IPL_LOGNAT]));
665#endif
666		break;
667	default :
668		error = EINVAL;
669		break;
670	}
671	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
672done:
673	if (nt)
674		KFREE(nt);
675	return error;
676}
677
678
679static int fr_natgetsz(data)
680caddr_t data;
681{
682	ap_session_t *aps;
683	nat_t *nat, *n;
684	int error = 0;
685	natget_t ng;
686
687	error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
688	if (error)
689		return EFAULT;
690
691	nat = ng.ng_ptr;
692	if (!nat) {
693		nat = nat_instances;
694		ng.ng_sz = 0;
695		if (nat == NULL) {
696			error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
697			if (error)
698				error = EFAULT;
699			return error;
700		}
701	} else {
702		/*
703		 * Make sure the pointer we're copying from exists in the
704		 * current list of entries.  Security precaution to prevent
705		 * copying of random kernel data.
706		 */
707		for (n = nat_instances; n; n = n->nat_next)
708			if (n == nat)
709				break;
710		if (!n)
711			return ESRCH;
712	}
713
714	ng.ng_sz = sizeof(nat_save_t);
715	aps = nat->nat_aps;
716	if ((aps != NULL) && (aps->aps_data != 0)) {
717		ng.ng_sz += sizeof(ap_session_t);
718		ng.ng_sz += aps->aps_psiz;
719	}
720
721	error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
722	if (error)
723		error = EFAULT;
724	return error;
725}
726
727
728static int fr_natgetent(data)
729caddr_t data;
730{
731	nat_save_t ipn, *ipnp, *ipnn;
732	register nat_t *n, *nat;
733	ap_session_t *aps;
734	int error;
735
736	error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
737	if (error)
738		return EFAULT;
739	error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
740	if (error)
741		return EFAULT;
742
743	nat = ipn.ipn_next;
744	if (!nat) {
745		nat = nat_instances;
746		if (nat == NULL) {
747			if (nat_instances == NULL)
748				return ENOENT;
749			return 0;
750		}
751	} else {
752		/*
753		 * Make sure the pointer we're copying from exists in the
754		 * current list of entries.  Security precaution to prevent
755		 * copying of random kernel data.
756		 */
757		for (n = nat_instances; n; n = n->nat_next)
758			if (n == nat)
759				break;
760		if (!n)
761			return ESRCH;
762	}
763
764	ipn.ipn_next = nat->nat_next;
765	ipn.ipn_dsize = 0;
766	bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
767	ipn.ipn_nat.nat_data = NULL;
768
769	if (nat->nat_ptr) {
770		bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
771		      sizeof(ipn.ipn_ipnat));
772	}
773
774	if (nat->nat_fr)
775		bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
776		      sizeof(ipn.ipn_rule));
777
778	if ((aps = nat->nat_aps)) {
779		ipn.ipn_dsize = sizeof(*aps);
780		if (aps->aps_data)
781			ipn.ipn_dsize += aps->aps_psiz;
782		KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
783		if (ipnn == NULL)
784			return NULL;
785		bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
786
787		bcopy((char *)aps, ipn.ipn_data, sizeof(*aps));
788		if (aps->aps_data) {
789			bcopy(aps->aps_data, ipn.ipn_data + sizeof(*aps),
790			      aps->aps_psiz);
791			ipn.ipn_dsize += aps->aps_psiz;
792		}
793		error = IWCOPY((caddr_t)ipnn, ipnp,
794			       sizeof(ipn) + ipn.ipn_dsize);
795		if (error)
796			return EFAULT;
797		KFREES(ipnn, sizeof(*ipnn) + ipn.ipn_dsize);
798	} else {
799		error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
800		if (error)
801			return EFAULT;
802	}
803	return 0;
804}
805
806
807static int fr_natputent(data)
808caddr_t data;
809{
810	nat_save_t ipn, *ipnp, *ipnn;
811	register nat_t *n, *nat;
812	ap_session_t *aps;
813	frentry_t *fr;
814	ipnat_t *in;
815
816	int error;
817
818	error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
819	if (error)
820		return EFAULT;
821	error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
822	if (error)
823		return EFAULT;
824	if (ipn.ipn_dsize) {
825		KMALLOCS(ipnn, nat_save_t *, sizeof(ipn) + ipn.ipn_dsize);
826		if (ipnn == NULL)
827			return ENOMEM;
828		bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
829		error = IRCOPY((caddr_t)ipnp, (caddr_t)ipn.ipn_data,
830			       ipn.ipn_dsize);
831		if (error)
832			return EFAULT;
833	} else
834		ipnn = NULL;
835
836	KMALLOC(nat, nat_t *);
837	if (nat == NULL)
838		return ENOMEM;
839
840	bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
841	/*
842	 * Initialize all these so that nat_delete() doesn't cause a crash.
843	 */
844	nat->nat_hstart[0] = NULL;
845	nat->nat_hstart[1] = NULL;
846	fr = nat->nat_fr;
847	nat->nat_fr = NULL;
848	aps = nat->nat_aps;
849	nat->nat_aps = NULL;
850	in = nat->nat_ptr;
851	nat->nat_ptr = NULL;
852	nat->nat_data = NULL;
853
854	/*
855	 * Restore the rule associated with this nat session
856	 */
857	if (in) {
858		KMALLOC(in, ipnat_t *);
859		if (in == NULL) {
860			error = ENOMEM;
861			goto junkput;
862		}
863		nat->nat_ptr = in;
864		bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
865		in->in_use = 1;
866		in->in_flags |= IPN_DELETE;
867		in->in_next = NULL;
868		in->in_rnext = NULL;
869		in->in_prnext = NULL;
870		in->in_mnext = NULL;
871		in->in_pmnext = NULL;
872		in->in_ifp = GETUNIT(in->in_ifname, 4);
873		if (in->in_plabel[0] != '\0') {
874			in->in_apr = appr_match(in->in_p, in->in_plabel);
875		}
876	}
877
878	/*
879	 * Restore ap_session_t structure.  Include the private data allocated
880	 * if it was there.
881	 */
882	if (aps) {
883		KMALLOC(aps, ap_session_t *);
884		if (aps == NULL) {
885			error = ENOMEM;
886			goto junkput;
887		}
888		nat->nat_aps = aps;
889		aps->aps_next = ap_sess_list;
890		ap_sess_list = aps;
891		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
892		if (in)
893			aps->aps_apr = in->in_apr;
894		if (aps->aps_psiz) {
895			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
896			if (aps->aps_data == NULL) {
897				error = ENOMEM;
898				goto junkput;
899			}
900			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
901			      aps->aps_psiz);
902		} else {
903			aps->aps_psiz = 0;
904			aps->aps_data = NULL;
905		}
906	}
907
908	/*
909	 * If there was a filtering rule associated with this entry then
910	 * build up a new one.
911	 */
912	if (fr != NULL) {
913		if (nat->nat_flags & FI_NEWFR) {
914			KMALLOC(fr, frentry_t *);
915			nat->nat_fr = fr;
916			if (fr == NULL) {
917				error = ENOMEM;
918				goto junkput;
919			}
920			bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
921			ipn.ipn_nat.nat_fr = fr;
922			error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
923			if (error) {
924				error = EFAULT;
925				goto junkput;
926			}
927		} else {
928			for (n = nat_instances; n; n = n->nat_next)
929				if (n->nat_fr == fr)
930					break;
931			if (!n) {
932				error = ESRCH;
933				goto junkput;
934			}
935		}
936	}
937
938	if (ipnn)
939		KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
940	nat_insert(nat);
941	return 0;
942junkput:
943	if (ipnn)
944		KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
945	if (nat)
946		nat_delete(nat);
947	return error;
948}
949
950
951/*
952 * Delete a nat entry from the various lists and table.
953 */
954static void nat_delete(natd)
955struct nat *natd;
956{
957	register struct nat **natp, *nat;
958	struct ipnat *ipn;
959
960	for (natp = natd->nat_hstart[0]; natp && (nat = *natp);
961	     natp = &nat->nat_hnext[0])
962		if (nat == natd) {
963			*natp = nat->nat_hnext[0];
964			break;
965		}
966
967	for (natp = natd->nat_hstart[1]; natp && (nat = *natp);
968	     natp = &nat->nat_hnext[1])
969		if (nat == natd) {
970			*natp = nat->nat_hnext[1];
971			break;
972		}
973
974	if (natd->nat_fr != NULL) {
975		ATOMIC_DEC32(natd->nat_fr->fr_ref);
976	}
977
978	if (natd->nat_hm != NULL)
979		nat_hostmapdel(natd->nat_hm);
980
981	/*
982	 * If there is an active reference from the nat entry to its parent
983	 * rule, decrement the rule's reference count and free it too if no
984	 * longer being used.
985	 */
986	ipn = natd->nat_ptr;
987	if (ipn != NULL) {
988		ipn->in_space++;
989		ipn->in_use--;
990		if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
991			if (ipn->in_apr)
992				appr_free(ipn->in_apr);
993			KFREE(ipn);
994			nat_stats.ns_rules--;
995		}
996	}
997
998	MUTEX_DESTROY(&natd->nat_lock);
999	/*
1000	 * If there's a fragment table entry too for this nat entry, then
1001	 * dereference that as well.
1002	 */
1003	ipfr_forget((void *)natd);
1004	aps_free(natd->nat_aps);
1005	nat_stats.ns_inuse--;
1006	KFREE(natd);
1007}
1008
1009
1010/*
1011 * nat_flushtable - clear the NAT table of all mapping entries.
1012 */
1013static int nat_flushtable()
1014{
1015	register nat_t *nat, **natp;
1016	register int j = 0;
1017
1018	/*
1019	 * ALL NAT mappings deleted, so lets just make the deletions
1020	 * quicker.
1021	 */
1022	if (nat_table[0] != NULL)
1023		bzero((char *)nat_table[0],
1024		      sizeof(nat_table[0]) * ipf_nattable_sz);
1025	if (nat_table[1] != NULL)
1026		bzero((char *)nat_table[1],
1027		      sizeof(nat_table[1]) * ipf_nattable_sz);
1028
1029	for (natp = &nat_instances; (nat = *natp); ) {
1030		*natp = nat->nat_next;
1031		nat_delete(nat);
1032		j++;
1033	}
1034	nat_stats.ns_inuse = 0;
1035	return j;
1036}
1037
1038
1039/*
1040 * nat_clearlist - delete all rules in the active NAT mapping list.
1041 */
1042static int nat_clearlist()
1043{
1044	register ipnat_t *n, **np = &nat_list;
1045	int i = 0;
1046
1047	if (nat_rules != NULL)
1048		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1049	if (rdr_rules != NULL)
1050		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1051
1052	while ((n = *np)) {
1053		*np = n->in_next;
1054		if (!n->in_use) {
1055			if (n->in_apr)
1056				appr_free(n->in_apr);
1057			KFREE(n);
1058			nat_stats.ns_rules--;
1059		} else {
1060			n->in_flags |= IPN_DELETE;
1061			n->in_next = NULL;
1062		}
1063		i++;
1064	}
1065	nat_masks = 0;
1066	rdr_masks = 0;
1067	return i;
1068}
1069
1070
1071/*
1072 * Create a new NAT table entry.
1073 * NOTE: assumes write lock on ipf_nat has been obtained already.
1074 */
1075nat_t *nat_new(np, ip, fin, flags, direction)
1076ipnat_t *np;
1077ip_t *ip;
1078fr_info_t *fin;
1079u_int flags;
1080int direction;
1081{
1082	register u_32_t sum1, sum2, sumd, l;
1083	u_short port = 0, sport = 0, dport = 0, nport = 0;
1084	struct in_addr in, inb;
1085	tcphdr_t *tcp = NULL;
1086	hostmap_t *hm = NULL;
1087	nat_t *nat, *natl;
1088	u_short nflags;
1089#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1090	qif_t *qf = fin->fin_qif;
1091#endif
1092
1093	nflags = flags & np->in_flags;
1094	if (flags & IPN_TCPUDP) {
1095		tcp = (tcphdr_t *)fin->fin_dp;
1096		sport = tcp->th_sport;
1097		dport = tcp->th_dport;
1098	}
1099
1100	/* Give me a new nat */
1101	KMALLOC(nat, nat_t *);
1102	if (nat == NULL) {
1103		nat_stats.ns_memfail++;
1104		return NULL;
1105	}
1106
1107	bzero((char *)nat, sizeof(*nat));
1108	nat->nat_flags = flags;
1109	/*
1110	 * Search the current table for a match.
1111	 */
1112	if (direction == NAT_OUTBOUND) {
1113		/*
1114		 * Values at which the search for a free resouce starts.
1115		 */
1116		u_32_t st_ip;
1117		u_short st_port;
1118
1119		/*
1120		 * If it's an outbound packet which doesn't match any existing
1121		 * record, then create a new port
1122		 */
1123		l = 0;
1124		st_ip = np->in_nip;
1125		st_port = np->in_pnext;
1126
1127		do {
1128			port = 0;
1129			in.s_addr = htonl(np->in_nip);
1130			if (l == 0) {
1131				/*
1132				 * Check to see if there is an existing NAT
1133				 * setup for this IP address pair.
1134				 */
1135				hm = nat_hostmap(np, ip->ip_src, in);
1136				if (hm != NULL)
1137					in.s_addr = hm->hm_mapip.s_addr;
1138			} else if ((l == 1) && (hm != NULL)) {
1139				nat_hostmapdel(hm);
1140				hm = NULL;
1141			}
1142			in.s_addr = ntohl(in.s_addr);
1143
1144			nat->nat_hm = hm;
1145
1146			if ((np->in_outmsk == 0xffffffff) &&
1147			    (np->in_pnext == 0)) {
1148				if (l > 0)
1149					goto badnat;
1150			}
1151
1152			if (np->in_redir & NAT_MAPBLK) {
1153				if ((l >= np->in_ppip) || ((l > 0) &&
1154				     !(flags & IPN_TCPUDP)))
1155					goto badnat;
1156				/*
1157				 * map-block - Calculate destination address.
1158				 */
1159				in.s_addr = ntohl(ip->ip_src.s_addr);
1160				in.s_addr &= ntohl(~np->in_inmsk);
1161				inb.s_addr = in.s_addr;
1162				in.s_addr /= np->in_ippip;
1163				in.s_addr &= ntohl(~np->in_outmsk);
1164				in.s_addr += ntohl(np->in_outip);
1165				/*
1166				 * Calculate destination port.
1167				 */
1168				if ((flags & IPN_TCPUDP) &&
1169				    (np->in_ppip != 0)) {
1170					port = ntohs(sport) + l;
1171					port %= np->in_ppip;
1172					port += np->in_ppip *
1173						(inb.s_addr % np->in_ippip);
1174					port += MAPBLK_MINPORT;
1175					port = htons(port);
1176				}
1177			} else if (!np->in_outip &&
1178				   (np->in_outmsk == 0xffffffff)) {
1179				/*
1180				 * 0/32 - use the interface's IP address.
1181				 */
1182				if ((l > 0) ||
1183				    fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1184					goto badnat;
1185				in.s_addr = ntohl(in.s_addr);
1186			} else if (!np->in_outip && !np->in_outmsk) {
1187				/*
1188				 * 0/0 - use the original source address/port.
1189				 */
1190				if (l > 0)
1191					goto badnat;
1192				in.s_addr = ntohl(ip->ip_src.s_addr);
1193			} else if ((np->in_outmsk != 0xffffffff) &&
1194				   (np->in_pnext == 0) &&
1195				   ((l > 0) || (hm == NULL)))
1196				np->in_nip++;
1197			natl = NULL;
1198
1199			if ((nflags & IPN_TCPUDP) &&
1200			    ((np->in_redir & NAT_MAPBLK) == 0) &&
1201			    (np->in_flags & IPN_AUTOPORTMAP)) {
1202				if ((l > 0) && (l % np->in_ppip == 0)) {
1203					if (l > np->in_space) {
1204						goto badnat;
1205					} else if ((l > np->in_ppip) &&
1206						   np->in_outmsk != 0xffffffff)
1207						np->in_nip++;
1208				}
1209				if (np->in_ppip != 0) {
1210					port = ntohs(sport);
1211					port += (l % np->in_ppip);
1212					port %= np->in_ppip;
1213					port += np->in_ppip *
1214						(ntohl(ip->ip_src.s_addr) %
1215						 np->in_ippip);
1216					port += MAPBLK_MINPORT;
1217					port = htons(port);
1218				}
1219			} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1220				   (nflags & IPN_TCPUDP) &&
1221				   (np->in_pnext != 0)) {
1222				port = htons(np->in_pnext++);
1223				if (np->in_pnext > ntohs(np->in_pmax)) {
1224					np->in_pnext = ntohs(np->in_pmin);
1225					if (np->in_outmsk != 0xffffffff)
1226						np->in_nip++;
1227				}
1228			}
1229
1230			if (np->in_flags & IPN_IPRANGE) {
1231				if (np->in_nip > ntohl(np->in_outmsk))
1232					np->in_nip = ntohl(np->in_outip);
1233			} else {
1234				if ((np->in_outmsk != 0xffffffff) &&
1235				    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1236				    ntohl(np->in_outip))
1237					np->in_nip = ntohl(np->in_outip) + 1;
1238			}
1239
1240			if (!port && (flags & IPN_TCPUDP))
1241				port = sport;
1242
1243			/*
1244			 * Here we do a lookup of the connection as seen from
1245			 * the outside.  If an IP# pair already exists, try
1246			 * again.  So if you have A->B becomes C->B, you can
1247			 * also have D->E become C->E but not D->B causing
1248			 * another C->B.  Also take protocol and ports into
1249			 * account when determining whether a pre-existing
1250			 * NAT setup will cause an external conflict where
1251			 * this is appropriate.
1252			 */
1253			inb.s_addr = htonl(in.s_addr);
1254			natl = nat_inlookup(fin->fin_ifp, flags & ~FI_WILDP,
1255					    (u_int)ip->ip_p, ip->ip_dst, inb,
1256					    (port << 16) | dport);
1257
1258			/*
1259			 * Has the search wrapped around and come back to the
1260			 * start ?
1261			 */
1262			if ((natl != NULL) &&
1263			    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1264			    (np->in_nip != 0) && (st_ip == np->in_nip))
1265				goto badnat;
1266			l++;
1267		} while (natl != NULL);
1268
1269		if (np->in_space > 0)
1270			np->in_space--;
1271
1272		/* Setup the NAT table */
1273		nat->nat_inip = ip->ip_src;
1274		nat->nat_outip.s_addr = htonl(in.s_addr);
1275		nat->nat_oip = ip->ip_dst;
1276		if (nat->nat_hm == NULL)
1277			nat->nat_hm = nat_hostmap(np, ip->ip_src,
1278						  nat->nat_outip);
1279
1280		sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr)) + ntohs(sport);
1281		sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1282
1283		if (flags & IPN_TCPUDP) {
1284			nat->nat_inport = sport;
1285			nat->nat_outport = port;	/* sport */
1286			nat->nat_oport = dport;
1287		}
1288	} else {
1289		/*
1290		 * Otherwise, it's an inbound packet. Most likely, we don't
1291		 * want to rewrite source ports and source addresses. Instead,
1292		 * we want to rewrite to a fixed internal address and fixed
1293		 * internal port.
1294		 */
1295		if (np->in_flags & IPN_SPLIT) {
1296			in.s_addr = np->in_nip;
1297			if (np->in_inip == htonl(in.s_addr))
1298				np->in_nip = ntohl(np->in_inmsk);
1299			else {
1300				np->in_nip = ntohl(np->in_inip);
1301				if (np->in_flags & IPN_ROUNDR) {
1302					nat_delrdr(np);
1303					nat_addrdr(np);
1304				}
1305			}
1306		} else {
1307			in.s_addr = ntohl(np->in_inip);
1308			if (np->in_flags & IPN_ROUNDR) {
1309				nat_delrdr(np);
1310				nat_addrdr(np);
1311			}
1312		}
1313		if (!np->in_pnext)
1314			nport = dport;
1315		else {
1316			/*
1317			 * Whilst not optimized for the case where
1318			 * pmin == pmax, the gain is not significant.
1319			 */
1320			nport = ntohs(dport) - ntohs(np->in_pmin) +
1321				ntohs(np->in_pnext);
1322			nport = htons(nport);
1323		}
1324
1325		/*
1326		 * When the redirect-to address is set to 0.0.0.0, just
1327		 * assume a blank `forwarding' of the packet.  We don't
1328		 * setup any translation for this either.
1329		 */
1330		if (in.s_addr == 0) {
1331			if (nport == dport)
1332				goto badnat;
1333			in.s_addr = ntohl(ip->ip_dst.s_addr);
1334		}
1335
1336		nat->nat_inip.s_addr = htonl(in.s_addr);
1337		nat->nat_outip = ip->ip_dst;
1338		nat->nat_oip = ip->ip_src;
1339
1340		sum1 = LONG_SUM(ntohl(ip->ip_dst.s_addr)) + ntohs(dport);
1341		sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1342
1343		if (flags & IPN_TCPUDP) {
1344			nat->nat_inport = nport;
1345			nat->nat_outport = dport;
1346			nat->nat_oport = sport;
1347		}
1348	}
1349
1350	CALC_SUMD(sum1, sum2, sumd);
1351	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1352#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1353	if ((flags == IPN_TCP) && dohwcksum &&
1354	    (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1355		if (direction == NAT_OUTBOUND)
1356			sum1 = LONG_SUM(ntohl(in.s_addr));
1357		else
1358			sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
1359		sum1 += LONG_SUM(ntohl(ip->ip_dst.s_addr));
1360		sum1 += 30;
1361		sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1362		nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1363	} else
1364#endif
1365		nat->nat_sumd[1] = nat->nat_sumd[0];
1366
1367	if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1368		if (direction == NAT_OUTBOUND)
1369			sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
1370		else
1371			sum1 = LONG_SUM(ntohl(ip->ip_dst.s_addr));
1372
1373		sum2 = LONG_SUM(in.s_addr);
1374
1375		CALC_SUMD(sum1, sum2, sumd);
1376		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1377	} else
1378		nat->nat_ipsumd = nat->nat_sumd[0];
1379
1380	in.s_addr = htonl(in.s_addr);
1381
1382#ifdef  _KERNEL
1383	strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1384#endif
1385	nat_insert(nat);
1386
1387	nat->nat_dir = direction;
1388	nat->nat_ifp = fin->fin_ifp;
1389	nat->nat_ptr = np;
1390	nat->nat_p = ip->ip_p;
1391	nat->nat_bytes = 0;
1392	nat->nat_pkts = 0;
1393	nat->nat_fr = fin->fin_fr;
1394	if (nat->nat_fr != NULL) {
1395		ATOMIC_INC32(nat->nat_fr->fr_ref);
1396	}
1397	if (direction == NAT_OUTBOUND) {
1398		if (flags & IPN_TCPUDP)
1399			tcp->th_sport = port;
1400	} else {
1401		if (flags & IPN_TCPUDP)
1402			tcp->th_dport = nport;
1403	}
1404	np->in_use++;
1405	return nat;
1406badnat:
1407	nat_stats.ns_badnat++;
1408	if ((hm = nat->nat_hm) != NULL)
1409		nat_hostmapdel(hm);
1410	KFREE(nat);
1411	return NULL;
1412}
1413
1414
1415void	nat_insert(nat)
1416nat_t	*nat;
1417{
1418	nat_t **natp;
1419	u_int hv;
1420
1421	MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1422
1423	nat->nat_age = fr_defnatage;
1424	nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1425	if (nat->nat_ifname[0] !='\0') {
1426		nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1427	}
1428
1429	nat->nat_next = nat_instances;
1430	nat_instances = nat;
1431	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1432			 ipf_nattable_sz);
1433	natp = &nat_table[0][hv];
1434	nat->nat_hstart[0] = natp;
1435	nat->nat_hnext[0] = *natp;
1436	*natp = nat;
1437	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1438			 ipf_nattable_sz);
1439	natp = &nat_table[1][hv];
1440	nat->nat_hstart[1] = natp;
1441	nat->nat_hnext[1] = *natp;
1442	*natp = nat;
1443
1444	nat_stats.ns_added++;
1445	nat_stats.ns_inuse++;
1446}
1447
1448
1449nat_t *nat_icmplookup(ip, fin, dir)
1450ip_t *ip;
1451fr_info_t *fin;
1452int dir;
1453{
1454	icmphdr_t *icmp;
1455	tcphdr_t *tcp = NULL;
1456	ip_t *oip;
1457	int flags = 0, type;
1458
1459	icmp = (icmphdr_t *)fin->fin_dp;
1460	/*
1461	 * Does it at least have the return (basic) IP header ?
1462	 * Only a basic IP header (no options) should be with an ICMP error
1463	 * header.
1464	 */
1465	if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1466		return NULL;
1467	type = icmp->icmp_type;
1468	/*
1469	 * If it's not an error type, then return.
1470	 */
1471	if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1472	    (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1473	    (type != ICMP_PARAMPROB))
1474		return NULL;
1475
1476	oip = (ip_t *)((char *)fin->fin_dp + 8);
1477	if (ip->ip_len < ICMPERR_MAXPKTLEN + ((oip->ip_hl - 5) << 2))
1478		return NULL;
1479	if (oip->ip_p == IPPROTO_TCP)
1480		flags = IPN_TCP;
1481	else if (oip->ip_p == IPPROTO_UDP)
1482		flags = IPN_UDP;
1483	if (flags & IPN_TCPUDP) {
1484		tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1485		if (dir == NAT_INBOUND)
1486			return nat_inlookup(fin->fin_ifp, flags,
1487				(u_int)oip->ip_p, oip->ip_dst, oip->ip_src,
1488				(tcp->th_sport << 16) | tcp->th_dport);
1489		else
1490			return nat_outlookup(fin->fin_ifp, flags,
1491				(u_int)oip->ip_p, oip->ip_dst, oip->ip_src,
1492				(tcp->th_sport << 16) | tcp->th_dport);
1493	}
1494	if (dir == NAT_INBOUND)
1495		return nat_inlookup(fin->fin_ifp, 0, (u_int)oip->ip_p,
1496			oip->ip_dst, oip->ip_src, 0);
1497	else
1498		return nat_outlookup(fin->fin_ifp, 0, (u_int)oip->ip_p,
1499			oip->ip_dst, oip->ip_src, 0);
1500}
1501
1502
1503/*
1504 * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP
1505 * packet gets correctly recognised.
1506 */
1507nat_t *nat_icmp(ip, fin, nflags, dir)
1508ip_t *ip;
1509fr_info_t *fin;
1510u_int *nflags;
1511int dir;
1512{
1513	u_32_t sum1, sum2, sumd;
1514	struct in_addr in;
1515	icmphdr_t *icmp;
1516	nat_t *nat;
1517	ip_t *oip;
1518	int flags = 0;
1519
1520	if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1521		return NULL;
1522	*nflags = IPN_ICMPERR;
1523	icmp = (icmphdr_t *)fin->fin_dp;
1524	oip = (ip_t *)&icmp->icmp_ip;
1525	if (oip->ip_p == IPPROTO_TCP)
1526		flags = IPN_TCP;
1527	else if (oip->ip_p == IPPROTO_UDP)
1528		flags = IPN_UDP;
1529	/*
1530	 * Need to adjust ICMP header to include the real IP#'s and
1531	 * port #'s.  Only apply a checksum change relative to the
1532	 * IP address change is it will be modified again in ip_natout
1533	 * for both address and port.  Two checksum changes are
1534	 * necessary for the two header address changes.  Be careful
1535	 * to only modify the checksum once for the port # and twice
1536	 * for the IP#.
1537	 */
1538
1539	if (nat->nat_dir == NAT_OUTBOUND) {
1540		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1541		in = nat->nat_inip;
1542		oip->ip_src = in;
1543	} else {
1544		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1545		in = nat->nat_outip;
1546		oip->ip_dst = in;
1547	}
1548
1549	sum2 = LONG_SUM(ntohl(in.s_addr));
1550
1551	CALC_SUMD(sum1, sum2, sumd);
1552
1553	if (nat->nat_dir == NAT_OUTBOUND) {
1554		fix_incksum(&oip->ip_sum, sumd, 0);
1555
1556		sumd += (sumd & 0xffff);
1557		while (sumd > 0xffff)
1558			sumd = (sumd & 0xffff) + (sumd >> 16);
1559		fix_outcksum(&icmp->icmp_cksum, sumd, 0);
1560	} else {
1561		fix_outcksum(&oip->ip_sum, sumd, 0);
1562
1563		sumd += (sumd & 0xffff);
1564		while (sumd > 0xffff)
1565			sumd = (sumd & 0xffff) + (sumd >> 16);
1566/*		fix_incksum(&icmp->icmp_cksum, sumd, 0); */
1567	}
1568
1569
1570	if ((flags & IPN_TCPUDP) != 0) {
1571		tcphdr_t *tcp;
1572
1573		/* XXX - what if this is bogus hl and we go off the end ? */
1574		tcp = (tcphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1575
1576		if (nat->nat_dir == NAT_OUTBOUND) {
1577			if (tcp->th_sport != nat->nat_inport) {
1578				sum1 = ntohs(tcp->th_sport);
1579				sum2 = ntohs(nat->nat_inport);
1580				CALC_SUMD(sum1, sum2, sumd);
1581				tcp->th_sport = nat->nat_inport;
1582				fix_outcksum(&icmp->icmp_cksum, sumd, 0);
1583			}
1584		} else {
1585			if (tcp->th_dport != nat->nat_outport) {
1586				sum1 = ntohs(tcp->th_dport);
1587				sum2 = ntohs(nat->nat_outport);
1588				CALC_SUMD(sum1, sum2, sumd);
1589				tcp->th_dport = nat->nat_outport;
1590				fix_incksum(&icmp->icmp_cksum, sumd, 0);
1591			}
1592		}
1593	}
1594	nat->nat_age = fr_defnaticmpage;
1595	return nat;
1596}
1597
1598
/*
 * NB: these lookups don't lock access to the list; they assume the caller
 * has already done so!
 */
1603/*
1604 * Lookup a nat entry based on the mapped destination ip address/port and
1605 * real source address/port.  We use this lookup when receiving a packet,
1606 * we're looking for a table entry, based on the destination address.
1607 * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
1608 */
1609nat_t *nat_inlookup(ifp, flags, p, src, mapdst, ports)
1610void *ifp;
1611register u_int flags, p;
1612struct in_addr src , mapdst;
1613u_32_t ports;
1614{
1615	register u_short sport, mapdport;
1616	register nat_t *nat;
1617	register int nflags;
1618	u_int hv;
1619
1620	mapdport = ports >> 16;
1621	sport = ports & 0xffff;
1622	flags &= IPN_TCPUDP;
1623
1624	hv = NAT_HASH_FN(mapdst.s_addr, mapdport, ipf_nattable_sz);
1625	nat = nat_table[1][hv];
1626	for (; nat; nat = nat->nat_hnext[1]) {
1627		nflags = nat->nat_flags;
1628		if ((!ifp || ifp == nat->nat_ifp) &&
1629		    nat->nat_oip.s_addr == src.s_addr &&
1630		    nat->nat_outip.s_addr == mapdst.s_addr &&
1631		    (((p == 0) && (flags == (nat->nat_flags & IPN_TCPUDP)))
1632		     || (p == nat->nat_p)) && (!flags ||
1633		     (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
1634		      ((nat->nat_outport == mapdport) ||
1635		       (nflags & FI_W_SPORT)))))
1636			return nat;
1637	}
1638	return NULL;
1639}
1640
1641
1642/*
1643 * Lookup a nat entry based on the source 'real' ip address/port and
1644 * destination address/port.  We use this lookup when sending a packet out,
1645 * we're looking for a table entry, based on the source address.
1646 * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
1647 */
1648nat_t *nat_outlookup(ifp, flags, p, src, dst, ports)
1649void *ifp;
1650register u_int flags, p;
1651struct in_addr src , dst;
1652u_32_t ports;
1653{
1654	register u_short sport, dport;
1655	register nat_t *nat;
1656	register int nflags;
1657	u_int hv;
1658
1659	sport = ports & 0xffff;
1660	dport = ports >> 16;
1661	flags &= IPN_TCPUDP;
1662
1663	hv = NAT_HASH_FN(src.s_addr, sport, ipf_nattable_sz);
1664	nat = nat_table[0][hv];
1665	for (; nat; nat = nat->nat_hnext[0]) {
1666		nflags = nat->nat_flags;
1667
1668		if ((!ifp || ifp == nat->nat_ifp) &&
1669		    nat->nat_inip.s_addr == src.s_addr &&
1670		    nat->nat_oip.s_addr == dst.s_addr &&
1671		    (((p == 0) && (flags == (nat->nat_flags & IPN_TCPUDP)))
1672		     || (p == nat->nat_p)) && (!flags ||
1673		     ((nat->nat_inport == sport || nflags & FI_W_SPORT) &&
1674		      (nat->nat_oport == dport || nflags & FI_W_DPORT))))
1675			return nat;
1676	}
1677	return NULL;
1678}
1679
1680
1681/*
1682 * Lookup the NAT tables to search for a matching redirect
1683 */
1684nat_t *nat_lookupredir(np)
1685register natlookup_t *np;
1686{
1687	u_32_t ports;
1688	nat_t *nat;
1689
1690	ports = (np->nl_outport << 16) | np->nl_inport;
1691	/*
1692	 * If nl_inip is non null, this is a lookup based on the real
1693	 * ip address. Else, we use the fake.
1694	 */
1695	if ((nat = nat_outlookup(NULL, np->nl_flags, 0, np->nl_inip,
1696				 np->nl_outip, ports))) {
1697		np->nl_realip = nat->nat_outip;
1698		np->nl_realport = nat->nat_outport;
1699	}
1700	return nat;
1701}
1702
1703
1704static int nat_match(fin, np, ip)
1705fr_info_t *fin;
1706ipnat_t *np;
1707ip_t *ip;
1708{
1709	frtuc_t *ft;
1710
1711	if (ip->ip_v != 4)
1712		return 0;
1713
1714	if (np->in_p && ip->ip_p != np->in_p)
1715		return 0;
1716	if (fin->fin_out) {
1717		if (!(np->in_redir && (NAT_MAP|NAT_MAPBLK)))
1718			return 0;
1719		if ((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
1720			return 0;
1721		if ((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
1722			return 0;
1723	} else {
1724		if (!(np->in_redir && NAT_REDIRECT))
1725			return 0;
1726	}
1727
1728	ft = &np->in_tuc;
1729	if (!(fin->fin_fi.fi_fl & FI_TCPUDP)) {
1730		if (ft->ftu_scmp || ft->ftu_dcmp)
1731			return 0;
1732		return 1;
1733	}
1734
1735	return fr_tcpudpchk(ft, fin);
1736}
1737
1738
1739/*
1740 * Packets going out on the external interface go through this.
1741 * Here, the source address requires alteration, if anything.
1742 */
1743int ip_natout(ip, fin)
1744ip_t *ip;
1745fr_info_t *fin;
1746{
1747	register ipnat_t *np = NULL;
1748	register u_32_t ipa;
1749	tcphdr_t *tcp = NULL;
1750	u_short sport = 0, dport = 0, *csump = NULL;
1751	struct ifnet *ifp;
1752	int natadd = 1;
1753	frentry_t *fr;
1754	u_int nflags = 0, hv, msk;
1755	u_32_t iph;
1756	nat_t *nat;
1757	int i;
1758
1759	if (nat_list == NULL || (fr_nat_lock))
1760		return 0;
1761
1762	if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
1763	    fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1)
1764		ifp = fr->fr_tif.fd_ifp;
1765	else
1766		ifp = fin->fin_ifp;
1767
1768	if (!(ip->ip_off & IP_OFFMASK) && !(fin->fin_fi.fi_fl & FI_SHORT)) {
1769		if (ip->ip_p == IPPROTO_TCP)
1770			nflags = IPN_TCP;
1771		else if (ip->ip_p == IPPROTO_UDP)
1772			nflags = IPN_UDP;
1773		if ((nflags & IPN_TCPUDP)) {
1774			tcp = (tcphdr_t *)fin->fin_dp;
1775			sport = tcp->th_sport;
1776			dport = tcp->th_dport;
1777		}
1778	}
1779
1780	ipa = ip->ip_src.s_addr;
1781
1782	READ_ENTER(&ipf_nat);
1783
1784	if ((ip->ip_p == IPPROTO_ICMP) &&
1785	    (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
1786		;
1787	else if ((ip->ip_off & (IP_OFFMASK|IP_MF)) &&
1788			(nat = ipfr_nat_knownfrag(ip, fin)))
1789		natadd = 0;
1790	else if ((nat = nat_outlookup(ifp, nflags, (u_int)ip->ip_p, ip->ip_src,
1791				      ip->ip_dst, (dport << 16) | sport))) {
1792		nflags = nat->nat_flags;
1793		if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
1794			if ((nflags & FI_W_SPORT) &&
1795			    (nat->nat_inport != sport))
1796				nat->nat_inport = sport;
1797			else if ((nflags & FI_W_DPORT) &&
1798				 (nat->nat_oport != dport))
1799				nat->nat_oport = dport;
1800			if (nat->nat_outport == 0)
1801				nat->nat_outport = sport;
1802			nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
1803			nflags = nat->nat_flags;
1804		}
1805	} else {
1806		RWLOCK_EXIT(&ipf_nat);
1807		WRITE_ENTER(&ipf_nat);
1808		/*
1809		 * If there is no current entry in the nat table for this IP#,
1810		 * create one for it (if there is a matching rule).
1811		 */
1812		msk = 0xffffffff;
1813		i = 32;
1814maskloop:
1815		iph = ipa & htonl(msk);
1816		hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
1817		for (np = nat_rules[hv]; np; np = np->in_mnext)
1818		{
1819			if ((np->in_ifp && (np->in_ifp != ifp)) ||
1820			    !np->in_space)
1821				continue;
1822			if ((np->in_flags & IPN_RF) &&
1823			    !(np->in_flags & nflags))
1824				continue;
1825			if (np->in_flags & IPN_FILTER) {
1826				if (!nat_match(fin, np, ip))
1827					continue;
1828			} else if ((ipa & np->in_inmsk) != np->in_inip)
1829				continue;
1830			if (np->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1831				if (*np->in_plabel && !appr_ok(ip, tcp, np))
1832					continue;
1833				/*
1834				 * If it's a redirection, then we don't want to
1835				 * create new outgoing port stuff.
1836				 * Redirections are only for incoming
1837				 * connections.
1838				 */
1839				if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
1840					continue;
1841				if ((nat = nat_new(np, ip, fin, (u_int)nflags,
1842						    NAT_OUTBOUND))) {
1843					np->in_hits++;
1844#ifdef	IPFILTER_LOG
1845					nat_log(nat, (u_int)np->in_redir);
1846#endif
1847					break;
1848				}
1849			}
1850		}
1851		if ((np == NULL) && (i > 0)) {
1852			do {
1853				i--;
1854				msk <<= 1;
1855			} while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
1856			if (i >= 0)
1857				goto maskloop;
1858		}
1859		MUTEX_DOWNGRADE(&ipf_nat);
1860	}
1861
1862	if (nat) {
1863		np = nat->nat_ptr;
1864		if (natadd && fin->fin_fi.fi_fl & FI_FRAG)
1865			ipfr_nat_newfrag(ip, fin, 0, nat);
1866		ip->ip_src = nat->nat_outip;
1867		MUTEX_ENTER(&nat->nat_lock);
1868		nat->nat_age = fr_defnatage;
1869		nat->nat_bytes += ip->ip_len;
1870		nat->nat_pkts++;
1871		MUTEX_EXIT(&nat->nat_lock);
1872
1873		/*
1874		 * Fix up checksums, not by recalculating them, but
1875		 * simply computing adjustments.
1876		 */
1877#if SOLARIS || defined(__sgi)
1878		if (nat->nat_dir == NAT_OUTBOUND)
1879			fix_outcksum(&ip->ip_sum, nat->nat_ipsumd, 0);
1880		else
1881			fix_incksum(&ip->ip_sum, nat->nat_ipsumd, 0);
1882#endif
1883
1884		if (!(ip->ip_off & IP_OFFMASK) &&
1885		    !(fin->fin_fi.fi_fl & FI_SHORT)) {
1886
1887			if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
1888				tcp->th_sport = nat->nat_outport;
1889				fin->fin_data[0] = ntohs(tcp->th_sport);
1890			}
1891
1892			if (ip->ip_p == IPPROTO_TCP) {
1893				csump = &tcp->th_sum;
1894				MUTEX_ENTER(&nat->nat_lock);
1895				fr_tcp_age(&nat->nat_age,
1896					   nat->nat_tcpstate, fin, 1);
1897				if (nat->nat_age < fr_defnaticmpage)
1898					nat->nat_age = fr_defnaticmpage;
1899#ifdef LARGE_NAT
1900				else if (nat->nat_age > fr_defnatage)
1901					nat->nat_age = fr_defnatage;
1902#endif
1903				/*
1904				 * Increase this because we may have
1905				 * "keep state" following this too and
1906				 * packet storms can occur if this is
1907				 * removed too quickly.
1908				 */
1909				if (nat->nat_age == fr_tcpclosed)
1910					nat->nat_age = fr_tcplastack;
1911				MUTEX_EXIT(&nat->nat_lock);
1912			} else if (ip->ip_p == IPPROTO_UDP) {
1913				udphdr_t *udp = (udphdr_t *)tcp;
1914
1915				if (udp->uh_sum)
1916					csump = &udp->uh_sum;
1917			} else if (ip->ip_p == IPPROTO_ICMP) {
1918				nat->nat_age = fr_defnaticmpage;
1919			}
1920			if (csump) {
1921				if (nat->nat_dir == NAT_OUTBOUND)
1922					fix_outcksum(csump, nat->nat_sumd[1],
1923						     ip->ip_len);
1924				else
1925					fix_incksum(csump, nat->nat_sumd[1],
1926						     ip->ip_len);
1927			}
1928		}
1929
1930		if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1931		     (tcp != NULL && dport == np->in_dport))) {
1932			i = appr_check(ip, fin, nat);
1933			if (i == 0)
1934				i = 1;
1935		} else
1936			i = 1;
1937		ATOMIC_INCL(nat_stats.ns_mapped[1]);
1938		RWLOCK_EXIT(&ipf_nat);	/* READ */
1939		return i;
1940	}
1941	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
1942	return 0;
1943}
1944
1945
1946/*
1947 * Packets coming in from the external interface go through this.
1948 * Here, the destination address requires alteration, if anything.
1949 */
1950int ip_natin(ip, fin)
1951ip_t *ip;
1952fr_info_t *fin;
1953{
1954	register struct in_addr src;
1955	register struct in_addr in;
1956	register ipnat_t *np;
1957	u_int nflags = 0, natadd = 1, hv, msk;
1958	struct ifnet *ifp = fin->fin_ifp;
1959	tcphdr_t *tcp = NULL;
1960	u_short sport = 0, dport = 0, *csump = NULL;
1961	nat_t *nat;
1962	u_32_t iph;
1963	int i;
1964
1965	if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
1966		return 0;
1967
1968	if (!(ip->ip_off & IP_OFFMASK) && !(fin->fin_fi.fi_fl & FI_SHORT)) {
1969		if (ip->ip_p == IPPROTO_TCP)
1970			nflags = IPN_TCP;
1971		else if (ip->ip_p == IPPROTO_UDP)
1972			nflags = IPN_UDP;
1973		if ((nflags & IPN_TCPUDP)) {
1974			tcp = (tcphdr_t *)fin->fin_dp;
1975			dport = tcp->th_dport;
1976			sport = tcp->th_sport;
1977		}
1978	}
1979
1980	in = ip->ip_dst;
1981	/* make sure the source address is to be redirected */
1982	src = ip->ip_src;
1983
1984	READ_ENTER(&ipf_nat);
1985
1986	if ((ip->ip_p == IPPROTO_ICMP) &&
1987	    (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
1988		;
1989	else if ((ip->ip_off & IP_OFFMASK) &&
1990		 (nat = ipfr_nat_knownfrag(ip, fin)))
1991		natadd = 0;
1992	else if ((nat = nat_inlookup(fin->fin_ifp, nflags, (u_int)ip->ip_p,
1993				     ip->ip_src, in, (dport << 16) | sport))) {
1994		nflags = nat->nat_flags;
1995		if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
1996			if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
1997				nat->nat_oport = sport;
1998			else if ((nat->nat_outport != dport) &&
1999				 (nflags & FI_W_SPORT))
2000				nat->nat_outport = dport;
2001			nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2002			nflags = nat->nat_flags;
2003		}
2004	} else {
2005		RWLOCK_EXIT(&ipf_nat);
2006		WRITE_ENTER(&ipf_nat);
2007		/*
2008		 * If there is no current entry in the nat table for this IP#,
2009		 * create one for it (if there is a matching rule).
2010		 */
2011		msk = 0xffffffff;
2012		i = 32;
2013maskloop:
2014		iph = in.s_addr & htonl(msk);
2015		hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2016		for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2017			if ((np->in_ifp && (np->in_ifp != ifp)) ||
2018			    (np->in_p && (np->in_p != ip->ip_p)) ||
2019			    (np->in_flags && !(nflags & np->in_flags)))
2020				continue;
2021			if (np->in_flags & IPN_FILTER) {
2022				if (!nat_match(fin, np, ip))
2023					continue;
2024			} else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2025				continue;
2026			if ((np->in_redir & NAT_REDIRECT) &&
2027			    (!np->in_pmin ||
2028			     ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2029			      (ntohs(dport) >= ntohs(np->in_pmin)))))
2030				if ((nat = nat_new(np, ip, fin, nflags,
2031						    NAT_INBOUND))) {
2032					np->in_hits++;
2033#ifdef	IPFILTER_LOG
2034					nat_log(nat, (u_int)np->in_redir);
2035#endif
2036					break;
2037				}
2038		}
2039
2040		if ((np == NULL) && (i > 0)) {
2041			do {
2042				i--;
2043				msk <<= 1;
2044			} while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2045			if (i >= 0)
2046				goto maskloop;
2047		}
2048		MUTEX_DOWNGRADE(&ipf_nat);
2049	}
2050	if (nat) {
2051		np = nat->nat_ptr;
2052		fin->fin_fr = nat->nat_fr;
2053		if (natadd && fin->fin_fi.fi_fl & FI_FRAG)
2054			ipfr_nat_newfrag(ip, fin, 0, nat);
2055		if ((np->in_apr != NULL) && (np->in_dport == 0 ||
2056		    (tcp != NULL && sport == np->in_dport))) {
2057			i = appr_check(ip, fin, nat);
2058			if (i == -1) {
2059				RWLOCK_EXIT(&ipf_nat);
2060				return i;
2061			}
2062		}
2063
2064		MUTEX_ENTER(&nat->nat_lock);
2065		if (nflags != IPN_ICMPERR)
2066			nat->nat_age = fr_defnatage;
2067
2068		nat->nat_bytes += ip->ip_len;
2069		nat->nat_pkts++;
2070		MUTEX_EXIT(&nat->nat_lock);
2071		ip->ip_dst = nat->nat_inip;
2072		fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2073
2074		/*
2075		 * Fix up checksums, not by recalculating them, but
2076		 * simply computing adjustments.
2077		 */
2078#if SOLARIS || defined(__sgi)
2079		if (nat->nat_dir == NAT_OUTBOUND)
2080			fix_incksum(&ip->ip_sum, nat->nat_ipsumd, 0);
2081		else
2082			fix_outcksum(&ip->ip_sum, nat->nat_ipsumd, 0);
2083#endif
2084		if (!(ip->ip_off & IP_OFFMASK) &&
2085		    !(fin->fin_fi.fi_fl & FI_SHORT)) {
2086
2087			if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
2088				tcp->th_dport = nat->nat_inport;
2089				fin->fin_data[1] = ntohs(tcp->th_dport);
2090			}
2091
2092			if (ip->ip_p == IPPROTO_TCP) {
2093				csump = &tcp->th_sum;
2094				MUTEX_ENTER(&nat->nat_lock);
2095				fr_tcp_age(&nat->nat_age,
2096					   nat->nat_tcpstate, fin, 0);
2097				if (nat->nat_age < fr_defnaticmpage)
2098					nat->nat_age = fr_defnaticmpage;
2099#ifdef LARGE_NAT
2100				else if (nat->nat_age > fr_defnatage)
2101					nat->nat_age = fr_defnatage;
2102#endif
2103				/*
2104				 * Increase this because we may have
2105				 * "keep state" following this too and
2106				 * packet storms can occur if this is
2107				 * removed too quickly.
2108				 */
2109				if (nat->nat_age == fr_tcpclosed)
2110					nat->nat_age = fr_tcplastack;
2111				MUTEX_EXIT(&nat->nat_lock);
2112			} else if (ip->ip_p == IPPROTO_UDP) {
2113				udphdr_t *udp = (udphdr_t *)tcp;
2114
2115				if (udp->uh_sum)
2116					csump = &udp->uh_sum;
2117			} else if (ip->ip_p == IPPROTO_ICMP) {
2118				nat->nat_age = fr_defnaticmpage;
2119			}
2120
2121			if (csump) {
2122				if (nat->nat_dir == NAT_OUTBOUND)
2123					fix_incksum(csump, nat->nat_sumd[0],
2124						    0);
2125				else
2126					fix_outcksum(csump, nat->nat_sumd[0],
2127						     0);
2128			}
2129		}
2130		ATOMIC_INCL(nat_stats.ns_mapped[0]);
2131		RWLOCK_EXIT(&ipf_nat);			/* READ */
2132		return 1;
2133	}
2134	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
2135	return 0;
2136}
2137
2138
2139/*
2140 * Free all memory used by NAT structures allocated at runtime.
2141 */
2142void ip_natunload()
2143{
2144	WRITE_ENTER(&ipf_nat);
2145	(void) nat_clearlist();
2146	(void) nat_flushtable();
2147	RWLOCK_EXIT(&ipf_nat);
2148
2149	if (nat_table[0] != NULL) {
2150		KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2151		nat_table[0] = NULL;
2152	}
2153	if (nat_table[1] != NULL) {
2154		KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2155		nat_table[1] = NULL;
2156	}
2157	if (nat_rules != NULL) {
2158		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2159		nat_rules = NULL;
2160	}
2161	if (rdr_rules != NULL) {
2162		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2163		rdr_rules = NULL;
2164	}
2165	if (maptable != NULL) {
2166		KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2167		maptable = NULL;
2168	}
2169}
2170
2171
2172/*
2173 * Slowly expire held state for NAT entries.  Timeouts are set in
2174 * expectation of this being called twice per second.
2175 */
2176void ip_natexpire()
2177{
2178	register struct nat *nat, **natp;
2179#if defined(_KERNEL) && !SOLARIS
2180	int s;
2181#endif
2182
2183	SPL_NET(s);
2184	WRITE_ENTER(&ipf_nat);
2185	for (natp = &nat_instances; (nat = *natp); ) {
2186		nat->nat_age--;
2187		if (nat->nat_age) {
2188			natp = &nat->nat_next;
2189			continue;
2190		}
2191		*natp = nat->nat_next;
2192#ifdef	IPFILTER_LOG
2193		nat_log(nat, NL_EXPIRE);
2194#endif
2195		nat_delete(nat);
2196		nat_stats.ns_expire++;
2197	}
2198	RWLOCK_EXIT(&ipf_nat);
2199	SPL_X(s);
2200}
2201
2202
2203/*
2204 */
2205void ip_natsync(ifp)
2206void *ifp;
2207{
2208	register ipnat_t *n;
2209	register nat_t *nat;
2210	register u_32_t sum1, sum2, sumd;
2211	struct in_addr in;
2212	ipnat_t *np;
2213	void *ifp2;
2214#if defined(_KERNEL) && !SOLARIS
2215	int s;
2216#endif
2217
2218	/*
2219	 * Change IP addresses for NAT sessions for any protocol except TCP
2220	 * since it will break the TCP connection anyway.
2221	 */
2222	SPL_NET(s);
2223	WRITE_ENTER(&ipf_nat);
2224	for (nat = nat_instances; nat; nat = nat->nat_next)
2225		if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2226		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2227		    (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2228			ifp2 = nat->nat_ifp;
2229			/*
2230			 * Change the map-to address to be the same as the
2231			 * new one.
2232			 */
2233			sum1 = nat->nat_outip.s_addr;
2234			if (fr_ifpaddr(4, ifp2, &in) != -1)
2235				nat->nat_outip = in;
2236			sum2 = nat->nat_outip.s_addr;
2237
2238			if (sum1 == sum2)
2239				continue;
2240			/*
2241			 * Readjust the checksum adjustment to take into
2242			 * account the new IP#.
2243			 */
2244			CALC_SUMD(sum1, sum2, sumd);
2245			/* XXX - dont change for TCP when solaris does
2246			 * hardware checksumming.
2247			 */
2248			sumd += nat->nat_sumd[0];
2249			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2250			nat->nat_sumd[1] = nat->nat_sumd[0];
2251		}
2252
2253	for (n = nat_list; (n != NULL); n = n->in_next)
2254		if (n->in_ifp == ifp) {
2255			n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2256			if (!n->in_ifp)
2257				n->in_ifp = (void *)-1;
2258		}
2259	RWLOCK_EXIT(&ipf_nat);
2260	SPL_X(s);
2261}
2262
2263
#ifdef	IPFILTER_LOG
/*
 * Emit one NAT log record through ipllog().
 *
 * The record carries the session's addresses, ports, protocol and
 * byte/packet counters together with the caller-supplied record type.
 * Unless LARGE_NAT is defined, the position of the session's rule in
 * nat_list is stored as the rule number; it stays -1 when the session has
 * no rule or the rule is not on the list.
 *
 * nat  - the session being logged
 * type - stored in the record's nl_type field
 */
void nat_log(nat, type)
struct nat *nat;
u_int type;
{
	struct ipnat *np;
	struct natlog natl;
	void *items[1];
	size_t sizes[1];
	int rulen, types[1];

	/* Record kind and protocol. */
	natl.nl_type = type;
	natl.nl_p = nat->nat_p;
	natl.nl_rule = -1;

	/* Addresses. */
	natl.nl_inip = nat->nat_inip;
	natl.nl_outip = nat->nat_outip;
	natl.nl_origip = nat->nat_oip;

	/* Ports. */
	natl.nl_inport = nat->nat_inport;
	natl.nl_outport = nat->nat_outport;
	natl.nl_origport = nat->nat_oport;

	/* Traffic counters. */
	natl.nl_bytes = nat->nat_bytes;
	natl.nl_pkts = nat->nat_pkts;

#ifndef LARGE_NAT
	if (nat->nat_ptr != NULL) {
		rulen = 0;
		for (np = nat_list; np != NULL; np = np->in_next) {
			if (np == nat->nat_ptr) {
				natl.nl_rule = rulen;
				break;
			}
			rulen++;
		}
	}
#endif

	items[0] = &natl;
	sizes[0] = sizeof(natl);
	types[0] = 0;

	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
}
#endif
2302