ip_nat.c revision 98004
1/*
2 * Copyright (C) 1995-2001 by Darren Reed.
3 *
4 * See the IPFILTER.LICENCE file for details on licencing.
5 *
6 * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
7 */
8
9#if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL)
10#define _KERNEL
11#endif
12
13#ifdef __sgi
14# include <sys/ptimers.h>
15#endif
16#include <sys/errno.h>
17#include <sys/types.h>
18#include <sys/param.h>
19#include <sys/time.h>
20#include <sys/file.h>
21#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22    defined(_KERNEL)
23# include "opt_ipfilter_log.h"
24#endif
25#if !defined(_KERNEL) && !defined(KERNEL)
26# include <stdio.h>
27# include <string.h>
28# include <stdlib.h>
29#endif
30#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
31# include <sys/filio.h>
32# include <sys/fcntl.h>
33#else
34# include <sys/ioctl.h>
35#endif
36#include <sys/fcntl.h>
37#ifndef linux
38# include <sys/protosw.h>
39#endif
40#include <sys/socket.h>
41#if defined(_KERNEL) && !defined(linux)
42# include <sys/systm.h>
43#endif
44#if !defined(__SVR4) && !defined(__svr4__)
45# ifndef linux
46#  include <sys/mbuf.h>
47# endif
48#else
49# include <sys/filio.h>
50# include <sys/byteorder.h>
51# ifdef _KERNEL
52#  include <sys/dditypes.h>
53# endif
54# include <sys/stream.h>
55# include <sys/kmem.h>
56#endif
57#if __FreeBSD_version >= 300000
58# include <sys/queue.h>
59#endif
60#include <net/if.h>
61#if __FreeBSD_version >= 300000
62# include <net/if_var.h>
63# if defined(_KERNEL) && !defined(IPFILTER_LKM)
64#  include "opt_ipfilter.h"
65# endif
66#endif
67#ifdef sun
68# include <net/af.h>
69#endif
70#include <net/route.h>
71#include <netinet/in.h>
72#include <netinet/in_systm.h>
73#include <netinet/ip.h>
74
75#ifdef __sgi
76# ifdef IFF_DRVRLOCK /* IRIX6 */
77#include <sys/hashing.h>
78#include <netinet/in_var.h>
79# endif
80#endif
81
82#ifdef RFC1825
83# include <vpn/md5.h>
84# include <vpn/ipsec.h>
85extern struct ifnet vpnif;
86#endif
87
88#ifndef linux
89# include <netinet/ip_var.h>
90# include <netinet/tcp_fsm.h>
91#endif
92#include <netinet/tcp.h>
93#include <netinet/udp.h>
94#include <netinet/ip_icmp.h>
95#include "netinet/ip_compat.h"
96#include <netinet/tcpip.h>
97#include "netinet/ip_fil.h"
98#include "netinet/ip_nat.h"
99#include "netinet/ip_frag.h"
100#include "netinet/ip_state.h"
101#include "netinet/ip_proxy.h"
102#if (__FreeBSD_version >= 300000)
103# include <sys/malloc.h>
104#endif
105#ifndef	MIN
106# define	MIN(a,b)	(((a)<(b))?(a):(b))
107#endif
108#undef	SOCKADDR_IN
109#define	SOCKADDR_IN	struct sockaddr_in
110
111#if !defined(lint)
112static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
113/* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.37.2.44 2001/07/21 07:17:22 darrenr Exp $"; */
114static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 98004 2002-06-07 08:56:30Z darrenr $";
115#endif
116
117nat_t	**nat_table[2] = { NULL, NULL },
118	*nat_instances = NULL;
119ipnat_t	*nat_list = NULL;
120u_int	ipf_nattable_sz = NAT_TABLE_SZ;
121u_int	ipf_natrules_sz = NAT_SIZE;
122u_int	ipf_rdrrules_sz = RDR_SIZE;
123u_int	ipf_hostmap_sz = HOSTMAP_SIZE;
124u_32_t	nat_masks = 0;
125u_32_t	rdr_masks = 0;
126ipnat_t	**nat_rules = NULL;
127ipnat_t	**rdr_rules = NULL;
128hostmap_t	**maptable  = NULL;
129
130u_long	fr_defnatage = DEF_NAT_AGE,
131	fr_defnaticmpage = 6;		/* 3 seconds */
132natstat_t nat_stats;
133int	fr_nat_lock = 0;
134#if	(SOLARIS || defined(__sgi)) && defined(_KERNEL)
135extern	kmutex_t	ipf_rw;
136extern	KRWLOCK_T	ipf_nat;
137#endif
138
139static	int	nat_flushtable __P((void));
140static	void	nat_addnat __P((struct ipnat *));
141static	void	nat_addrdr __P((struct ipnat *));
142static	void	nat_delete __P((struct nat *));
143static	void	nat_delrdr __P((struct ipnat *));
144static	void	nat_delnat __P((struct ipnat *));
145static	int	fr_natgetent __P((caddr_t));
146static	int	fr_natgetsz __P((caddr_t));
147static	int	fr_natputent __P((caddr_t));
148static	void	nat_tabmove __P((fr_info_t *, nat_t *));
149static	int	nat_match __P((fr_info_t *, ipnat_t *, ip_t *));
150static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
151				    struct in_addr));
152static	void	nat_hostmapdel __P((struct hostmap *));
153
154
155int nat_init()
156{
157	KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
158	if (nat_table[0] != NULL)
159		bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
160	else
161		return -1;
162
163	KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
164	if (nat_table[1] != NULL)
165		bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
166	else
167		return -1;
168
169	KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
170	if (nat_rules != NULL)
171		bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
172	else
173		return -1;
174
175	KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
176	if (rdr_rules != NULL)
177		bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
178	else
179		return -1;
180
181	KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
182	if (maptable != NULL)
183		bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
184	else
185		return -1;
186	return 0;
187}
188
189
190static void nat_addrdr(n)
191ipnat_t *n;
192{
193	ipnat_t **np;
194	u_32_t j;
195	u_int hv;
196	int k;
197
198	k = countbits(n->in_outmsk);
199	if ((k >= 0) && (k != 32))
200		rdr_masks |= 1 << k;
201	j = (n->in_outip & n->in_outmsk);
202	hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
203	np = rdr_rules + hv;
204	while (*np != NULL)
205		np = &(*np)->in_rnext;
206	n->in_rnext = NULL;
207	n->in_prnext = np;
208	*np = n;
209}
210
211
212static void nat_addnat(n)
213ipnat_t *n;
214{
215	ipnat_t **np;
216	u_32_t j;
217	u_int hv;
218	int k;
219
220	k = countbits(n->in_inmsk);
221	if ((k >= 0) && (k != 32))
222		nat_masks |= 1 << k;
223	j = (n->in_inip & n->in_inmsk);
224	hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
225	np = nat_rules + hv;
226	while (*np != NULL)
227		np = &(*np)->in_mnext;
228	n->in_mnext = NULL;
229	n->in_pmnext = np;
230	*np = n;
231}
232
233
234static void nat_delrdr(n)
235ipnat_t *n;
236{
237	if (n->in_rnext)
238		n->in_rnext->in_prnext = n->in_prnext;
239	*n->in_prnext = n->in_rnext;
240}
241
242
243static void nat_delnat(n)
244ipnat_t *n;
245{
246	if (n->in_mnext)
247		n->in_mnext->in_pmnext = n->in_pmnext;
248	*n->in_pmnext = n->in_mnext;
249}
250
251
252/*
253 * check if an ip address has already been allocated for a given mapping that
254 * is not doing port based translation.
255 *
256 * Must be called with ipf_nat held as a write lock.
257 */
258static struct hostmap *nat_hostmap(np, real, map)
259ipnat_t *np;
260struct in_addr real;
261struct in_addr map;
262{
263	hostmap_t *hm;
264	u_int hv;
265
266	hv = real.s_addr % HOSTMAP_SIZE;
267	for (hm = maptable[hv]; hm; hm = hm->hm_next)
268		if ((hm->hm_realip.s_addr == real.s_addr) &&
269		    (np == hm->hm_ipnat)) {
270			hm->hm_ref++;
271			return hm;
272		}
273
274	KMALLOC(hm, hostmap_t *);
275	if (hm) {
276		hm->hm_next = maptable[hv];
277		hm->hm_pnext = maptable + hv;
278		if (maptable[hv])
279			maptable[hv]->hm_pnext = &hm->hm_next;
280		maptable[hv] = hm;
281		hm->hm_ipnat = np;
282		hm->hm_realip = real;
283		hm->hm_mapip = map;
284		hm->hm_ref = 1;
285	}
286	return hm;
287}
288
289
290/*
291 * Must be called with ipf_nat held as a write lock.
292 */
293static void nat_hostmapdel(hm)
294struct hostmap *hm;
295{
296	ATOMIC_DEC32(hm->hm_ref);
297	if (hm->hm_ref == 0) {
298		if (hm->hm_next)
299			hm->hm_next->hm_pnext = hm->hm_pnext;
300		*hm->hm_pnext = hm->hm_next;
301		KFREE(hm);
302	}
303}
304
305
306void fix_outcksum(fin, sp, n)
307fr_info_t *fin;
308u_short *sp;
309u_32_t n;
310{
311	register u_short sumshort;
312	register u_32_t sum1;
313
314	if (!n)
315		return;
316	else if (n & NAT_HW_CKSUM) {
317		n &= 0xffff;
318		n += fin->fin_dlen;
319		n = (n & 0xffff) + (n >> 16);
320		*sp = n & 0xffff;
321		return;
322	}
323	sum1 = (~ntohs(*sp)) & 0xffff;
324	sum1 += (n);
325	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
326	/* Again */
327	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
328	sumshort = ~(u_short)sum1;
329	*(sp) = htons(sumshort);
330}
331
332
333void fix_incksum(fin, sp, n)
334fr_info_t *fin;
335u_short *sp;
336u_32_t n;
337{
338	register u_short sumshort;
339	register u_32_t sum1;
340
341	if (!n)
342		return;
343	else if (n & NAT_HW_CKSUM) {
344		n &= 0xffff;
345		n += fin->fin_dlen;
346		n = (n & 0xffff) + (n >> 16);
347		*sp = n & 0xffff;
348		return;
349	}
350#ifdef sparc
351	sum1 = (~(*sp)) & 0xffff;
352#else
353	sum1 = (~ntohs(*sp)) & 0xffff;
354#endif
355	sum1 += ~(n) & 0xffff;
356	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
357	/* Again */
358	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
359	sumshort = ~(u_short)sum1;
360	*(sp) = htons(sumshort);
361}
362
363
364/*
365 * fix_datacksum is used *only* for the adjustments of checksums in the data
366 * section of an IP packet.
367 *
368 * The only situation in which you need to do this is when NAT'ing an
369 * ICMP error message. Such a message, contains in its body the IP header
370 * of the original IP packet, that causes the error.
371 *
372 * You can't use fix_incksum or fix_outcksum in that case, because for the
373 * kernel the data section of the ICMP error is just data, and no special
374 * processing like hardware cksum or ntohs processing have been done by the
375 * kernel on the data section.
376 */
377void fix_datacksum(sp, n)
378u_short *sp;
379u_32_t n;
380{
381	register u_short sumshort;
382	register u_32_t sum1;
383
384	if (!n)
385		return;
386
387	sum1 = (~ntohs(*sp)) & 0xffff;
388	sum1 += (n);
389	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
390	/* Again */
391	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
392	sumshort = ~(u_short)sum1;
393	*(sp) = htons(sumshort);
394}
395
/*
 * How the NAT is organised and works.
 *
 * Inside (interface y) NAT       Outside (interface x)
 * -------------------- -+- -------------------------------------
 * Packet going          |   out, processed by ip_natout() for x
 * ------------>         |   ------------>
 * src=10.1.1.1          |   src=192.1.1.1
 *                       |
 *                       |   in, processed by ip_natin() for x
 * <------------         |   <------------
 * dst=10.1.1.1          |   dst=192.1.1.1
 * -------------------- -+- -------------------------------------
 * ip_natout() - changes ip_src and if required, sport
 *             - creates a new mapping, if required.
 * ip_natin()  - changes ip_dst and if required, dport
 *
 * In the NAT table, internal source is recorded as "in" and externally
 * seen as "out".
 */
416
417/*
418 * Handle ioctls which manipulate the NAT.
419 */
420int nat_ioctl(data, cmd, mode)
421#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
422u_long cmd;
423#else
424int cmd;
425#endif
426caddr_t data;
427int mode;
428{
429	register ipnat_t *nat, *nt, *n = NULL, **np = NULL;
430	int error = 0, ret, arg, getlock;
431	ipnat_t natd;
432	u_32_t i, j;
433
434#if (BSD >= 199306) && defined(_KERNEL)
435	if ((securelevel >= 3) && (mode & FWRITE))
436		return EPERM;
437#endif
438
439	nat = NULL;     /* XXX gcc -Wuninitialized */
440	KMALLOC(nt, ipnat_t *);
441	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
442	if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
443		if (mode & NAT_SYSSPACE) {
444			bcopy(data, (char *)&natd, sizeof(natd));
445			error = 0;
446		} else {
447			error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
448		}
449	} else if (cmd == SIOCIPFFL) {	/* SIOCFLNAT & SIOCCNATL */
450		error = IRCOPY(data, (char *)&arg, sizeof(arg));
451		if (error)
452			error = EFAULT;
453	}
454
455	if (error)
456		goto done;
457
458	/*
459	 * For add/delete, look to see if the NAT entry is already present
460	 */
461	if (getlock == 1)
462		WRITE_ENTER(&ipf_nat);
463	if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
464		nat = &natd;
465		nat->in_flags &= IPN_USERFLAGS;
466		if ((nat->in_redir & NAT_MAPBLK) == 0) {
467			if ((nat->in_flags & IPN_SPLIT) == 0)
468				nat->in_inip &= nat->in_inmsk;
469			if ((nat->in_flags & IPN_IPRANGE) == 0)
470				nat->in_outip &= nat->in_outmsk;
471		}
472		for (np = &nat_list; (n = *np); np = &n->in_next)
473			if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
474					IPN_CMPSIZ)) {
475				if (n->in_redir == NAT_REDIRECT &&
476				    n->in_pnext != nat->in_pnext)
477					continue;
478				break;
479			}
480	}
481
482	switch (cmd)
483	{
484#ifdef  IPFILTER_LOG
485	case SIOCIPFFB :
486	{
487		int tmp;
488
489		if (!(mode & FWRITE))
490			error = EPERM;
491		else {
492			tmp = ipflog_clear(IPL_LOGNAT);
493			IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
494		}
495		break;
496	}
497#endif
498	case SIOCADNAT :
499		if (!(mode & FWRITE)) {
500			error = EPERM;
501			break;
502		}
503		if (n) {
504			error = EEXIST;
505			break;
506		}
507		if (nt == NULL) {
508			error = ENOMEM;
509			break;
510		}
511		n = nt;
512		nt = NULL;
513		bcopy((char *)nat, (char *)n, sizeof(*n));
514		n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
515		if (!n->in_ifp)
516			n->in_ifp = (void *)-1;
517		if (n->in_plabel[0] != '\0') {
518			n->in_apr = appr_lookup(n->in_p, n->in_plabel);
519			if (!n->in_apr) {
520				error = ENOENT;
521				break;
522			}
523		}
524		n->in_next = NULL;
525		*np = n;
526
527		if (n->in_redir & NAT_REDIRECT) {
528			n->in_flags &= ~IPN_NOTDST;
529			nat_addrdr(n);
530		}
531		if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
532			n->in_flags &= ~IPN_NOTSRC;
533			nat_addnat(n);
534		}
535
536		n->in_use = 0;
537		if (n->in_redir & NAT_MAPBLK)
538			n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
539		else if (n->in_flags & IPN_AUTOPORTMAP)
540			n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
541		else if (n->in_flags & IPN_IPRANGE)
542			n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
543		else if (n->in_flags & IPN_SPLIT)
544			n->in_space = 2;
545		else
546			n->in_space = ~ntohl(n->in_outmsk);
547		/*
548		 * Calculate the number of valid IP addresses in the output
549		 * mapping range.  In all cases, the range is inclusive of
550		 * the start and ending IP addresses.
551		 * If to a CIDR address, lose 2: broadcast + network address
552		 *			         (so subtract 1)
553		 * If to a range, add one.
554		 * If to a single IP address, set to 1.
555		 */
556		if (n->in_space) {
557			if ((n->in_flags & IPN_IPRANGE) != 0)
558				n->in_space += 1;
559			else
560				n->in_space -= 1;
561		} else
562			n->in_space = 1;
563		if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
564		    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
565			n->in_nip = ntohl(n->in_outip) + 1;
566		else if ((n->in_flags & IPN_SPLIT) &&
567			 (n->in_redir & NAT_REDIRECT))
568			n->in_nip = ntohl(n->in_inip);
569		else
570			n->in_nip = ntohl(n->in_outip);
571		if (n->in_redir & NAT_MAP) {
572			n->in_pnext = ntohs(n->in_pmin);
573			/*
574			 * Multiply by the number of ports made available.
575			 */
576			if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
577				n->in_space *= (ntohs(n->in_pmax) -
578						ntohs(n->in_pmin) + 1);
579				/*
580				 * Because two different sources can map to
581				 * different destinations but use the same
582				 * local IP#/port #.
583				 * If the result is smaller than in_space, then
584				 * we may have wrapped around 32bits.
585				 */
586				i = n->in_inmsk;
587				if ((i != 0) && (i != 0xffffffff)) {
588					j = n->in_space * (~ntohl(i) + 1);
589					if (j >= n->in_space)
590						n->in_space = j;
591					else
592						n->in_space = 0xffffffff;
593				}
594			}
595			/*
596			 * If no protocol is specified, multiple by 256.
597			 */
598			if ((n->in_flags & IPN_TCPUDP) == 0) {
599					j = n->in_space * 256;
600					if (j >= n->in_space)
601						n->in_space = j;
602					else
603						n->in_space = 0xffffffff;
604			}
605		}
606		/* Otherwise, these fields are preset */
607		n = NULL;
608		nat_stats.ns_rules++;
609		break;
610	case SIOCRMNAT :
611		if (!(mode & FWRITE)) {
612			error = EPERM;
613			n = NULL;
614			break;
615		}
616		if (!n) {
617			error = ESRCH;
618			break;
619		}
620		if (n->in_redir & NAT_REDIRECT)
621			nat_delrdr(n);
622		if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
623			nat_delnat(n);
624		if (nat_list == NULL) {
625			nat_masks = 0;
626			rdr_masks = 0;
627		}
628		*np = n->in_next;
629		if (!n->in_use) {
630			if (n->in_apr)
631				appr_free(n->in_apr);
632			KFREE(n);
633			nat_stats.ns_rules--;
634		} else {
635			n->in_flags |= IPN_DELETE;
636			n->in_next = NULL;
637		}
638		n = NULL;
639		break;
640	case SIOCGNATS :
641		MUTEX_DOWNGRADE(&ipf_nat);
642		nat_stats.ns_table[0] = nat_table[0];
643		nat_stats.ns_table[1] = nat_table[1];
644		nat_stats.ns_list = nat_list;
645		nat_stats.ns_maptable = maptable;
646		nat_stats.ns_nattab_sz = ipf_nattable_sz;
647		nat_stats.ns_rultab_sz = ipf_natrules_sz;
648		nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
649		nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
650		nat_stats.ns_instances = nat_instances;
651		nat_stats.ns_apslist = ap_sess_list;
652		error = IWCOPYPTR((char *)&nat_stats, (char *)data,
653				  sizeof(nat_stats));
654		break;
655	case SIOCGNATL :
656	    {
657		natlookup_t nl;
658
659		MUTEX_DOWNGRADE(&ipf_nat);
660		error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
661		if (error)
662			break;
663
664		if (nat_lookupredir(&nl)) {
665			error = IWCOPYPTR((char *)&nl, (char *)data,
666					  sizeof(nl));
667		} else
668			error = ESRCH;
669		break;
670	    }
671	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
672		if (!(mode & FWRITE)) {
673			error = EPERM;
674			break;
675		}
676		error = 0;
677		if (arg == 0)
678			ret = nat_flushtable();
679		else if (arg == 1)
680			ret = nat_clearlist();
681		else
682			error = EINVAL;
683		MUTEX_DOWNGRADE(&ipf_nat);
684		if (!error) {
685			error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
686			if (error)
687				error = EFAULT;
688		}
689		break;
690	case SIOCSTLCK :
691		error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
692		if (!error) {
693			error = IWCOPY((caddr_t)&fr_nat_lock, data,
694					sizeof(fr_nat_lock));
695			if (!error)
696				fr_nat_lock = arg;
697		} else
698			error = EFAULT;
699		break;
700	case SIOCSTPUT :
701		if (fr_nat_lock)
702			error = fr_natputent(data);
703		else
704			error = EACCES;
705		break;
706	case SIOCSTGSZ :
707		if (fr_nat_lock)
708			error = fr_natgetsz(data);
709		else
710			error = EACCES;
711		break;
712	case SIOCSTGET :
713		if (fr_nat_lock)
714			error = fr_natgetent(data);
715		else
716			error = EACCES;
717		break;
718	case FIONREAD :
719#ifdef	IPFILTER_LOG
720		arg = (int)iplused[IPL_LOGNAT];
721		MUTEX_DOWNGRADE(&ipf_nat);
722		error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
723		if (error)
724			error = EFAULT;
725#endif
726		break;
727	default :
728		error = EINVAL;
729		break;
730	}
731	if (getlock == 1)
732		RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
733done:
734	if (nt)
735		KFREE(nt);
736	return error;
737}
738
739
740static int fr_natgetsz(data)
741caddr_t data;
742{
743	ap_session_t *aps;
744	nat_t *nat, *n;
745	int error = 0;
746	natget_t ng;
747
748	error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
749	if (error)
750		return EFAULT;
751
752	nat = ng.ng_ptr;
753	if (!nat) {
754		nat = nat_instances;
755		ng.ng_sz = 0;
756		if (nat == NULL) {
757			error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
758			if (error)
759				error = EFAULT;
760			return error;
761		}
762	} else {
763		/*
764		 * Make sure the pointer we're copying from exists in the
765		 * current list of entries.  Security precaution to prevent
766		 * copying of random kernel data.
767		 */
768		for (n = nat_instances; n; n = n->nat_next)
769			if (n == nat)
770				break;
771		if (!n)
772			return ESRCH;
773	}
774
775	ng.ng_sz = sizeof(nat_save_t);
776	aps = nat->nat_aps;
777	if ((aps != NULL) && (aps->aps_data != 0)) {
778		ng.ng_sz += sizeof(ap_session_t);
779		ng.ng_sz += aps->aps_psiz;
780	}
781
782	error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
783	if (error)
784		error = EFAULT;
785	return error;
786}
787
788
789static int fr_natgetent(data)
790caddr_t data;
791{
792	nat_save_t ipn, *ipnp, *ipnn = NULL;
793	register nat_t *n, *nat;
794	ap_session_t *aps;
795	int error;
796
797	error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
798	if (error)
799		return EFAULT;
800	error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
801	if (error)
802		return EFAULT;
803
804	nat = ipn.ipn_next;
805	if (!nat) {
806		nat = nat_instances;
807		if (nat == NULL) {
808			if (nat_instances == NULL)
809				return ENOENT;
810			return 0;
811		}
812	} else {
813		/*
814		 * Make sure the pointer we're copying from exists in the
815		 * current list of entries.  Security precaution to prevent
816		 * copying of random kernel data.
817		 */
818		for (n = nat_instances; n; n = n->nat_next)
819			if (n == nat)
820				break;
821		if (!n)
822			return ESRCH;
823	}
824
825	ipn.ipn_next = nat->nat_next;
826	ipn.ipn_dsize = 0;
827	bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
828	ipn.ipn_nat.nat_data = NULL;
829
830	if (nat->nat_ptr) {
831		bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
832		      sizeof(ipn.ipn_ipnat));
833	}
834
835	if (nat->nat_fr)
836		bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
837		      sizeof(ipn.ipn_rule));
838
839	if ((aps = nat->nat_aps)) {
840		ipn.ipn_dsize = sizeof(*aps);
841		if (aps->aps_data)
842			ipn.ipn_dsize += aps->aps_psiz;
843		KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
844		if (ipnn == NULL)
845			return ENOMEM;
846		bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
847
848		bcopy((char *)aps, (char *)ipnn->ipn_data, sizeof(*aps));
849		if (aps->aps_data) {
850			bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
851			      aps->aps_psiz);
852			ipnn->ipn_dsize += aps->aps_psiz;
853		}
854		error = IWCOPY((caddr_t)ipnn, ipnp,
855			       sizeof(ipn) + ipn.ipn_dsize);
856		if (error)
857			error = EFAULT;
858		KFREES(ipnn, sizeof(*ipnn) + ipn.ipn_dsize);
859	} else {
860		error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
861		if (error)
862			error = EFAULT;
863	}
864	return error;
865}
866
867
868static int fr_natputent(data)
869caddr_t data;
870{
871	nat_save_t ipn, *ipnp, *ipnn = NULL;
872	register nat_t *n, *nat;
873	ap_session_t *aps;
874	frentry_t *fr;
875	ipnat_t *in;
876
877	int error;
878
879	error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
880	if (error)
881		return EFAULT;
882	error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
883	if (error)
884		return EFAULT;
885	nat = NULL;
886	if (ipn.ipn_dsize) {
887		KMALLOCS(ipnn, nat_save_t *, sizeof(ipn) + ipn.ipn_dsize);
888		if (ipnn == NULL)
889			return ENOMEM;
890		bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
891		error = IRCOPY((caddr_t)ipnp, (caddr_t)ipn.ipn_data,
892			       ipn.ipn_dsize);
893		if (error) {
894			error = EFAULT;
895			goto junkput;
896		}
897	} else
898		ipnn = NULL;
899
900	KMALLOC(nat, nat_t *);
901	if (nat == NULL) {
902		error = EFAULT;
903		goto junkput;
904	}
905
906	bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
907	/*
908	 * Initialize all these so that nat_delete() doesn't cause a crash.
909	 */
910	nat->nat_phnext[0] = NULL;
911	nat->nat_phnext[1] = NULL;
912	fr = nat->nat_fr;
913	nat->nat_fr = NULL;
914	aps = nat->nat_aps;
915	nat->nat_aps = NULL;
916	in = nat->nat_ptr;
917	nat->nat_ptr = NULL;
918	nat->nat_hm = NULL;
919	nat->nat_data = NULL;
920	nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
921
922	/*
923	 * Restore the rule associated with this nat session
924	 */
925	if (in) {
926		KMALLOC(in, ipnat_t *);
927		if (in == NULL) {
928			error = ENOMEM;
929			goto junkput;
930		}
931		nat->nat_ptr = in;
932		bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
933		in->in_use = 1;
934		in->in_flags |= IPN_DELETE;
935		in->in_next = NULL;
936		in->in_rnext = NULL;
937		in->in_prnext = NULL;
938		in->in_mnext = NULL;
939		in->in_pmnext = NULL;
940		in->in_ifp = GETUNIT(in->in_ifname, 4);
941		if (in->in_plabel[0] != '\0') {
942			in->in_apr = appr_lookup(in->in_p, in->in_plabel);
943		}
944	}
945
946	/*
947	 * Restore ap_session_t structure.  Include the private data allocated
948	 * if it was there.
949	 */
950	if (aps) {
951		KMALLOC(aps, ap_session_t *);
952		if (aps == NULL) {
953			error = ENOMEM;
954			goto junkput;
955		}
956		nat->nat_aps = aps;
957		aps->aps_next = ap_sess_list;
958		ap_sess_list = aps;
959		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
960		if (in)
961			aps->aps_apr = in->in_apr;
962		if (aps->aps_psiz) {
963			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
964			if (aps->aps_data == NULL) {
965				error = ENOMEM;
966				goto junkput;
967			}
968			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
969			      aps->aps_psiz);
970		} else {
971			aps->aps_psiz = 0;
972			aps->aps_data = NULL;
973		}
974	}
975
976	/*
977	 * If there was a filtering rule associated with this entry then
978	 * build up a new one.
979	 */
980	if (fr != NULL) {
981		if (nat->nat_flags & FI_NEWFR) {
982			KMALLOC(fr, frentry_t *);
983			nat->nat_fr = fr;
984			if (fr == NULL) {
985				error = ENOMEM;
986				goto junkput;
987			}
988			bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
989			ipn.ipn_nat.nat_fr = fr;
990			error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
991			if (error) {
992				error = EFAULT;
993				goto junkput;
994			}
995		} else {
996			for (n = nat_instances; n; n = n->nat_next)
997				if (n->nat_fr == fr)
998					break;
999			if (!n) {
1000				error = ESRCH;
1001				goto junkput;
1002			}
1003		}
1004	}
1005
1006	if (ipnn)
1007		KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1008	nat_insert(nat);
1009	return 0;
1010junkput:
1011	if (ipnn)
1012		KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1013	if (nat)
1014		nat_delete(nat);
1015	return error;
1016}
1017
1018
1019/*
1020 * Delete a nat entry from the various lists and table.
1021 */
1022static void nat_delete(natd)
1023struct nat *natd;
1024{
1025	struct ipnat *ipn;
1026
1027	if (natd->nat_flags & FI_WILDP)
1028		nat_stats.ns_wilds--;
1029	if (natd->nat_hnext[0])
1030		natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0];
1031	*natd->nat_phnext[0] = natd->nat_hnext[0];
1032	if (natd->nat_hnext[1])
1033		natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1];
1034	*natd->nat_phnext[1] = natd->nat_hnext[1];
1035	if (natd->nat_me != NULL)
1036		*natd->nat_me = NULL;
1037
1038	if (natd->nat_fr != NULL) {
1039		ATOMIC_DEC32(natd->nat_fr->fr_ref);
1040	}
1041
1042	if (natd->nat_hm != NULL)
1043		nat_hostmapdel(natd->nat_hm);
1044
1045	/*
1046	 * If there is an active reference from the nat entry to its parent
1047	 * rule, decrement the rule's reference count and free it too if no
1048	 * longer being used.
1049	 */
1050	ipn = natd->nat_ptr;
1051	if (ipn != NULL) {
1052		ipn->in_space++;
1053		ipn->in_use--;
1054		if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
1055			if (ipn->in_apr)
1056				appr_free(ipn->in_apr);
1057			KFREE(ipn);
1058			nat_stats.ns_rules--;
1059		}
1060	}
1061
1062	MUTEX_DESTROY(&natd->nat_lock);
1063	/*
1064	 * If there's a fragment table entry too for this nat entry, then
1065	 * dereference that as well.
1066	 */
1067	ipfr_forget((void *)natd);
1068	aps_free(natd->nat_aps);
1069	nat_stats.ns_inuse--;
1070	KFREE(natd);
1071}
1072
1073
1074/*
1075 * nat_flushtable - clear the NAT table of all mapping entries.
1076 * (this is for the dynamic mappings)
1077 */
1078static int nat_flushtable()
1079{
1080	register nat_t *nat, **natp;
1081	register int j = 0;
1082
1083	/*
1084	 * ALL NAT mappings deleted, so lets just make the deletions
1085	 * quicker.
1086	 */
1087	if (nat_table[0] != NULL)
1088		bzero((char *)nat_table[0],
1089		      sizeof(nat_table[0]) * ipf_nattable_sz);
1090	if (nat_table[1] != NULL)
1091		bzero((char *)nat_table[1],
1092		      sizeof(nat_table[1]) * ipf_nattable_sz);
1093
1094	for (natp = &nat_instances; (nat = *natp); ) {
1095		*natp = nat->nat_next;
1096#ifdef	IPFILTER_LOG
1097		nat_log(nat, NL_FLUSH);
1098#endif
1099		nat_delete(nat);
1100		j++;
1101	}
1102	nat_stats.ns_inuse = 0;
1103	return j;
1104}
1105
1106
1107/*
1108 * nat_clearlist - delete all rules in the active NAT mapping list.
1109 * (this is for NAT/RDR rules)
1110 */
1111int nat_clearlist()
1112{
1113	register ipnat_t *n, **np = &nat_list;
1114	int i = 0;
1115
1116	if (nat_rules != NULL)
1117		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1118	if (rdr_rules != NULL)
1119		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1120
1121	while ((n = *np)) {
1122		*np = n->in_next;
1123		if (!n->in_use) {
1124			if (n->in_apr)
1125				appr_free(n->in_apr);
1126			KFREE(n);
1127			nat_stats.ns_rules--;
1128		} else {
1129			n->in_flags |= IPN_DELETE;
1130			n->in_next = NULL;
1131		}
1132		i++;
1133	}
1134	nat_masks = 0;
1135	rdr_masks = 0;
1136	return i;
1137}
1138
1139
1140/*
1141 * Create a new NAT table entry.
1142 * NOTE: Assumes write lock on ipf_nat has been obtained already.
1143 *       If you intend on changing this, beware: appr_new() may call nat_new()
1144 *       recursively!
1145 */
1146nat_t *nat_new(fin, ip, np, natsave, flags, direction)
1147fr_info_t *fin;
1148ip_t *ip;
1149ipnat_t *np;
1150nat_t **natsave;
1151u_int flags;
1152int direction;
1153{
1154	register u_32_t sum1, sum2, sumd, l;
1155	u_short port = 0, sport = 0, dport = 0, nport = 0;
1156	struct in_addr in, inb;
1157	u_short nflags, sp, dp;
1158	tcphdr_t *tcp = NULL;
1159	hostmap_t *hm = NULL;
1160	nat_t *nat, *natl;
1161#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1162	qif_t *qf = fin->fin_qif;
1163#endif
1164
1165	nflags = flags & np->in_flags;
1166	if (flags & IPN_TCPUDP) {
1167		tcp = (tcphdr_t *)fin->fin_dp;
1168		sport = htons(fin->fin_data[0]);
1169		dport = htons(fin->fin_data[1]);
1170	}
1171
1172	/* Give me a new nat */
1173	KMALLOC(nat, nat_t *);
1174	if (nat == NULL) {
1175		nat_stats.ns_memfail++;
1176		return NULL;
1177	}
1178
1179	bzero((char *)nat, sizeof(*nat));
1180	nat->nat_flags = flags;
1181	if (flags & FI_WILDP)
1182		nat_stats.ns_wilds++;
1183	/*
1184	 * Search the current table for a match.
1185	 */
1186	if (direction == NAT_OUTBOUND) {
1187		/*
1188		 * Values at which the search for a free resouce starts.
1189		 */
1190		u_32_t st_ip;
1191		u_short st_port;
1192
1193		/*
1194		 * If it's an outbound packet which doesn't match any existing
1195		 * record, then create a new port
1196		 */
1197		l = 0;
1198		st_ip = np->in_nip;
1199		st_port = np->in_pnext;
1200
1201		do {
1202			port = 0;
1203			in.s_addr = htonl(np->in_nip);
1204			if (l == 0) {
1205				/*
1206				 * Check to see if there is an existing NAT
1207				 * setup for this IP address pair.
1208				 */
1209				hm = nat_hostmap(np, fin->fin_src, in);
1210				if (hm != NULL)
1211					in.s_addr = hm->hm_mapip.s_addr;
1212			} else if ((l == 1) && (hm != NULL)) {
1213				nat_hostmapdel(hm);
1214				hm = NULL;
1215			}
1216			in.s_addr = ntohl(in.s_addr);
1217
1218			nat->nat_hm = hm;
1219
1220			if ((np->in_outmsk == 0xffffffff) &&
1221			    (np->in_pnext == 0)) {
1222				if (l > 0)
1223					goto badnat;
1224			}
1225
1226			if (np->in_redir & NAT_MAPBLK) {
1227				if ((l >= np->in_ppip) || ((l > 0) &&
1228				     !(flags & IPN_TCPUDP)))
1229					goto badnat;
1230				/*
1231				 * map-block - Calculate destination address.
1232				 */
1233				in.s_addr = ntohl(fin->fin_saddr);
1234				in.s_addr &= ntohl(~np->in_inmsk);
1235				inb.s_addr = in.s_addr;
1236				in.s_addr /= np->in_ippip;
1237				in.s_addr &= ntohl(~np->in_outmsk);
1238				in.s_addr += ntohl(np->in_outip);
1239				/*
1240				 * Calculate destination port.
1241				 */
1242				if ((flags & IPN_TCPUDP) &&
1243				    (np->in_ppip != 0)) {
1244					port = ntohs(sport) + l;
1245					port %= np->in_ppip;
1246					port += np->in_ppip *
1247						(inb.s_addr % np->in_ippip);
1248					port += MAPBLK_MINPORT;
1249					port = htons(port);
1250				}
1251			} else if (!np->in_outip &&
1252				   (np->in_outmsk == 0xffffffff)) {
1253				/*
1254				 * 0/32 - use the interface's IP address.
1255				 */
1256				if ((l > 0) ||
1257				    fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1258					goto badnat;
1259				in.s_addr = ntohl(in.s_addr);
1260			} else if (!np->in_outip && !np->in_outmsk) {
1261				/*
1262				 * 0/0 - use the original source address/port.
1263				 */
1264				if (l > 0)
1265					goto badnat;
1266				in.s_addr = ntohl(fin->fin_saddr);
1267			} else if ((np->in_outmsk != 0xffffffff) &&
1268				   (np->in_pnext == 0) &&
1269				   ((l > 0) || (hm == NULL)))
1270				np->in_nip++;
1271			natl = NULL;
1272
1273			if ((nflags & IPN_TCPUDP) &&
1274			    ((np->in_redir & NAT_MAPBLK) == 0) &&
1275			    (np->in_flags & IPN_AUTOPORTMAP)) {
1276				if ((l > 0) && (l % np->in_ppip == 0)) {
1277					if (l > np->in_space) {
1278						goto badnat;
1279					} else if ((l > np->in_ppip) &&
1280						   np->in_outmsk != 0xffffffff)
1281						np->in_nip++;
1282				}
1283				if (np->in_ppip != 0) {
1284					port = ntohs(sport);
1285					port += (l % np->in_ppip);
1286					port %= np->in_ppip;
1287					port += np->in_ppip *
1288						(ntohl(fin->fin_saddr) %
1289						 np->in_ippip);
1290					port += MAPBLK_MINPORT;
1291					port = htons(port);
1292				}
1293			} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1294				   (nflags & IPN_TCPUDP) &&
1295				   (np->in_pnext != 0)) {
1296				port = htons(np->in_pnext++);
1297				if (np->in_pnext > ntohs(np->in_pmax)) {
1298					np->in_pnext = ntohs(np->in_pmin);
1299					if (np->in_outmsk != 0xffffffff)
1300						np->in_nip++;
1301				}
1302			}
1303
1304			if (np->in_flags & IPN_IPRANGE) {
1305				if (np->in_nip > ntohl(np->in_outmsk))
1306					np->in_nip = ntohl(np->in_outip);
1307			} else {
1308				if ((np->in_outmsk != 0xffffffff) &&
1309				    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1310				    ntohl(np->in_outip))
1311					np->in_nip = ntohl(np->in_outip) + 1;
1312			}
1313
1314			if (!port && (flags & IPN_TCPUDP))
1315				port = sport;
1316
1317			/*
1318			 * Here we do a lookup of the connection as seen from
1319			 * the outside.  If an IP# pair already exists, try
1320			 * again.  So if you have A->B becomes C->B, you can
1321			 * also have D->E become C->E but not D->B causing
1322			 * another C->B.  Also take protocol and ports into
1323			 * account when determining whether a pre-existing
1324			 * NAT setup will cause an external conflict where
1325			 * this is appropriate.
1326			 */
1327			inb.s_addr = htonl(in.s_addr);
1328			sp = fin->fin_data[0];
1329			dp = fin->fin_data[1];
1330			fin->fin_data[0] = fin->fin_data[1];
1331			fin->fin_data[1] = htons(port);
1332			natl = nat_inlookup(fin, flags & ~FI_WILDP,
1333					    (u_int)fin->fin_p, fin->fin_dst,
1334					    inb, 1);
1335			fin->fin_data[0] = sp;
1336			fin->fin_data[1] = dp;
1337
1338			/*
1339			 * Has the search wrapped around and come back to the
1340			 * start ?
1341			 */
1342			if ((natl != NULL) &&
1343			    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1344			    (np->in_nip != 0) && (st_ip == np->in_nip))
1345				goto badnat;
1346			l++;
1347		} while (natl != NULL);
1348
1349		if (np->in_space > 0)
1350			np->in_space--;
1351
1352		/* Setup the NAT table */
1353		nat->nat_inip = fin->fin_src;
1354		nat->nat_outip.s_addr = htonl(in.s_addr);
1355		nat->nat_oip = fin->fin_dst;
1356		if (nat->nat_hm == NULL)
1357			nat->nat_hm = nat_hostmap(np, fin->fin_src,
1358						  nat->nat_outip);
1359
1360		sum1 = LONG_SUM(ntohl(fin->fin_saddr)) + ntohs(sport);
1361		sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1362
1363		if (flags & IPN_TCPUDP) {
1364			nat->nat_inport = sport;
1365			nat->nat_outport = port;	/* sport */
1366			nat->nat_oport = dport;
1367		}
1368	} else {
1369		/*
1370		 * Otherwise, it's an inbound packet. Most likely, we don't
1371		 * want to rewrite source ports and source addresses. Instead,
1372		 * we want to rewrite to a fixed internal address and fixed
1373		 * internal port.
1374		 */
1375		if (np->in_flags & IPN_SPLIT) {
1376			in.s_addr = np->in_nip;
1377			if (np->in_inip == htonl(in.s_addr))
1378				np->in_nip = ntohl(np->in_inmsk);
1379			else {
1380				np->in_nip = ntohl(np->in_inip);
1381				if (np->in_flags & IPN_ROUNDR) {
1382					nat_delrdr(np);
1383					nat_addrdr(np);
1384				}
1385			}
1386		} else {
1387			in.s_addr = ntohl(np->in_inip);
1388			if (np->in_flags & IPN_ROUNDR) {
1389				nat_delrdr(np);
1390				nat_addrdr(np);
1391			}
1392		}
1393		if (!np->in_pnext)
1394			nport = dport;
1395		else {
1396			/*
1397			 * Whilst not optimized for the case where
1398			 * pmin == pmax, the gain is not significant.
1399			 */
1400			if (np->in_pmin != np->in_pmax) {
1401				nport = ntohs(dport) - ntohs(np->in_pmin) +
1402					ntohs(np->in_pnext);
1403				nport = ntohs(nport);
1404			} else
1405				nport = np->in_pnext;
1406		}
1407
1408		/*
1409		 * When the redirect-to address is set to 0.0.0.0, just
1410		 * assume a blank `forwarding' of the packet.
1411		 */
1412		if (in.s_addr == 0)
1413			in.s_addr = ntohl(fin->fin_daddr);
1414
1415		nat->nat_inip.s_addr = htonl(in.s_addr);
1416		nat->nat_outip = fin->fin_dst;
1417		nat->nat_oip = fin->fin_src;
1418
1419		sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
1420		sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1421
1422		if (flags & IPN_TCPUDP) {
1423			nat->nat_inport = nport;
1424			nat->nat_outport = dport;
1425			nat->nat_oport = sport;
1426		}
1427	}
1428
1429	CALC_SUMD(sum1, sum2, sumd);
1430	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1431#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1432	if ((flags & IPN_TCPUDP) && dohwcksum &&
1433	    (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1434		if (direction == NAT_OUTBOUND)
1435			sum1 = LONG_SUM(ntohl(in.s_addr));
1436		else
1437			sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1438		sum1 += LONG_SUM(ntohl(fin->fin_daddr));
1439		sum1 += IPPROTO_TCP;
1440		sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1441		nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1442	} else
1443#endif
1444		nat->nat_sumd[1] = nat->nat_sumd[0];
1445
1446	if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1447		if (direction == NAT_OUTBOUND)
1448			sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1449		else
1450			sum1 = LONG_SUM(ntohl(fin->fin_daddr));
1451
1452		sum2 = LONG_SUM(in.s_addr);
1453
1454		CALC_SUMD(sum1, sum2, sumd);
1455		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1456	} else
1457		nat->nat_ipsumd = nat->nat_sumd[0];
1458
1459	in.s_addr = htonl(in.s_addr);
1460
1461	strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1462
1463	nat->nat_me = natsave;
1464	nat->nat_dir = direction;
1465	nat->nat_ifp = fin->fin_ifp;
1466	nat->nat_ptr = np;
1467	nat->nat_p = fin->fin_p;
1468	nat->nat_bytes = 0;
1469	nat->nat_pkts = 0;
1470	nat->nat_fr = fin->fin_fr;
1471	if (nat->nat_fr != NULL) {
1472		ATOMIC_INC32(nat->nat_fr->fr_ref);
1473	}
1474	if (direction == NAT_OUTBOUND) {
1475		if (flags & IPN_TCPUDP)
1476			tcp->th_sport = port;
1477	} else {
1478		if (flags & IPN_TCPUDP)
1479			tcp->th_dport = nport;
1480	}
1481
1482	nat_insert(nat);
1483
1484	if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1485	    (tcp != NULL && dport == np->in_dport)))
1486		(void) appr_new(fin, ip, nat);
1487
1488	np->in_use++;
1489#ifdef	IPFILTER_LOG
1490	nat_log(nat, (u_int)np->in_redir);
1491#endif
1492	return nat;
1493badnat:
1494	nat_stats.ns_badnat++;
1495	if ((hm = nat->nat_hm) != NULL)
1496		nat_hostmapdel(hm);
1497	KFREE(nat);
1498	return NULL;
1499}
1500
1501
1502/*
1503 * Insert a NAT entry into the hash tables for searching and add it to the
1504 * list of active NAT entries.  Adjust global counters when complete.
1505 */
1506void	nat_insert(nat)
1507nat_t	*nat;
1508{
1509	u_int hv1, hv2;
1510	nat_t **natp;
1511
1512	MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1513
1514	nat->nat_age = fr_defnatage;
1515	nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1516	if (nat->nat_ifname[0] !='\0') {
1517		nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1518	}
1519
1520	nat->nat_next = nat_instances;
1521	nat_instances = nat;
1522
1523	if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) {
1524		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1525				  0xffffffff);
1526		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
1527				  ipf_nattable_sz);
1528		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1529				  0xffffffff);
1530		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
1531				 ipf_nattable_sz);
1532	} else {
1533		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_inip.s_addr,
1534				  ipf_nattable_sz);
1535		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_outip.s_addr,
1536				  ipf_nattable_sz);
1537	}
1538
1539	natp = &nat_table[0][hv1];
1540	if (*natp)
1541		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
1542	nat->nat_phnext[0] = natp;
1543	nat->nat_hnext[0] = *natp;
1544	*natp = nat;
1545
1546	natp = &nat_table[1][hv2];
1547	if (*natp)
1548		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
1549	nat->nat_phnext[1] = natp;
1550	nat->nat_hnext[1] = *natp;
1551	*natp = nat;
1552
1553	nat_stats.ns_added++;
1554	nat_stats.ns_inuse++;
1555}
1556
1557
1558nat_t *nat_icmplookup(ip, fin, dir)
1559ip_t *ip;
1560fr_info_t *fin;
1561int dir;
1562{
1563	icmphdr_t *icmp;
1564	tcphdr_t *tcp = NULL;
1565	ip_t *oip;
1566	int flags = 0, type, minlen;
1567
1568	icmp = (icmphdr_t *)fin->fin_dp;
1569	/*
1570	 * Does it at least have the return (basic) IP header ?
1571	 * Only a basic IP header (no options) should be with an ICMP error
1572	 * header.
1573	 */
1574	if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1575		return NULL;
1576	type = icmp->icmp_type;
1577	/*
1578	 * If it's not an error type, then return.
1579	 */
1580	if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1581	    (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1582	    (type != ICMP_PARAMPROB))
1583		return NULL;
1584
1585	oip = (ip_t *)((char *)fin->fin_dp + 8);
1586	minlen = (oip->ip_hl << 2);
1587	if (minlen < sizeof(ip_t))
1588		return NULL;
1589	if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1590		return NULL;
1591	/*
1592	 * Is the buffer big enough for all of it ?  It's the size of the IP
1593	 * header claimed in the encapsulated part which is of concern.  It
1594	 * may be too big to be in this buffer but not so big that it's
1595	 * outside the ICMP packet, leading to TCP deref's causing problems.
1596	 * This is possible because we don't know how big oip_hl is when we
1597	 * do the pullup early in fr_check() and thus can't gaurantee it is
1598	 * all here now.
1599	 */
1600#ifdef  _KERNEL
1601	{
1602	mb_t *m;
1603
1604# if SOLARIS
1605	m = fin->fin_qfm;
1606	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
1607		return NULL;
1608# else
1609	m = *(mb_t **)fin->fin_mp;
1610	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
1611	    (char *)ip + m->m_len)
1612		return NULL;
1613# endif
1614	}
1615#endif
1616
1617	if (oip->ip_p == IPPROTO_TCP)
1618		flags = IPN_TCP;
1619	else if (oip->ip_p == IPPROTO_UDP)
1620		flags = IPN_UDP;
1621	if (flags & IPN_TCPUDP) {
1622		u_short	data[2];
1623		nat_t *nat;
1624
1625		minlen += 8;		/* + 64bits of data to get ports */
1626		if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1627			return NULL;
1628
1629		data[0] = fin->fin_data[0];
1630		data[1] = fin->fin_data[1];
1631		tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1632		fin->fin_data[0] = ntohs(tcp->th_dport);
1633		fin->fin_data[1] = ntohs(tcp->th_sport);
1634
1635		if (dir == NAT_INBOUND) {
1636			nat = nat_inlookup(fin, flags, (u_int)oip->ip_p,
1637					    oip->ip_dst, oip->ip_src, 0);
1638		} else {
1639			nat = nat_outlookup(fin, flags, (u_int)oip->ip_p,
1640					    oip->ip_dst, oip->ip_src, 0);
1641		}
1642		fin->fin_data[0] = data[0];
1643		fin->fin_data[1] = data[1];
1644		return nat;
1645	}
1646	if (dir == NAT_INBOUND)
1647		return nat_inlookup(fin, 0, (u_int)oip->ip_p,
1648				    oip->ip_dst, oip->ip_src, 0);
1649	else
1650		return nat_outlookup(fin, 0, (u_int)oip->ip_p,
1651				    oip->ip_dst, oip->ip_src, 0);
1652}
1653
1654
1655/*
1656 * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP
1657 * packet gets correctly recognised.
1658 */
1659nat_t *nat_icmp(ip, fin, nflags, dir)
1660ip_t *ip;
1661fr_info_t *fin;
1662u_int *nflags;
1663int dir;
1664{
1665	u_32_t sum1, sum2, sumd, sumd2 = 0;
1666	struct in_addr in;
1667	int flags, dlen;
1668	icmphdr_t *icmp;
1669	udphdr_t *udp;
1670	tcphdr_t *tcp;
1671	nat_t *nat;
1672	ip_t *oip;
1673
1674	if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0))
1675		return NULL;
1676	/*
1677	 * nat_icmplookup() will return NULL for `defective' packets.
1678	 */
1679	if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1680		return NULL;
1681
1682	flags = 0;
1683	*nflags = IPN_ICMPERR;
1684	icmp = (icmphdr_t *)fin->fin_dp;
1685	oip = (ip_t *)&icmp->icmp_ip;
1686	if (oip->ip_p == IPPROTO_TCP)
1687		flags = IPN_TCP;
1688	else if (oip->ip_p == IPPROTO_UDP)
1689		flags = IPN_UDP;
1690	udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1691	dlen = ip->ip_len - ((char *)udp - (char *)ip);
1692	/*
1693	 * XXX - what if this is bogus hl and we go off the end ?
1694	 * In this case, nat_icmplookup() will have returned NULL.
1695	 */
1696	tcp = (tcphdr_t *)udp;
1697
1698	/*
1699	 * Need to adjust ICMP header to include the real IP#'s and
1700	 * port #'s.  Only apply a checksum change relative to the
1701	 * IP address change as it will be modified again in ip_natout
1702	 * for both address and port.  Two checksum changes are
1703	 * necessary for the two header address changes.  Be careful
1704	 * to only modify the checksum once for the port # and twice
1705	 * for the IP#.
1706	 */
1707
1708	/*
1709	 * Step 1
1710	 * Fix the IP addresses in the offending IP packet. You also need
1711	 * to adjust the IP header checksum of that offending IP packet
1712	 * and the ICMP checksum of the ICMP error message itself.
1713	 *
1714	 * Unfortunately, for UDP and TCP, the IP addresses are also contained
1715	 * in the pseudo header that is used to compute the UDP resp. TCP
1716	 * checksum. So, we must compensate that as well. Even worse, the
1717	 * change in the UDP and TCP checksums require yet another
1718	 * adjustment of the ICMP checksum of the ICMP error message.
1719	 *
1720	 */
1721
1722	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
1723		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1724		in = nat->nat_inip;
1725		oip->ip_src = in;
1726	} else {
1727		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1728		in = nat->nat_outip;
1729		oip->ip_dst = in;
1730	}
1731
1732	sum2 = LONG_SUM(ntohl(in.s_addr));
1733
1734	CALC_SUMD(sum1, sum2, sumd);
1735
1736	if (nat->nat_dir == NAT_OUTBOUND) {
1737		/*
1738		 * Fix IP checksum of the offending IP packet to adjust for
1739		 * the change in the IP address.
1740		 *
1741		 * Normally, you would expect that the ICMP checksum of the
1742		 * ICMP error message needs to be adjusted as well for the
1743		 * IP address change in oip.
1744		 * However, this is a NOP, because the ICMP checksum is
1745		 * calculated over the complete ICMP packet, which includes the
1746		 * changed oip IP addresses and oip->ip_sum. However, these
1747		 * two changes cancel each other out (if the delta for
1748		 * the IP address is x, then the delta for ip_sum is minus x),
1749		 * so no change in the icmp_cksum is necessary.
1750		 *
1751		 * Be careful that nat_dir refers to the direction of the
1752		 * offending IP packet (oip), not to its ICMP response (icmp)
1753		 */
1754		fix_datacksum(&oip->ip_sum, sumd);
1755
1756		/*
1757		 * Fix UDP pseudo header checksum to compensate for the
1758		 * IP address change.
1759		 */
1760		if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1761			/*
1762			 * The UDP checksum is optional, only adjust it
1763			 * if it has been set.
1764			 */
1765			sum1 = ntohs(udp->uh_sum);
1766			fix_datacksum(&udp->uh_sum, sumd);
1767			sum2 = ntohs(udp->uh_sum);
1768
1769			/*
1770			 * Fix ICMP checksum to compensate the UDP
1771			 * checksum adjustment.
1772			 */
1773			CALC_SUMD(sum1, sum2, sumd);
1774			sumd2 = sumd;
1775		}
1776
1777		/*
1778		 * Fix TCP pseudo header checksum to compensate for the
1779		 * IP address change. Before we can do the change, we
1780		 * must make sure that oip is sufficient large to hold
1781		 * the TCP checksum (normally it does not!).
1782		 */
1783		if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1784
1785			sum1 = ntohs(tcp->th_sum);
1786			fix_datacksum(&tcp->th_sum, sumd);
1787			sum2 = ntohs(tcp->th_sum);
1788
1789			/*
1790			 * Fix ICMP checksum to compensate the TCP
1791			 * checksum adjustment.
1792			 */
1793			CALC_SUMD(sum1, sum2, sumd);
1794			sumd2 = sumd;
1795		}
1796	} else {
1797
1798		/*
1799		 * Fix IP checksum of the offending IP packet to adjust for
1800		 * the change in the IP address.
1801		 *
1802		 * Normally, you would expect that the ICMP checksum of the
1803		 * ICMP error message needs to be adjusted as well for the
1804		 * IP address change in oip.
1805		 * However, this is a NOP, because the ICMP checksum is
1806		 * calculated over the complete ICMP packet, which includes the
1807		 * changed oip IP addresses and oip->ip_sum. However, these
1808		 * two changes cancel each other out (if the delta for
1809		 * the IP address is x, then the delta for ip_sum is minus x),
1810		 * so no change in the icmp_cksum is necessary.
1811		 *
1812		 * Be careful that nat_dir refers to the direction of the
1813		 * offending IP packet (oip), not to its ICMP response (icmp)
1814		 */
1815		fix_datacksum(&oip->ip_sum, sumd);
1816
1817/* XXX FV : without having looked at Solaris source code, it seems unlikely
1818 * that SOLARIS would compensate this in the kernel (a body of an IP packet
1819 * in the data section of an ICMP packet). I have the feeling that this should
1820 * be unconditional, but I'm not in a position to check.
1821 */
1822#if !SOLARIS && !defined(__sgi)
1823		/*
1824		 * Fix UDP pseudo header checksum to compensate for the
1825		 * IP address change.
1826		 */
1827		if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1828			/*
1829			 * The UDP checksum is optional, only adjust it
1830			 * if it has been set
1831			 */
1832			sum1 = ntohs(udp->uh_sum);
1833			fix_datacksum(&udp->uh_sum, sumd);
1834			sum2 = ntohs(udp->uh_sum);
1835
1836			/*
1837			 * Fix ICMP checksum to compensate the UDP
1838			 * checksum adjustment.
1839			 */
1840			CALC_SUMD(sum1, sum2, sumd);
1841			sumd2 = sumd;
1842		}
1843
1844		/*
1845		 * Fix TCP pseudo header checksum to compensate for the
1846		 * IP address change. Before we can do the change, we
1847		 * must make sure that oip is sufficient large to hold
1848		 * the TCP checksum (normally it does not!).
1849		 */
1850		if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1851
1852			sum1 = ntohs(tcp->th_sum);
1853			fix_datacksum(&tcp->th_sum, sumd);
1854			sum2 = ntohs(tcp->th_sum);
1855
1856			/*
1857			 * Fix ICMP checksum to compensate the TCP
1858			 * checksum adjustment.
1859			 */
1860			CALC_SUMD(sum1, sum2, sumd);
1861			sumd2 = sumd;
1862		}
1863#endif
1864	}
1865
1866	if ((flags & IPN_TCPUDP) != 0) {
1867		/*
1868		 * Step 2 :
1869		 * For offending TCP/UDP IP packets, translate the ports as
1870		 * well, based on the NAT specification. Of course such
1871		 * a change must be reflected in the ICMP checksum as well.
1872		 *
1873		 * Advance notice : Now it becomes complicated :-)
1874		 *
1875		 * Since the port fields are part of the TCP/UDP checksum
1876		 * of the offending IP packet, you need to adjust that checksum
1877		 * as well... but, if you change, you must change the icmp
1878		 * checksum *again*, to reflect that change.
1879		 *
1880		 * To further complicate: the TCP checksum is not in the first
1881		 * 8 bytes of the offending ip packet, so it most likely is not
1882		 * available. Some OSses like Solaris return enough bytes to
1883		 * include the TCP checksum. So we have to check if the
1884		 * ip->ip_len actually holds the TCP checksum of the oip!
1885		 */
1886
1887		if (nat->nat_oport == tcp->th_dport) {
1888			if (tcp->th_sport != nat->nat_inport) {
1889				/*
1890				 * Fix ICMP checksum to compensate port
1891				 * adjustment.
1892				 */
1893				sum1 = ntohs(tcp->th_sport);
1894				sum2 = ntohs(nat->nat_inport);
1895				CALC_SUMD(sum1, sum2, sumd);
1896				sumd2 += sumd;
1897				tcp->th_sport = nat->nat_inport;
1898
1899				/*
1900				 * Fix udp checksum to compensate port
1901				 * adjustment.  NOTE : the offending IP packet
1902				 * flows the other direction compared to the
1903				 * ICMP message.
1904				 *
1905				 * The UDP checksum is optional, only adjust
1906				 * it if it has been set.
1907				 */
1908				if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1909
1910					sum1 = ntohs(udp->uh_sum);
1911					fix_datacksum(&udp->uh_sum, sumd);
1912					sum2 = ntohs(udp->uh_sum);
1913
1914					/*
1915					 * Fix ICMP checksum to
1916					 * compensate UDP checksum
1917					 * adjustment.
1918					 */
1919					CALC_SUMD(sum1, sum2, sumd);
1920					sumd2 += sumd;
1921				}
1922
1923				/*
1924				 * Fix tcp checksum (if present) to compensate
1925				 * port adjustment. NOTE : the offending IP
1926				 * packet flows the other direction compared to
1927				 * the ICMP message.
1928				 */
1929				if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1930
1931					sum1 = ntohs(tcp->th_sum);
1932					fix_datacksum(&tcp->th_sum, sumd);
1933					sum2 = ntohs(tcp->th_sum);
1934
1935					/*
1936					 * Fix ICMP checksum to
1937					 * compensate TCP checksum
1938					 * adjustment.
1939					 */
1940					CALC_SUMD(sum1, sum2, sumd);
1941					sumd2 += sumd;
1942				}
1943			}
1944		} else {
1945			if (tcp->th_dport != nat->nat_outport) {
1946				/*
1947				 * Fix ICMP checksum to compensate port
1948				 * adjustment.
1949				 */
1950				sum1 = ntohs(tcp->th_dport);
1951				sum2 = ntohs(nat->nat_outport);
1952				CALC_SUMD(sum1, sum2, sumd);
1953				sumd2 += sumd;
1954				tcp->th_dport = nat->nat_outport;
1955
1956				/*
1957				 * Fix udp checksum to compensate port
1958				 * adjustment.   NOTE : the offending IP
1959				 * packet flows the other direction compared
1960				 * to the ICMP message.
1961				 *
1962				 * The UDP checksum is optional, only adjust
1963				 * it if it has been set.
1964				 */
1965				if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1966
1967					sum1 = ntohs(udp->uh_sum);
1968					fix_datacksum(&udp->uh_sum, sumd);
1969					sum2 = ntohs(udp->uh_sum);
1970
1971					/*
1972					 * Fix ICMP checksum to compensate
1973					 * UDP checksum adjustment.
1974					 */
1975					CALC_SUMD(sum1, sum2, sumd);
1976					sumd2 += sumd;
1977				}
1978
1979				/*
1980				 * Fix tcp checksum (if present) to compensate
1981				 * port adjustment. NOTE : the offending IP
1982				 * packet flows the other direction compared to
1983				 * the ICMP message.
1984				 */
1985				if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1986
1987					sum1 = ntohs(tcp->th_sum);
1988					fix_datacksum(&tcp->th_sum, sumd);
1989					sum2 = ntohs(tcp->th_sum);
1990
1991					/*
1992					 * Fix ICMP checksum to compensate
1993					 * UDP checksum adjustment.
1994					 */
1995					CALC_SUMD(sum1, sum2, sumd);
1996					sumd2 += sumd;
1997				}
1998			}
1999		}
2000		if (sumd2) {
2001			sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2002			sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2003			if (nat->nat_dir == NAT_OUTBOUND) {
2004				fix_outcksum(fin, &icmp->icmp_cksum, sumd2);
2005			} else {
2006				fix_incksum(fin, &icmp->icmp_cksum, sumd2);
2007			}
2008		}
2009	}
2010	if (oip->ip_p == IPPROTO_ICMP)
2011		nat->nat_age = fr_defnaticmpage;
2012	return nat;
2013}
2014
2015
2016/*
2017 * NB: these lookups don't lock access to the list, it assume it has already
2018 * been done!
2019 */
2020/*
2021 * Lookup a nat entry based on the mapped destination ip address/port and
2022 * real source address/port.  We use this lookup when receiving a packet,
2023 * we're looking for a table entry, based on the destination address.
2024 * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2025 */
2026nat_t *nat_inlookup(fin, flags, p, src, mapdst, rw)
2027fr_info_t *fin;
2028register u_int flags, p;
2029struct in_addr src , mapdst;
2030int rw;
2031{
2032	register u_short sport, dport;
2033	register nat_t *nat;
2034	register int nflags;
2035	register u_32_t dst;
2036	ipnat_t *ipn;
2037	void *ifp;
2038	u_int hv;
2039
2040	if (fin != NULL)
2041		ifp = fin->fin_ifp;
2042	else
2043		ifp = NULL;
2044	dst = mapdst.s_addr;
2045	if (flags & IPN_TCPUDP) {
2046		sport = htons(fin->fin_data[0]);
2047		dport = htons(fin->fin_data[1]);
2048	} else {
2049		sport = 0;
2050		dport = 0;
2051	}
2052
2053	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
2054	hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
2055	nat = nat_table[1][hv];
2056	for (; nat; nat = nat->nat_hnext[1]) {
2057		nflags = nat->nat_flags;
2058		if ((!ifp || ifp == nat->nat_ifp) &&
2059		    nat->nat_oip.s_addr == src.s_addr &&
2060		    nat->nat_outip.s_addr == dst &&
2061		    ((p == 0) || (p == nat->nat_p))) {
2062			switch (p)
2063			{
2064			case IPPROTO_TCP :
2065			case IPPROTO_UDP :
2066				if (nat->nat_oport != sport)
2067					continue;
2068				if (nat->nat_outport != dport)
2069					continue;
2070				break;
2071			default :
2072				break;
2073			}
2074
2075			ipn = nat->nat_ptr;
2076			if ((ipn != NULL) && (nat->nat_aps != NULL))
2077				if (appr_match(fin, nat) != 0)
2078					continue;
2079			return nat;
2080		}
2081	}
2082	if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2083		return NULL;
2084	if (!rw) {
2085		RWLOCK_EXIT(&ipf_nat);
2086	}
2087	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
2088	hv = NAT_HASH_FN(src.s_addr, dst, ipf_nattable_sz);
2089	if (!rw) {
2090		WRITE_ENTER(&ipf_nat);
2091	}
2092	nat = nat_table[1][hv];
2093	for (; nat; nat = nat->nat_hnext[1]) {
2094		nflags = nat->nat_flags;
2095		if (ifp && ifp != nat->nat_ifp)
2096			continue;
2097		if (!(nflags & FI_WILDP))
2098			continue;
2099		if (nat->nat_oip.s_addr != src.s_addr ||
2100		    nat->nat_outip.s_addr != dst)
2101			continue;
2102		if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
2103		    ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) {
2104			nat_tabmove(fin, nat);
2105			break;
2106		}
2107	}
2108	if (!rw) {
2109		MUTEX_DOWNGRADE(&ipf_nat);
2110	}
2111	return nat;
2112}
2113
2114
2115/*
2116 * This function is only called for TCP/UDP NAT table entries where the
2117 * original was placed in the table without hashing on the ports and we now
2118 * want to include hashing on port numbers.
2119 */
2120static void nat_tabmove(fin, nat)
2121fr_info_t *fin;
2122nat_t *nat;
2123{
2124	register u_short sport, dport;
2125	u_int hv, nflags;
2126	nat_t **natp;
2127
2128	nflags = nat->nat_flags;
2129
2130	sport = ntohs(fin->fin_data[0]);
2131	dport = ntohs(fin->fin_data[1]);
2132
2133	/*
2134	 * Remove the NAT entry from the old location
2135	 */
2136	if (nat->nat_hnext[0])
2137		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
2138	*nat->nat_phnext[0] = nat->nat_hnext[0];
2139
2140	if (nat->nat_hnext[1])
2141		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
2142	*nat->nat_phnext[1] = nat->nat_hnext[1];
2143
2144	/*
2145	 * Add into the NAT table in the new position
2146	 */
2147	hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, 0xffffffff);
2148	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2149	natp = &nat_table[0][hv];
2150	if (*natp)
2151		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2152	nat->nat_phnext[0] = natp;
2153	nat->nat_hnext[0] = *natp;
2154	*natp = nat;
2155
2156	hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, 0xffffffff);
2157	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2158	natp = &nat_table[1][hv];
2159	if (*natp)
2160		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2161	nat->nat_phnext[1] = natp;
2162	nat->nat_hnext[1] = *natp;
2163	*natp = nat;
2164}
2165
2166
2167/*
2168 * Lookup a nat entry based on the source 'real' ip address/port and
2169 * destination address/port.  We use this lookup when sending a packet out,
2170 * we're looking for a table entry, based on the source address.
2171 * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2172 */
2173nat_t *nat_outlookup(fin, flags, p, src, dst, rw)
2174fr_info_t *fin;
2175register u_int flags, p;
2176struct in_addr src , dst;
2177int rw;
2178{
2179	register u_short sport, dport;
2180	register nat_t *nat;
2181	register int nflags;
2182	ipnat_t *ipn;
2183	u_32_t srcip;
2184	void *ifp;
2185	u_int hv;
2186
2187	ifp = fin->fin_ifp;
2188	srcip = src.s_addr;
2189	if (flags & IPN_TCPUDP) {
2190		sport = ntohs(fin->fin_data[0]);
2191		dport = ntohs(fin->fin_data[1]);
2192	} else {
2193		sport = 0;
2194		dport = 0;
2195	}
2196
2197	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
2198	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
2199	nat = nat_table[0][hv];
2200	for (; nat; nat = nat->nat_hnext[0]) {
2201		nflags = nat->nat_flags;
2202
2203		if ((!ifp || ifp == nat->nat_ifp) &&
2204		    nat->nat_inip.s_addr == srcip &&
2205		    nat->nat_oip.s_addr == dst.s_addr &&
2206		    ((p == 0) || (p == nat->nat_p))) {
2207			switch (p)
2208			{
2209			case IPPROTO_TCP :
2210			case IPPROTO_UDP :
2211				if (nat->nat_oport != dport)
2212					continue;
2213				if (nat->nat_inport != sport)
2214					continue;
2215				break;
2216			default :
2217				break;
2218			}
2219
2220			ipn = nat->nat_ptr;
2221			if ((ipn != NULL) && (nat->nat_aps != NULL))
2222				if (appr_match(fin, nat) != 0)
2223					continue;
2224			return nat;
2225		}
2226	}
2227	if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2228		return NULL;
2229	if (!rw) {
2230		RWLOCK_EXIT(&ipf_nat);
2231	}
2232
2233	hv = NAT_HASH_FN(dst.s_addr, srcip, ipf_nattable_sz);
2234	if (!rw) {
2235		WRITE_ENTER(&ipf_nat);
2236	}
2237	nat = nat_table[0][hv];
2238	for (; nat; nat = nat->nat_hnext[0]) {
2239		nflags = nat->nat_flags;
2240		if (ifp && ifp != nat->nat_ifp)
2241			continue;
2242		if (!(nflags & FI_WILDP))
2243			continue;
2244		if ((nat->nat_inip.s_addr != srcip) ||
2245		    (nat->nat_oip.s_addr != dst.s_addr))
2246			continue;
2247		if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
2248		    ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
2249			nat_tabmove(fin, nat);
2250			break;
2251		}
2252	}
2253	if (!rw) {
2254		MUTEX_DOWNGRADE(&ipf_nat);
2255	}
2256	return nat;
2257}
2258
2259
2260/*
2261 * Lookup the NAT tables to search for a matching redirect
2262 */
2263nat_t *nat_lookupredir(np)
2264register natlookup_t *np;
2265{
2266	nat_t *nat;
2267	fr_info_t fi;
2268
2269	bzero((char *)&fi, sizeof(fi));
2270	fi.fin_data[0] = np->nl_inport;
2271	fi.fin_data[1] = np->nl_outport;
2272
2273	/*
2274	 * If nl_inip is non null, this is a lookup based on the real
2275	 * ip address. Else, we use the fake.
2276	 */
2277	if ((nat = nat_outlookup(&fi, np->nl_flags, 0, np->nl_inip,
2278				 np->nl_outip, 0))) {
2279		np->nl_realip = nat->nat_outip;
2280		np->nl_realport = nat->nat_outport;
2281	}
2282	return nat;
2283}
2284
2285
2286static int nat_match(fin, np, ip)
2287fr_info_t *fin;
2288ipnat_t *np;
2289ip_t *ip;
2290{
2291	frtuc_t *ft;
2292
2293	if (ip->ip_v != 4)
2294		return 0;
2295
2296	if (np->in_p && fin->fin_p != np->in_p)
2297		return 0;
2298	if (fin->fin_out) {
2299		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2300			return 0;
2301		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
2302		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
2303			return 0;
2304		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
2305		    ^ ((np->in_flags & IPN_NOTDST) != 0))
2306			return 0;
2307	} else {
2308		if (!(np->in_redir & NAT_REDIRECT))
2309			return 0;
2310		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
2311		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
2312			return 0;
2313		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
2314		    ^ ((np->in_flags & IPN_NOTDST) != 0))
2315			return 0;
2316	}
2317
2318	ft = &np->in_tuc;
2319	if (!(fin->fin_fl & FI_TCPUDP) ||
2320	    (fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) {
2321		if (ft->ftu_scmp || ft->ftu_dcmp)
2322			return 0;
2323		return 1;
2324	}
2325
2326	return fr_tcpudpchk(ft, fin);
2327}
2328
2329
2330/*
2331 * Packets going out on the external interface go through this.
2332 * Here, the source address requires alteration, if anything.
2333 */
2334int ip_natout(ip, fin)
2335ip_t *ip;
2336fr_info_t *fin;
2337{
2338	register ipnat_t *np = NULL;
2339	register u_32_t ipa;
2340	tcphdr_t *tcp = NULL;
2341	u_short sport = 0, dport = 0, *csump = NULL;
2342	int natadd = 1, i, icmpset = 1;
2343	u_int nflags = 0, hv, msk;
2344	struct ifnet *ifp;
2345	frentry_t *fr;
2346	void *sifp;
2347	u_32_t iph;
2348	nat_t *nat;
2349
2350	if (nat_list == NULL || (fr_nat_lock))
2351		return 0;
2352
2353	if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
2354	    fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) {
2355		sifp = fin->fin_ifp;
2356		fin->fin_ifp = fr->fr_tif.fd_ifp;
2357	} else
2358		sifp = fin->fin_ifp;
2359	ifp = fin->fin_ifp;
2360
2361	if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2362		if (fin->fin_p == IPPROTO_TCP)
2363			nflags = IPN_TCP;
2364		else if (fin->fin_p == IPPROTO_UDP)
2365			nflags = IPN_UDP;
2366		if ((nflags & IPN_TCPUDP)) {
2367			tcp = (tcphdr_t *)fin->fin_dp;
2368			sport = tcp->th_sport;
2369			dport = tcp->th_dport;
2370		}
2371	}
2372
2373	ipa = fin->fin_saddr;
2374
2375	READ_ENTER(&ipf_nat);
2376
2377	if ((fin->fin_p == IPPROTO_ICMP) &&
2378	    (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
2379		icmpset = 1;
2380	else if ((fin->fin_fl & FI_FRAG) &&
2381	    (nat = ipfr_nat_knownfrag(ip, fin)))
2382		natadd = 0;
2383	else if ((nat = nat_outlookup(fin, nflags|FI_WILDP|FI_WILDA,
2384				      (u_int)fin->fin_p, fin->fin_src,
2385				      fin->fin_dst, 0))) {
2386		nflags = nat->nat_flags;
2387		if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2388			if ((nflags & FI_W_SPORT) &&
2389			    (nat->nat_inport != sport))
2390				nat->nat_inport = sport;
2391			if ((nflags & FI_W_DPORT) &&
2392			    (nat->nat_oport != dport))
2393				nat->nat_oport = dport;
2394
2395			if (nat->nat_outport == 0)
2396				nat->nat_outport = sport;
2397			nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
2398			nflags = nat->nat_flags;
2399			nat_stats.ns_wilds--;
2400		}
2401	} else {
2402		RWLOCK_EXIT(&ipf_nat);
2403
2404		msk = 0xffffffff;
2405		i = 32;
2406
2407		WRITE_ENTER(&ipf_nat);
2408		/*
2409		 * If there is no current entry in the nat table for this IP#,
2410		 * create one for it (if there is a matching rule).
2411		 */
2412maskloop:
2413		iph = ipa & htonl(msk);
2414		hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
2415		for (np = nat_rules[hv]; np; np = np->in_mnext)
2416		{
2417			if (np->in_ifp && (np->in_ifp != ifp))
2418				continue;
2419			if ((np->in_flags & IPN_RF) &&
2420			    !(np->in_flags & nflags))
2421				continue;
2422			if (np->in_flags & IPN_FILTER) {
2423				if (!nat_match(fin, np, ip))
2424					continue;
2425			} else if ((ipa & np->in_inmsk) != np->in_inip)
2426				continue;
2427			if (*np->in_plabel && !appr_ok(ip, tcp, np))
2428				continue;
2429			nat = nat_new(fin, ip, np, NULL,
2430				      (u_int)nflags, NAT_OUTBOUND);
2431			if (nat != NULL) {
2432				np->in_hits++;
2433				break;
2434			}
2435		}
2436		if ((np == NULL) && (i > 0)) {
2437			do {
2438				i--;
2439				msk <<= 1;
2440			} while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
2441			if (i >= 0)
2442				goto maskloop;
2443		}
2444		MUTEX_DOWNGRADE(&ipf_nat);
2445	}
2446
2447	/*
2448	 * NOTE: ipf_nat must now only be held as a read lock
2449	 */
2450	if (nat) {
2451		np = nat->nat_ptr;
2452		if (natadd && (fin->fin_fl & FI_FRAG) && np)
2453			ipfr_nat_newfrag(ip, fin, 0, nat);
2454		MUTEX_ENTER(&nat->nat_lock);
2455		if (fin->fin_p != IPPROTO_TCP) {
2456			if (np && np->in_age[1])
2457				nat->nat_age = np->in_age[1];
2458			else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2459				nat->nat_age = fr_defnaticmpage;
2460			else
2461				nat->nat_age = fr_defnatage;
2462		}
2463		nat->nat_bytes += ip->ip_len;
2464		nat->nat_pkts++;
2465		MUTEX_EXIT(&nat->nat_lock);
2466
2467		/*
2468		 * Fix up checksums, not by recalculating them, but
2469		 * simply computing adjustments.
2470		 */
2471		if (nflags == IPN_ICMPERR) {
2472			u_32_t s1, s2, sumd;
2473
2474			s1 = LONG_SUM(ntohl(fin->fin_saddr));
2475			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
2476			CALC_SUMD(s1, s2, sumd);
2477
2478			if (nat->nat_dir == NAT_OUTBOUND)
2479				fix_outcksum(fin, &ip->ip_sum, sumd);
2480			else
2481				fix_incksum(fin, &ip->ip_sum, sumd);
2482		}
2483#if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
2484		else {
2485			if (nat->nat_dir == NAT_OUTBOUND)
2486				fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2487			else
2488				fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2489		}
2490#endif
2491		/*
2492		 * Only change the packet contents, not what is filtered upon.
2493		 */
2494		ip->ip_src = nat->nat_outip;
2495
2496		if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2497
2498			if ((nat->nat_outport != 0) && (tcp != NULL)) {
2499				tcp->th_sport = nat->nat_outport;
2500				fin->fin_data[0] = ntohs(tcp->th_sport);
2501			}
2502
2503			if (fin->fin_p == IPPROTO_TCP) {
2504				csump = &tcp->th_sum;
2505				MUTEX_ENTER(&nat->nat_lock);
2506				fr_tcp_age(&nat->nat_age,
2507					   nat->nat_tcpstate, fin, 1, 0);
2508				if (nat->nat_age < fr_defnaticmpage)
2509					nat->nat_age = fr_defnaticmpage;
2510#ifdef LARGE_NAT
2511				else if (nat->nat_age > fr_defnatage)
2512					nat->nat_age = fr_defnatage;
2513#endif
2514				/*
2515				 * Increase this because we may have
2516				 * "keep state" following this too and
2517				 * packet storms can occur if this is
2518				 * removed too quickly.
2519				 */
2520				if (nat->nat_age == fr_tcpclosed)
2521					nat->nat_age = fr_tcplastack;
2522				MUTEX_EXIT(&nat->nat_lock);
2523			} else if (fin->fin_p == IPPROTO_UDP) {
2524				udphdr_t *udp = (udphdr_t *)tcp;
2525
2526				if (udp->uh_sum)
2527					csump = &udp->uh_sum;
2528			}
2529
2530			if (csump) {
2531				if (nat->nat_dir == NAT_OUTBOUND)
2532					fix_outcksum(fin, csump,
2533						     nat->nat_sumd[1]);
2534				else
2535					fix_incksum(fin, csump,
2536						    nat->nat_sumd[1]);
2537			}
2538		}
2539
2540		if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2541		     (tcp != NULL && dport == np->in_dport))) {
2542			i = appr_check(ip, fin, nat);
2543			if (i == 0)
2544				i = 1;
2545		} else
2546			i = 1;
2547		ATOMIC_INCL(nat_stats.ns_mapped[1]);
2548		RWLOCK_EXIT(&ipf_nat);	/* READ */
2549		fin->fin_ifp = sifp;
2550		return i;
2551	}
2552	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
2553	fin->fin_ifp = sifp;
2554	return 0;
2555}
2556
2557
2558/*
2559 * Packets coming in from the external interface go through this.
2560 * Here, the destination address requires alteration, if anything.
2561 */
2562int ip_natin(ip, fin)
2563ip_t *ip;
2564fr_info_t *fin;
2565{
2566	register struct in_addr src;
2567	register struct in_addr in;
2568	register ipnat_t *np;
2569	u_short sport = 0, dport = 0, *csump = NULL;
2570	u_int nflags = 0, natadd = 1, hv, msk;
2571	struct ifnet *ifp = fin->fin_ifp;
2572	tcphdr_t *tcp = NULL;
2573	int i, icmpset = 0;
2574	nat_t *nat;
2575	u_32_t iph;
2576
2577	if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
2578		return 0;
2579
2580	if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2581		if (fin->fin_p == IPPROTO_TCP)
2582			nflags = IPN_TCP;
2583		else if (fin->fin_p == IPPROTO_UDP)
2584			nflags = IPN_UDP;
2585		if ((nflags & IPN_TCPUDP)) {
2586			tcp = (tcphdr_t *)fin->fin_dp;
2587			sport = tcp->th_sport;
2588			dport = tcp->th_dport;
2589		}
2590	}
2591
2592	in = fin->fin_dst;
2593	/* make sure the source address is to be redirected */
2594	src = fin->fin_src;
2595
2596	READ_ENTER(&ipf_nat);
2597
2598	if ((fin->fin_p == IPPROTO_ICMP) &&
2599	    (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
2600		icmpset = 1;
2601	else if ((fin->fin_fl & FI_FRAG) &&
2602		 (nat = ipfr_nat_knownfrag(ip, fin)))
2603		natadd = 0;
2604	else if ((nat = nat_inlookup(fin, nflags|FI_WILDP|FI_WILDA,
2605				     (u_int)fin->fin_p, fin->fin_src, in, 0))) {
2606		nflags = nat->nat_flags;
2607		if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2608			if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
2609				nat->nat_oport = sport;
2610			if ((nat->nat_outport != dport) &&
2611				 (nflags & FI_W_SPORT))
2612				nat->nat_outport = dport;
2613			nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2614			nflags = nat->nat_flags;
2615			nat_stats.ns_wilds--;
2616		}
2617	} else {
2618		RWLOCK_EXIT(&ipf_nat);
2619
2620		msk = 0xffffffff;
2621		i = 32;
2622
2623		WRITE_ENTER(&ipf_nat);
2624		/*
2625		 * If there is no current entry in the nat table for this IP#,
2626		 * create one for it (if there is a matching rule).
2627		 */
2628maskloop:
2629		iph = in.s_addr & htonl(msk);
2630		hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2631		for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2632			if ((np->in_ifp && (np->in_ifp != ifp)) ||
2633			    (np->in_p && (np->in_p != fin->fin_p)) ||
2634			    (np->in_flags && !(nflags & np->in_flags)))
2635				continue;
2636			if (np->in_flags & IPN_FILTER) {
2637				if (!nat_match(fin, np, ip))
2638					continue;
2639			} else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2640				continue;
2641			if ((!np->in_pmin || (np->in_flags & IPN_FILTER) ||
2642			     ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2643			      (ntohs(dport) >= ntohs(np->in_pmin)))))
2644				if ((nat = nat_new(fin, ip, np, NULL, nflags,
2645						    NAT_INBOUND))) {
2646					np->in_hits++;
2647					break;
2648				}
2649		}
2650
2651		if ((np == NULL) && (i > 0)) {
2652			do {
2653				i--;
2654				msk <<= 1;
2655			} while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2656			if (i >= 0)
2657				goto maskloop;
2658		}
2659		MUTEX_DOWNGRADE(&ipf_nat);
2660	}
2661
2662	/*
2663	 * NOTE: ipf_nat must now only be held as a read lock
2664	 */
2665	if (nat) {
2666		np = nat->nat_ptr;
2667		fin->fin_fr = nat->nat_fr;
2668		if (natadd && (fin->fin_fl & FI_FRAG) && np)
2669			ipfr_nat_newfrag(ip, fin, 0, nat);
2670		if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2671		     (tcp != NULL && sport == np->in_dport))) {
2672			i = appr_check(ip, fin, nat);
2673			if (i == -1) {
2674				RWLOCK_EXIT(&ipf_nat);
2675				return i;
2676			}
2677		}
2678
2679		MUTEX_ENTER(&nat->nat_lock);
2680		if (fin->fin_p != IPPROTO_TCP) {
2681			if (np && np->in_age[0])
2682				nat->nat_age = np->in_age[0];
2683			else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2684				nat->nat_age = fr_defnaticmpage;
2685			else
2686				nat->nat_age = fr_defnatage;
2687		}
2688		nat->nat_bytes += ip->ip_len;
2689		nat->nat_pkts++;
2690		MUTEX_EXIT(&nat->nat_lock);
2691		ip->ip_dst = nat->nat_inip;
2692		fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2693
2694		/*
2695		 * Fix up checksums, not by recalculating them, but
2696		 * simply computing adjustments.
2697		 */
2698#if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
2699		if (nat->nat_dir == NAT_OUTBOUND)
2700			fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2701		else
2702			fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2703#endif
2704		if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2705
2706			if ((nat->nat_inport != 0) && (tcp != NULL)) {
2707				tcp->th_dport = nat->nat_inport;
2708				fin->fin_data[1] = ntohs(tcp->th_dport);
2709			}
2710
2711			if (fin->fin_p == IPPROTO_TCP) {
2712				csump = &tcp->th_sum;
2713				MUTEX_ENTER(&nat->nat_lock);
2714				fr_tcp_age(&nat->nat_age,
2715					   nat->nat_tcpstate, fin, 0, 0);
2716				if (nat->nat_age < fr_defnaticmpage)
2717					nat->nat_age = fr_defnaticmpage;
2718#ifdef LARGE_NAT
2719				else if (nat->nat_age > fr_defnatage)
2720					nat->nat_age = fr_defnatage;
2721#endif
2722				/*
2723				 * Increase this because we may have
2724				 * "keep state" following this too and
2725				 * packet storms can occur if this is
2726				 * removed too quickly.
2727				 */
2728				if (nat->nat_age == fr_tcpclosed)
2729					nat->nat_age = fr_tcplastack;
2730				MUTEX_EXIT(&nat->nat_lock);
2731			} else if (fin->fin_p == IPPROTO_UDP) {
2732				udphdr_t *udp = (udphdr_t *)tcp;
2733
2734				if (udp->uh_sum)
2735					csump = &udp->uh_sum;
2736			}
2737
2738			if (csump) {
2739				if (nat->nat_dir == NAT_OUTBOUND)
2740					fix_incksum(fin, csump,
2741						    nat->nat_sumd[0]);
2742				else
2743					fix_outcksum(fin, csump,
2744						    nat->nat_sumd[0]);
2745			}
2746		}
2747		ATOMIC_INCL(nat_stats.ns_mapped[0]);
2748		RWLOCK_EXIT(&ipf_nat);			/* READ */
2749		return 1;
2750	}
2751	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
2752	return 0;
2753}
2754
2755
2756/*
2757 * Free all memory used by NAT structures allocated at runtime.
2758 */
2759void ip_natunload()
2760{
2761	WRITE_ENTER(&ipf_nat);
2762	(void) nat_clearlist();
2763	(void) nat_flushtable();
2764	RWLOCK_EXIT(&ipf_nat);
2765
2766	if (nat_table[0] != NULL) {
2767		KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2768		nat_table[0] = NULL;
2769	}
2770	if (nat_table[1] != NULL) {
2771		KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2772		nat_table[1] = NULL;
2773	}
2774	if (nat_rules != NULL) {
2775		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2776		nat_rules = NULL;
2777	}
2778	if (rdr_rules != NULL) {
2779		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2780		rdr_rules = NULL;
2781	}
2782	if (maptable != NULL) {
2783		KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2784		maptable = NULL;
2785	}
2786}
2787
2788
2789/*
2790 * Slowly expire held state for NAT entries.  Timeouts are set in
2791 * expectation of this being called twice per second.
2792 */
2793void ip_natexpire()
2794{
2795	register struct nat *nat, **natp;
2796#if defined(_KERNEL) && !SOLARIS
2797	int s;
2798#endif
2799
2800	SPL_NET(s);
2801	WRITE_ENTER(&ipf_nat);
2802	for (natp = &nat_instances; (nat = *natp); ) {
2803		nat->nat_age--;
2804		if (nat->nat_age) {
2805			natp = &nat->nat_next;
2806			continue;
2807		}
2808		*natp = nat->nat_next;
2809#ifdef	IPFILTER_LOG
2810		nat_log(nat, NL_EXPIRE);
2811#endif
2812		nat_delete(nat);
2813		nat_stats.ns_expire++;
2814	}
2815	RWLOCK_EXIT(&ipf_nat);
2816	SPL_X(s);
2817}
2818
2819
2820/*
2821 */
2822void ip_natsync(ifp)
2823void *ifp;
2824{
2825	register ipnat_t *n;
2826	register nat_t *nat;
2827	register u_32_t sum1, sum2, sumd;
2828	struct in_addr in;
2829	ipnat_t *np;
2830	void *ifp2;
2831#if defined(_KERNEL) && !SOLARIS
2832	int s;
2833#endif
2834
2835	/*
2836	 * Change IP addresses for NAT sessions for any protocol except TCP
2837	 * since it will break the TCP connection anyway.
2838	 */
2839	SPL_NET(s);
2840	WRITE_ENTER(&ipf_nat);
2841	for (nat = nat_instances; nat; nat = nat->nat_next)
2842		if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2843		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2844		    (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2845			ifp2 = nat->nat_ifp;
2846			/*
2847			 * Change the map-to address to be the same as the
2848			 * new one.
2849			 */
2850			sum1 = nat->nat_outip.s_addr;
2851			if (fr_ifpaddr(4, ifp2, &in) != -1)
2852				nat->nat_outip = in;
2853			sum2 = nat->nat_outip.s_addr;
2854
2855			if (sum1 == sum2)
2856				continue;
2857			/*
2858			 * Readjust the checksum adjustment to take into
2859			 * account the new IP#.
2860			 */
2861			CALC_SUMD(sum1, sum2, sumd);
2862			/* XXX - dont change for TCP when solaris does
2863			 * hardware checksumming.
2864			 */
2865			sumd += nat->nat_sumd[0];
2866			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2867			nat->nat_sumd[1] = nat->nat_sumd[0];
2868		}
2869
2870	for (n = nat_list; (n != NULL); n = n->in_next)
2871		if (n->in_ifp == ifp) {
2872			n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2873			if (!n->in_ifp)
2874				n->in_ifp = (void *)-1;
2875		}
2876	RWLOCK_EXIT(&ipf_nat);
2877	SPL_X(s);
2878}
2879
2880
2881#ifdef	IPFILTER_LOG
2882void nat_log(nat, type)
2883struct nat *nat;
2884u_int type;
2885{
2886	struct ipnat *np;
2887	struct natlog natl;
2888	void *items[1];
2889	size_t sizes[1];
2890	int rulen, types[1];
2891
2892	natl.nl_inip = nat->nat_inip;
2893	natl.nl_outip = nat->nat_outip;
2894	natl.nl_origip = nat->nat_oip;
2895	natl.nl_bytes = nat->nat_bytes;
2896	natl.nl_pkts = nat->nat_pkts;
2897	natl.nl_origport = nat->nat_oport;
2898	natl.nl_inport = nat->nat_inport;
2899	natl.nl_outport = nat->nat_outport;
2900	natl.nl_p = nat->nat_p;
2901	natl.nl_type = type;
2902	natl.nl_rule = -1;
2903#ifndef LARGE_NAT
2904	if (nat->nat_ptr != NULL) {
2905		for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
2906			if (np == nat->nat_ptr) {
2907				natl.nl_rule = rulen;
2908				break;
2909			}
2910	}
2911#endif
2912	items[0] = &natl;
2913	sizes[0] = sizeof(natl);
2914	types[0] = 0;
2915
2916	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
2917}
2918#endif
2919
2920
2921#if defined(__OpenBSD__)
/*
 * Interface-detach hook (OpenBSD only).  The detached interface pointer
 * is not examined; the function simply calls frsync().
 */
void nat_ifdetach(ifp)
void *ifp;
{
	frsync();
}
2928#endif
2929