ip_nat.c revision 130886
1/*
2 * Copyright (C) 1995-2001 by Darren Reed.
3 *
4 * See the IPFILTER.LICENCE file for details on licencing.
5 *
6 * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
7 */
8
9#if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL)
10#define _KERNEL
11#endif
12
13#if defined(__sgi) && (IRIX > 602)
14# include <sys/ptimers.h>
15#endif
16#include <sys/errno.h>
17#include <sys/types.h>
18#include <sys/param.h>
19#include <sys/time.h>
20#include <sys/file.h>
21#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22    defined(_KERNEL)
23# include "opt_ipfilter_log.h"
24#endif
25#if !defined(_KERNEL) && !defined(KERNEL)
26# include <stdio.h>
27# include <string.h>
28# include <stdlib.h>
29#endif
30#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
31# include <sys/filio.h>
32# include <sys/fcntl.h>
33#else
34# include <sys/ioctl.h>
35#endif
36#include <sys/fcntl.h>
37#ifndef linux
38# include <sys/protosw.h>
39#endif
40#include <sys/socket.h>
41#if defined(_KERNEL) && !defined(linux)
42# include <sys/systm.h>
43#endif
44#if !defined(__SVR4) && !defined(__svr4__)
45# ifndef linux
46#  include <sys/mbuf.h>
47# endif
48#else
49# include <sys/filio.h>
50# include <sys/byteorder.h>
51# ifdef _KERNEL
52#  include <sys/dditypes.h>
53# endif
54# include <sys/stream.h>
55# include <sys/kmem.h>
56#endif
57#if __FreeBSD_version >= 300000
58# include <sys/queue.h>
59#endif
60#include <net/if.h>
61#if __FreeBSD_version >= 300000
62# include <net/if_var.h>
63# if defined(_KERNEL) && !defined(IPFILTER_LKM)
64#  include "opt_ipfilter.h"
65# endif
66#endif
67#ifdef sun
68# include <net/af.h>
69#endif
70#include <net/route.h>
71#include <netinet/in.h>
72#include <netinet/in_systm.h>
73#include <netinet/ip.h>
74
75#ifdef __sgi
76# ifdef IFF_DRVRLOCK /* IRIX6 */
77#include <sys/hashing.h>
78#include <netinet/in_var.h>
79# endif
80#endif
81
82#ifdef RFC1825
83# include <vpn/md5.h>
84# include <vpn/ipsec.h>
85extern struct ifnet vpnif;
86#endif
87
88#ifndef linux
89# include <netinet/ip_var.h>
90# include <netinet/tcp_fsm.h>
91#endif
92#include <netinet/tcp.h>
93#include <netinet/udp.h>
94#include <netinet/ip_icmp.h>
95#include "netinet/ip_compat.h"
96#include <netinet/tcpip.h>
97#include "netinet/ip_fil.h"
98#include "netinet/ip_nat.h"
99#include "netinet/ip_frag.h"
100#include "netinet/ip_state.h"
101#include "netinet/ip_proxy.h"
102#if (__FreeBSD_version >= 300000)
103# include <sys/malloc.h>
104#endif
105#ifndef	MIN
106# define	MIN(a,b)	(((a)<(b))?(a):(b))
107#endif
108#undef	SOCKADDR_IN
109#define	SOCKADDR_IN	struct sockaddr_in
110
111#if !defined(lint)
112static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
113/* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.37.2.44 2001/07/21 07:17:22 darrenr Exp $"; */
114static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 130886 2004-06-21 22:46:36Z darrenr $";
115#endif
116
117nat_t	**nat_table[2] = { NULL, NULL },
118	*nat_instances = NULL;
119ipnat_t	*nat_list = NULL;
120u_int	ipf_nattable_max = NAT_TABLE_MAX;
121u_int	ipf_nattable_sz = NAT_TABLE_SZ;
122u_int	ipf_natrules_sz = NAT_SIZE;
123u_int	ipf_rdrrules_sz = RDR_SIZE;
124u_int	ipf_hostmap_sz = HOSTMAP_SIZE;
125u_32_t	nat_masks = 0;
126u_32_t	rdr_masks = 0;
127ipnat_t	**nat_rules = NULL;
128ipnat_t	**rdr_rules = NULL;
129hostmap_t	**maptable  = NULL;
130
131u_long	fr_defnatage = DEF_NAT_AGE,
132	fr_defnaticmpage = 6;		/* 3 seconds */
133natstat_t nat_stats;
134int	fr_nat_lock = 0;
135#if	(SOLARIS || defined(__sgi)) && defined(_KERNEL)
136extern	kmutex_t	ipf_rw;
137extern	KRWLOCK_T	ipf_nat;
138#endif
139
140static	int	nat_flushtable __P((void));
141static	void	nat_addnat __P((struct ipnat *));
142static	void	nat_addrdr __P((struct ipnat *));
143static	void	nat_delete __P((struct nat *));
144static	void	nat_delrdr __P((struct ipnat *));
145static	void	nat_delnat __P((struct ipnat *));
146static	int	fr_natgetent __P((caddr_t));
147static	int	fr_natgetsz __P((caddr_t));
148static	int	fr_natputent __P((caddr_t));
149static	void	nat_tabmove __P((fr_info_t *, nat_t *));
150static	int	nat_match __P((fr_info_t *, ipnat_t *, ip_t *));
151static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
152				    struct in_addr));
153static	void	nat_hostmapdel __P((struct hostmap *));
154static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
155
156
157int nat_init()
158{
159	KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
160	if (nat_table[0] != NULL)
161		bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
162	else
163		return -1;
164
165	KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
166	if (nat_table[1] != NULL)
167		bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
168	else
169		return -1;
170
171	KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
172	if (nat_rules != NULL)
173		bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
174	else
175		return -1;
176
177	KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
178	if (rdr_rules != NULL)
179		bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
180	else
181		return -1;
182
183	KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
184	if (maptable != NULL)
185		bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
186	else
187		return -1;
188	return 0;
189}
190
191
192static void nat_addrdr(n)
193ipnat_t *n;
194{
195	ipnat_t **np;
196	u_32_t j;
197	u_int hv;
198	int k;
199
200	k = countbits(n->in_outmsk);
201	if ((k >= 0) && (k != 32))
202		rdr_masks |= 1 << k;
203	j = (n->in_outip & n->in_outmsk);
204	hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
205	np = rdr_rules + hv;
206	while (*np != NULL)
207		np = &(*np)->in_rnext;
208	n->in_rnext = NULL;
209	n->in_prnext = np;
210	*np = n;
211}
212
213
214static void nat_addnat(n)
215ipnat_t *n;
216{
217	ipnat_t **np;
218	u_32_t j;
219	u_int hv;
220	int k;
221
222	k = countbits(n->in_inmsk);
223	if ((k >= 0) && (k != 32))
224		nat_masks |= 1 << k;
225	j = (n->in_inip & n->in_inmsk);
226	hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
227	np = nat_rules + hv;
228	while (*np != NULL)
229		np = &(*np)->in_mnext;
230	n->in_mnext = NULL;
231	n->in_pmnext = np;
232	*np = n;
233}
234
235
236static void nat_delrdr(n)
237ipnat_t *n;
238{
239	if (n->in_rnext)
240		n->in_rnext->in_prnext = n->in_prnext;
241	*n->in_prnext = n->in_rnext;
242}
243
244
245static void nat_delnat(n)
246ipnat_t *n;
247{
248	if (n->in_mnext)
249		n->in_mnext->in_pmnext = n->in_pmnext;
250	*n->in_pmnext = n->in_mnext;
251}
252
253
254/*
255 * check if an ip address has already been allocated for a given mapping that
256 * is not doing port based translation.
257 *
258 * Must be called with ipf_nat held as a write lock.
259 */
260static struct hostmap *nat_hostmap(np, real, map)
261ipnat_t *np;
262struct in_addr real;
263struct in_addr map;
264{
265	hostmap_t *hm;
266	u_int hv;
267
268	hv = real.s_addr % HOSTMAP_SIZE;
269	for (hm = maptable[hv]; hm; hm = hm->hm_next)
270		if ((hm->hm_realip.s_addr == real.s_addr) &&
271		    (np == hm->hm_ipnat)) {
272			hm->hm_ref++;
273			return hm;
274		}
275
276	KMALLOC(hm, hostmap_t *);
277	if (hm) {
278		hm->hm_next = maptable[hv];
279		hm->hm_pnext = maptable + hv;
280		if (maptable[hv])
281			maptable[hv]->hm_pnext = &hm->hm_next;
282		maptable[hv] = hm;
283		hm->hm_ipnat = np;
284		hm->hm_realip = real;
285		hm->hm_mapip = map;
286		hm->hm_ref = 1;
287	}
288	return hm;
289}
290
291
292/*
293 * Must be called with ipf_nat held as a write lock.
294 */
295static void nat_hostmapdel(hm)
296struct hostmap *hm;
297{
298	ATOMIC_DEC32(hm->hm_ref);
299	if (hm->hm_ref == 0) {
300		if (hm->hm_next)
301			hm->hm_next->hm_pnext = hm->hm_pnext;
302		*hm->hm_pnext = hm->hm_next;
303		KFREE(hm);
304	}
305}
306
307
308void fix_outcksum(fin, sp, n)
309fr_info_t *fin;
310u_short *sp;
311u_32_t n;
312{
313	register u_short sumshort;
314	register u_32_t sum1;
315
316	if (!n)
317		return;
318	else if (n & NAT_HW_CKSUM) {
319		n &= 0xffff;
320		n += fin->fin_dlen;
321		n = (n & 0xffff) + (n >> 16);
322		*sp = n & 0xffff;
323		return;
324	}
325	sum1 = (~ntohs(*sp)) & 0xffff;
326	sum1 += (n);
327	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
328	/* Again */
329	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
330	sumshort = ~(u_short)sum1;
331	*(sp) = htons(sumshort);
332}
333
334
335void fix_incksum(fin, sp, n)
336fr_info_t *fin;
337u_short *sp;
338u_32_t n;
339{
340	register u_short sumshort;
341	register u_32_t sum1;
342
343	if (!n)
344		return;
345	else if (n & NAT_HW_CKSUM) {
346		n &= 0xffff;
347		n += fin->fin_dlen;
348		n = (n & 0xffff) + (n >> 16);
349		*sp = n & 0xffff;
350		return;
351	}
352#ifdef sparc
353	sum1 = (~(*sp)) & 0xffff;
354#else
355	sum1 = (~ntohs(*sp)) & 0xffff;
356#endif
357	sum1 += ~(n) & 0xffff;
358	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
359	/* Again */
360	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
361	sumshort = ~(u_short)sum1;
362	*(sp) = htons(sumshort);
363}
364
365
366/*
367 * fix_datacksum is used *only* for the adjustments of checksums in the data
368 * section of an IP packet.
369 *
370 * The only situation in which you need to do this is when NAT'ing an
371 * ICMP error message. Such a message, contains in its body the IP header
372 * of the original IP packet, that causes the error.
373 *
374 * You can't use fix_incksum or fix_outcksum in that case, because for the
375 * kernel the data section of the ICMP error is just data, and no special
376 * processing like hardware cksum or ntohs processing have been done by the
377 * kernel on the data section.
378 */
379void fix_datacksum(sp, n)
380u_short *sp;
381u_32_t n;
382{
383	register u_short sumshort;
384	register u_32_t sum1;
385
386	if (!n)
387		return;
388
389	sum1 = (~ntohs(*sp)) & 0xffff;
390	sum1 += (n);
391	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
392	/* Again */
393	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
394	sumshort = ~(u_short)sum1;
395	*(sp) = htons(sumshort);
396}
397
398/*
399 * How the NAT is organised and works.
400 *
401 * Inside (interface y) NAT       Outside (interface x)
402 * -------------------- -+- -------------------------------------
403 * Packet going          |   out, processed by ip_natout() for x
404 * ------------>         |   ------------>
405 * src=10.1.1.1          |   src=192.1.1.1
406 *                       |
407 *                       |   in, processed by ip_natin() for x
408 * <------------         |   <------------
409 * dst=10.1.1.1          |   dst=192.1.1.1
410 * -------------------- -+- -------------------------------------
411 * ip_natout() - changes ip_src and if required, sport
412 *             - creates a new mapping, if required.
413 * ip_natin()  - changes ip_dst and if required, dport
414 *
415 * In the NAT table, internal source is recorded as "in" and externally
416 * seen as "out".
417 */
418
419/*
420 * Handle ioctls which manipulate the NAT.
421 */
422int nat_ioctl(data, cmd, mode)
423#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
424u_long cmd;
425#else
426int cmd;
427#endif
428caddr_t data;
429int mode;
430{
431	register ipnat_t *nat, *nt, *n = NULL, **np = NULL;
432	int error = 0, ret, arg, getlock;
433	ipnat_t natd;
434	u_32_t i, j;
435
436#if (BSD >= 199306) && defined(_KERNEL)
437	if ((securelevel >= 3) && (mode & FWRITE))
438		return EPERM;
439#endif
440
441	nat = NULL;     /* XXX gcc -Wuninitialized */
442	KMALLOC(nt, ipnat_t *);
443	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
444	if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
445		if (mode & NAT_SYSSPACE) {
446			bcopy(data, (char *)&natd, sizeof(natd));
447			error = 0;
448		} else {
449			error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
450		}
451	} else if (cmd == SIOCIPFFL) {	/* SIOCFLNAT & SIOCCNATL */
452		error = IRCOPY(data, (char *)&arg, sizeof(arg));
453		if (error)
454			error = EFAULT;
455	}
456
457	if (error)
458		goto done;
459
460	/*
461	 * For add/delete, look to see if the NAT entry is already present
462	 */
463	if (getlock == 1) {
464		WRITE_ENTER(&ipf_nat);
465	}
466	if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
467		nat = &natd;
468		nat->in_flags &= IPN_USERFLAGS;
469		if ((nat->in_redir & NAT_MAPBLK) == 0) {
470			if ((nat->in_flags & IPN_SPLIT) == 0)
471				nat->in_inip &= nat->in_inmsk;
472			if ((nat->in_flags & IPN_IPRANGE) == 0)
473				nat->in_outip &= nat->in_outmsk;
474		}
475		for (np = &nat_list; (n = *np); np = &n->in_next)
476			if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
477					IPN_CMPSIZ)) {
478				if (n->in_redir == NAT_REDIRECT &&
479				    n->in_pnext != nat->in_pnext)
480					continue;
481				break;
482			}
483	}
484
485	switch (cmd)
486	{
487#ifdef  IPFILTER_LOG
488	case SIOCIPFFB :
489	{
490		int tmp;
491
492		if (!(mode & FWRITE))
493			error = EPERM;
494		else {
495			tmp = ipflog_clear(IPL_LOGNAT);
496			IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
497		}
498		break;
499	}
500#endif
501	case SIOCADNAT :
502		if (!(mode & FWRITE)) {
503			error = EPERM;
504			break;
505		}
506		if (n) {
507			error = EEXIST;
508			break;
509		}
510		if (nt == NULL) {
511			error = ENOMEM;
512			break;
513		}
514		n = nt;
515		nt = NULL;
516		bcopy((char *)nat, (char *)n, sizeof(*n));
517		n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
518		if (!n->in_ifp)
519			n->in_ifp = (void *)-1;
520		if (n->in_plabel[0] != '\0') {
521			n->in_apr = appr_lookup(n->in_p, n->in_plabel);
522			if (!n->in_apr) {
523				error = ENOENT;
524				break;
525			}
526		}
527		n->in_next = NULL;
528		*np = n;
529
530		if (n->in_redir & NAT_REDIRECT) {
531			n->in_flags &= ~IPN_NOTDST;
532			nat_addrdr(n);
533		}
534		if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
535			n->in_flags &= ~IPN_NOTSRC;
536			nat_addnat(n);
537		}
538
539		n->in_use = 0;
540		if (n->in_redir & NAT_MAPBLK)
541			n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
542		else if (n->in_flags & IPN_AUTOPORTMAP)
543			n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
544		else if (n->in_flags & IPN_IPRANGE)
545			n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
546		else if (n->in_flags & IPN_SPLIT)
547			n->in_space = 2;
548		else
549			n->in_space = ~ntohl(n->in_outmsk);
550		/*
551		 * Calculate the number of valid IP addresses in the output
552		 * mapping range.  In all cases, the range is inclusive of
553		 * the start and ending IP addresses.
554		 * If to a CIDR address, lose 2: broadcast + network address
555		 *			         (so subtract 1)
556		 * If to a range, add one.
557		 * If to a single IP address, set to 1.
558		 */
559		if (n->in_space) {
560			if ((n->in_flags & IPN_IPRANGE) != 0)
561				n->in_space += 1;
562			else
563				n->in_space -= 1;
564		} else
565			n->in_space = 1;
566		if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
567		    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
568			n->in_nip = ntohl(n->in_outip) + 1;
569		else if ((n->in_flags & IPN_SPLIT) &&
570			 (n->in_redir & NAT_REDIRECT))
571			n->in_nip = ntohl(n->in_inip);
572		else
573			n->in_nip = ntohl(n->in_outip);
574		if (n->in_redir & NAT_MAP) {
575			n->in_pnext = ntohs(n->in_pmin);
576			/*
577			 * Multiply by the number of ports made available.
578			 */
579			if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
580				n->in_space *= (ntohs(n->in_pmax) -
581						ntohs(n->in_pmin) + 1);
582				/*
583				 * Because two different sources can map to
584				 * different destinations but use the same
585				 * local IP#/port #.
586				 * If the result is smaller than in_space, then
587				 * we may have wrapped around 32bits.
588				 */
589				i = n->in_inmsk;
590				if ((i != 0) && (i != 0xffffffff)) {
591					j = n->in_space * (~ntohl(i) + 1);
592					if (j >= n->in_space)
593						n->in_space = j;
594					else
595						n->in_space = 0xffffffff;
596				}
597			}
598			/*
599			 * If no protocol is specified, multiple by 256.
600			 */
601			if ((n->in_flags & IPN_TCPUDP) == 0) {
602					j = n->in_space * 256;
603					if (j >= n->in_space)
604						n->in_space = j;
605					else
606						n->in_space = 0xffffffff;
607			}
608		}
609		/* Otherwise, these fields are preset */
610		n = NULL;
611		nat_stats.ns_rules++;
612		break;
613	case SIOCRMNAT :
614		if (!(mode & FWRITE)) {
615			error = EPERM;
616			n = NULL;
617			break;
618		}
619		if (!n) {
620			error = ESRCH;
621			break;
622		}
623		if (n->in_redir & NAT_REDIRECT)
624			nat_delrdr(n);
625		if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
626			nat_delnat(n);
627		if (nat_list == NULL) {
628			nat_masks = 0;
629			rdr_masks = 0;
630		}
631		*np = n->in_next;
632		if (!n->in_use) {
633			if (n->in_apr)
634				appr_free(n->in_apr);
635			KFREE(n);
636			nat_stats.ns_rules--;
637		} else {
638			n->in_flags |= IPN_DELETE;
639			n->in_next = NULL;
640		}
641		n = NULL;
642		break;
643	case SIOCGNATS :
644		MUTEX_DOWNGRADE(&ipf_nat);
645		nat_stats.ns_table[0] = nat_table[0];
646		nat_stats.ns_table[1] = nat_table[1];
647		nat_stats.ns_list = nat_list;
648		nat_stats.ns_maptable = maptable;
649		nat_stats.ns_nattab_sz = ipf_nattable_sz;
650		nat_stats.ns_rultab_sz = ipf_natrules_sz;
651		nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
652		nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
653		nat_stats.ns_instances = nat_instances;
654		nat_stats.ns_apslist = ap_sess_list;
655		error = IWCOPYPTR((char *)&nat_stats, (char *)data,
656				  sizeof(nat_stats));
657		break;
658	case SIOCGNATL :
659	    {
660		natlookup_t nl;
661
662		MUTEX_DOWNGRADE(&ipf_nat);
663		error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
664		if (error)
665			break;
666
667		if (nat_lookupredir(&nl)) {
668			error = IWCOPYPTR((char *)&nl, (char *)data,
669					  sizeof(nl));
670		} else
671			error = ESRCH;
672		break;
673	    }
674	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
675		if (!(mode & FWRITE)) {
676			error = EPERM;
677			break;
678		}
679		error = 0;
680		if (arg == 0)
681			ret = nat_flushtable();
682		else if (arg == 1)
683			ret = nat_clearlist();
684		else
685			error = EINVAL;
686		MUTEX_DOWNGRADE(&ipf_nat);
687		if (!error) {
688			error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
689			if (error)
690				error = EFAULT;
691		}
692		break;
693	case SIOCSTLCK :
694		error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
695		if (!error) {
696			error = IWCOPY((caddr_t)&fr_nat_lock, data,
697					sizeof(fr_nat_lock));
698			if (!error)
699				fr_nat_lock = arg;
700		} else
701			error = EFAULT;
702		break;
703	case SIOCSTPUT :
704		if (fr_nat_lock)
705			error = fr_natputent(data);
706		else
707			error = EACCES;
708		break;
709	case SIOCSTGSZ :
710		if (fr_nat_lock)
711			error = fr_natgetsz(data);
712		else
713			error = EACCES;
714		break;
715	case SIOCSTGET :
716		if (fr_nat_lock)
717			error = fr_natgetent(data);
718		else
719			error = EACCES;
720		break;
721	case FIONREAD :
722#ifdef	IPFILTER_LOG
723		arg = (int)iplused[IPL_LOGNAT];
724		MUTEX_DOWNGRADE(&ipf_nat);
725		error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
726		if (error)
727			error = EFAULT;
728#endif
729		break;
730	default :
731		error = EINVAL;
732		break;
733	}
734	if (getlock == 1) {
735		RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
736	}
737done:
738	if (nt)
739		KFREE(nt);
740	return error;
741}
742
743
744static int fr_natgetsz(data)
745caddr_t data;
746{
747	ap_session_t *aps;
748	nat_t *nat, *n;
749	int error = 0;
750	natget_t ng;
751
752	error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
753	if (error)
754		return EFAULT;
755
756	nat = ng.ng_ptr;
757	if (!nat) {
758		nat = nat_instances;
759		ng.ng_sz = 0;
760		if (nat == NULL) {
761			error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
762			if (error)
763				error = EFAULT;
764			return error;
765		}
766	} else {
767		/*
768		 * Make sure the pointer we're copying from exists in the
769		 * current list of entries.  Security precaution to prevent
770		 * copying of random kernel data.
771		 */
772		for (n = nat_instances; n; n = n->nat_next)
773			if (n == nat)
774				break;
775		if (!n)
776			return ESRCH;
777	}
778
779	ng.ng_sz = sizeof(nat_save_t);
780	aps = nat->nat_aps;
781	if ((aps != NULL) && (aps->aps_data != 0)) {
782		ng.ng_sz += sizeof(ap_session_t);
783		ng.ng_sz += aps->aps_psiz;
784		if (aps->aps_psiz > 4)	/* XXX - sizeof(ipn_data) */
785			ng.ng_sz -= 4;
786	}
787
788	error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
789	if (error)
790		error = EFAULT;
791	return error;
792}
793
794
795static int fr_natgetent(data)
796caddr_t data;
797{
798	nat_save_t ipn, *ipnp, *ipnn = NULL;
799	register nat_t *n, *nat;
800	ap_session_t *aps;
801	size_t dsz;
802	int error;
803
804	error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
805	if (error)
806		return EFAULT;
807	error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
808	if (error)
809		return EFAULT;
810
811	nat = ipn.ipn_next;
812	if (!nat) {
813		nat = nat_instances;
814		if (nat == NULL) {
815			if (nat_instances == NULL)
816				return ENOENT;
817			return 0;
818		}
819	} else {
820		/*
821		 * Make sure the pointer we're copying from exists in the
822		 * current list of entries.  Security precaution to prevent
823		 * copying of random kernel data.
824		 */
825		for (n = nat_instances; n; n = n->nat_next)
826			if (n == nat)
827				break;
828		if (!n)
829			return ESRCH;
830	}
831
832	ipn.ipn_next = nat->nat_next;
833	bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
834	ipn.ipn_nat.nat_data = NULL;
835
836	if (nat->nat_ptr) {
837		bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
838		      sizeof(ipn.ipn_ipnat));
839	}
840
841	if (nat->nat_fr)
842		bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
843		      sizeof(ipn.ipn_rule));
844
845	if ((aps = nat->nat_aps)) {
846		dsz = sizeof(*aps);
847		if (aps->aps_data)
848			dsz += aps->aps_psiz;
849		ipn.ipn_dsize = dsz;
850		if (dsz > sizeof(ipn.ipn_data))
851			dsz -= sizeof(ipn.ipn_data);
852		KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + dsz);
853		if (ipnn == NULL)
854			return ENOMEM;
855		bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
856
857		bcopy((char *)aps, (char *)ipnn->ipn_data, sizeof(*aps));
858		if (aps->aps_data) {
859			bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
860			      aps->aps_psiz);
861		}
862		error = IWCOPY((caddr_t)ipnn, ipnp,
863			       sizeof(ipn) + dsz);
864		if (error)
865			error = EFAULT;
866		KFREES(ipnn, sizeof(*ipnn) + dsz);
867	} else {
868		ipn.ipn_dsize = 0;
869		error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
870		if (error)
871			error = EFAULT;
872	}
873	return error;
874}
875
876
877static int fr_natputent(data)
878caddr_t data;
879{
880	nat_save_t ipn, *ipnp, *ipnn = NULL;
881	register nat_t *n, *nat;
882	ap_session_t *aps;
883	frentry_t *fr;
884	ipnat_t *in;
885
886	int error;
887
888	error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
889	if (error)
890		return EFAULT;
891	error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
892	if (error)
893		return EFAULT;
894	nat = NULL;
895	if (ipn.ipn_dsize) {
896		KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
897		if (ipnn == NULL)
898			return ENOMEM;
899		bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
900		error = IRCOPY((caddr_t)ipnp + offsetof(nat_save_t, ipn_data),
901			       (caddr_t)ipnn->ipn_data, ipn.ipn_dsize);
902		if (error) {
903			error = EFAULT;
904			goto junkput;
905		}
906	} else
907		ipnn = NULL;
908
909	KMALLOC(nat, nat_t *);
910	if (nat == NULL) {
911		error = EFAULT;
912		goto junkput;
913	}
914
915	bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
916	/*
917	 * Initialize all these so that nat_delete() doesn't cause a crash.
918	 */
919	nat->nat_phnext[0] = NULL;
920	nat->nat_phnext[1] = NULL;
921	fr = nat->nat_fr;
922	nat->nat_fr = NULL;
923	aps = nat->nat_aps;
924	nat->nat_aps = NULL;
925	in = nat->nat_ptr;
926	nat->nat_ptr = NULL;
927	nat->nat_hm = NULL;
928	nat->nat_data = NULL;
929	nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
930
931	/*
932	 * Restore the rule associated with this nat session
933	 */
934	if (in) {
935		KMALLOC(in, ipnat_t *);
936		if (in == NULL) {
937			error = ENOMEM;
938			goto junkput;
939		}
940		nat->nat_ptr = in;
941		bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
942		in->in_use = 1;
943		in->in_flags |= IPN_DELETE;
944		in->in_next = NULL;
945		in->in_rnext = NULL;
946		in->in_prnext = NULL;
947		in->in_mnext = NULL;
948		in->in_pmnext = NULL;
949		in->in_ifp = GETUNIT(in->in_ifname, 4);
950		if (in->in_plabel[0] != '\0') {
951			in->in_apr = appr_lookup(in->in_p, in->in_plabel);
952		}
953	}
954
955	/*
956	 * Restore ap_session_t structure.  Include the private data allocated
957	 * if it was there.
958	 */
959	if (aps) {
960		KMALLOC(aps, ap_session_t *);
961		if (aps == NULL) {
962			error = ENOMEM;
963			goto junkput;
964		}
965		nat->nat_aps = aps;
966		aps->aps_next = ap_sess_list;
967		ap_sess_list = aps;
968		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
969		if (in)
970			aps->aps_apr = in->in_apr;
971		if (aps->aps_psiz) {
972			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
973			if (aps->aps_data == NULL) {
974				error = ENOMEM;
975				goto junkput;
976			}
977			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
978			      aps->aps_psiz);
979		} else {
980			aps->aps_psiz = 0;
981			aps->aps_data = NULL;
982		}
983	}
984
985	/*
986	 * If there was a filtering rule associated with this entry then
987	 * build up a new one.
988	 */
989	if (fr != NULL) {
990		if (nat->nat_flags & FI_NEWFR) {
991			KMALLOC(fr, frentry_t *);
992			nat->nat_fr = fr;
993			if (fr == NULL) {
994				error = ENOMEM;
995				goto junkput;
996			}
997			bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
998			ipn.ipn_nat.nat_fr = fr;
999			error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
1000			if (error) {
1001				error = EFAULT;
1002				goto junkput;
1003			}
1004		} else {
1005			for (n = nat_instances; n; n = n->nat_next)
1006				if (n->nat_fr == fr)
1007					break;
1008			if (!n) {
1009				error = ESRCH;
1010				goto junkput;
1011			}
1012		}
1013	}
1014
1015	if (ipnn)
1016		KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1017	nat_insert(nat);
1018	return 0;
1019junkput:
1020	if (ipnn)
1021		KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1022	if (nat)
1023		nat_delete(nat);
1024	return error;
1025}
1026
1027
1028/*
1029 * Delete a nat entry from the various lists and table.
1030 */
1031static void nat_delete(natd)
1032struct nat *natd;
1033{
1034	struct ipnat *ipn;
1035
1036	if (natd->nat_flags & FI_WILDP)
1037		nat_stats.ns_wilds--;
1038	if (natd->nat_hnext[0])
1039		natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0];
1040	*natd->nat_phnext[0] = natd->nat_hnext[0];
1041	if (natd->nat_hnext[1])
1042		natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1];
1043	*natd->nat_phnext[1] = natd->nat_hnext[1];
1044	if (natd->nat_me != NULL)
1045		*natd->nat_me = NULL;
1046
1047	if (natd->nat_fr != NULL) {
1048		ATOMIC_DEC32(natd->nat_fr->fr_ref);
1049	}
1050
1051	if (natd->nat_hm != NULL)
1052		nat_hostmapdel(natd->nat_hm);
1053
1054	/*
1055	 * If there is an active reference from the nat entry to its parent
1056	 * rule, decrement the rule's reference count and free it too if no
1057	 * longer being used.
1058	 */
1059	ipn = natd->nat_ptr;
1060	if (ipn != NULL) {
1061		ipn->in_space++;
1062		ipn->in_use--;
1063		if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
1064			if (ipn->in_apr)
1065				appr_free(ipn->in_apr);
1066			KFREE(ipn);
1067			nat_stats.ns_rules--;
1068		}
1069	}
1070
1071	MUTEX_DESTROY(&natd->nat_lock);
1072	/*
1073	 * If there's a fragment table entry too for this nat entry, then
1074	 * dereference that as well.
1075	 */
1076	ipfr_forgetnat((void *)natd);
1077	aps_free(natd->nat_aps);
1078	nat_stats.ns_inuse--;
1079	KFREE(natd);
1080}
1081
1082
1083/*
1084 * nat_flushtable - clear the NAT table of all mapping entries.
1085 * (this is for the dynamic mappings)
1086 */
1087static int nat_flushtable()
1088{
1089	register nat_t *nat, **natp;
1090	register int j = 0;
1091
1092	/*
1093	 * ALL NAT mappings deleted, so lets just make the deletions
1094	 * quicker.
1095	 */
1096	if (nat_table[0] != NULL)
1097		bzero((char *)nat_table[0],
1098		      sizeof(nat_table[0]) * ipf_nattable_sz);
1099	if (nat_table[1] != NULL)
1100		bzero((char *)nat_table[1],
1101		      sizeof(nat_table[1]) * ipf_nattable_sz);
1102
1103	for (natp = &nat_instances; (nat = *natp); ) {
1104		*natp = nat->nat_next;
1105#ifdef	IPFILTER_LOG
1106		nat_log(nat, NL_FLUSH);
1107#endif
1108		nat_delete(nat);
1109		j++;
1110	}
1111	nat_stats.ns_inuse = 0;
1112	return j;
1113}
1114
1115
1116/*
1117 * nat_clearlist - delete all rules in the active NAT mapping list.
1118 * (this is for NAT/RDR rules)
1119 */
1120int nat_clearlist()
1121{
1122	register ipnat_t *n, **np = &nat_list;
1123	int i = 0;
1124
1125	if (nat_rules != NULL)
1126		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1127	if (rdr_rules != NULL)
1128		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1129
1130	while ((n = *np)) {
1131		*np = n->in_next;
1132		if (!n->in_use) {
1133			if (n->in_apr)
1134				appr_free(n->in_apr);
1135			KFREE(n);
1136			nat_stats.ns_rules--;
1137		} else {
1138			n->in_flags |= IPN_DELETE;
1139			n->in_next = NULL;
1140		}
1141		i++;
1142	}
1143	nat_masks = 0;
1144	rdr_masks = 0;
1145	return i;
1146}
1147
1148
1149/*
1150 * Create a new NAT table entry.
1151 * NOTE: Assumes write lock on ipf_nat has been obtained already.
1152 *       If you intend on changing this, beware: appr_new() may call nat_new()
1153 *       recursively!
1154 */
1155nat_t *nat_new(fin, ip, np, natsave, flags, direction)
1156fr_info_t *fin;
1157ip_t *ip;
1158ipnat_t *np;
1159nat_t **natsave;
1160u_int flags;
1161int direction;
1162{
1163	register u_32_t sum1, sum2, sumd, l;
1164	u_short port = 0, sport = 0, dport = 0, nport = 0;
1165	struct in_addr in, inb;
1166	u_short nflags, sp, dp;
1167	tcphdr_t *tcp = NULL;
1168	hostmap_t *hm = NULL;
1169	nat_t *nat, *natl;
1170#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1171	qif_t *qf = fin->fin_qif;
1172#endif
1173
1174	if (nat_stats.ns_inuse >= ipf_nattable_max) {
1175		nat_stats.ns_memfail++;
1176		return NULL;
1177	}
1178
1179	nflags = flags & np->in_flags;
1180	if (flags & IPN_TCPUDP) {
1181		tcp = (tcphdr_t *)fin->fin_dp;
1182		sport = htons(fin->fin_data[0]);
1183		dport = htons(fin->fin_data[1]);
1184	}
1185
1186	/* Give me a new nat */
1187	KMALLOC(nat, nat_t *);
1188	if (nat == NULL) {
1189		nat_stats.ns_memfail++;
1190		/*
1191		 * Try to automatically tune the max # of entries in the
1192		 * table allowed to be less than what will cause kmem_alloc()
1193		 * to fail and try to eliminate panics due to out of memory
1194		 * conditions arising.
1195		 */
1196		if (ipf_nattable_max > ipf_nattable_sz) {
1197			ipf_nattable_max = nat_stats.ns_inuse - 100;
1198			printf("ipf_nattable_max reduced to %d\n",
1199				ipf_nattable_max);
1200		}
1201		return NULL;
1202	}
1203
1204	bzero((char *)nat, sizeof(*nat));
1205	nat->nat_flags = flags;
1206	if (flags & FI_WILDP)
1207		nat_stats.ns_wilds++;
1208	/*
1209	 * Search the current table for a match.
1210	 */
1211	if (direction == NAT_OUTBOUND) {
1212		/*
1213		 * Values at which the search for a free resouce starts.
1214		 */
1215		u_32_t st_ip;
1216		u_short st_port;
1217
1218		/*
1219		 * If it's an outbound packet which doesn't match any existing
1220		 * record, then create a new port
1221		 */
1222		l = 0;
1223		st_ip = np->in_nip;
1224		st_port = np->in_pnext;
1225
1226		do {
1227			port = 0;
1228			in.s_addr = htonl(np->in_nip);
1229			if (l == 0) {
1230				/*
1231				 * Check to see if there is an existing NAT
1232				 * setup for this IP address pair.
1233				 */
1234				hm = nat_hostmap(np, fin->fin_src, in);
1235				if (hm != NULL)
1236					in.s_addr = hm->hm_mapip.s_addr;
1237			} else if ((l == 1) && (hm != NULL)) {
1238				nat_hostmapdel(hm);
1239				hm = NULL;
1240			}
1241			in.s_addr = ntohl(in.s_addr);
1242
1243			nat->nat_hm = hm;
1244
1245			if ((np->in_outmsk == 0xffffffff) &&
1246			    (np->in_pnext == 0)) {
1247				if (l > 0)
1248					goto badnat;
1249			}
1250
1251			if (np->in_redir & NAT_MAPBLK) {
1252				if ((l >= np->in_ppip) || ((l > 0) &&
1253				     !(flags & IPN_TCPUDP)))
1254					goto badnat;
1255				/*
1256				 * map-block - Calculate destination address.
1257				 */
1258				in.s_addr = ntohl(fin->fin_saddr);
1259				in.s_addr &= ntohl(~np->in_inmsk);
1260				inb.s_addr = in.s_addr;
1261				in.s_addr /= np->in_ippip;
1262				in.s_addr &= ntohl(~np->in_outmsk);
1263				in.s_addr += ntohl(np->in_outip);
1264				/*
1265				 * Calculate destination port.
1266				 */
1267				if ((flags & IPN_TCPUDP) &&
1268				    (np->in_ppip != 0)) {
1269					port = ntohs(sport) + l;
1270					port %= np->in_ppip;
1271					port += np->in_ppip *
1272						(inb.s_addr % np->in_ippip);
1273					port += MAPBLK_MINPORT;
1274					port = htons(port);
1275				}
1276			} else if (!np->in_outip &&
1277				   (np->in_outmsk == 0xffffffff)) {
1278				/*
1279				 * 0/32 - use the interface's IP address.
1280				 */
1281				if ((l > 0) ||
1282				    fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1283					goto badnat;
1284				in.s_addr = ntohl(in.s_addr);
1285			} else if (!np->in_outip && !np->in_outmsk) {
1286				/*
1287				 * 0/0 - use the original source address/port.
1288				 */
1289				if (l > 0)
1290					goto badnat;
1291				in.s_addr = ntohl(fin->fin_saddr);
1292			} else if ((np->in_outmsk != 0xffffffff) &&
1293				   (np->in_pnext == 0) &&
1294				   ((l > 0) || (hm == NULL)))
1295				np->in_nip++;
1296			natl = NULL;
1297
1298			if ((nflags & IPN_TCPUDP) &&
1299			    ((np->in_redir & NAT_MAPBLK) == 0) &&
1300			    (np->in_flags & IPN_AUTOPORTMAP)) {
1301				if ((l > 0) && (l % np->in_ppip == 0)) {
1302					if (l > np->in_space) {
1303						goto badnat;
1304					} else if ((l > np->in_ppip) &&
1305						   np->in_outmsk != 0xffffffff)
1306						np->in_nip++;
1307				}
1308				if (np->in_ppip != 0) {
1309					port = ntohs(sport);
1310					port += (l % np->in_ppip);
1311					port %= np->in_ppip;
1312					port += np->in_ppip *
1313						(ntohl(fin->fin_saddr) %
1314						 np->in_ippip);
1315					port += MAPBLK_MINPORT;
1316					port = htons(port);
1317				}
1318			} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1319				   (nflags & IPN_TCPUDP) &&
1320				   (np->in_pnext != 0)) {
1321				port = htons(np->in_pnext++);
1322				if (np->in_pnext > ntohs(np->in_pmax)) {
1323					np->in_pnext = ntohs(np->in_pmin);
1324					if (np->in_outmsk != 0xffffffff)
1325						np->in_nip++;
1326				}
1327			}
1328
1329			if (np->in_flags & IPN_IPRANGE) {
1330				if (np->in_nip > ntohl(np->in_outmsk))
1331					np->in_nip = ntohl(np->in_outip);
1332			} else {
1333				if ((np->in_outmsk != 0xffffffff) &&
1334				    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1335				    ntohl(np->in_outip))
1336					np->in_nip = ntohl(np->in_outip) + 1;
1337			}
1338
1339			if (!port && (flags & IPN_TCPUDP))
1340				port = sport;
1341
1342			/*
1343			 * Here we do a lookup of the connection as seen from
1344			 * the outside.  If an IP# pair already exists, try
1345			 * again.  So if you have A->B becomes C->B, you can
1346			 * also have D->E become C->E but not D->B causing
1347			 * another C->B.  Also take protocol and ports into
1348			 * account when determining whether a pre-existing
1349			 * NAT setup will cause an external conflict where
1350			 * this is appropriate.
1351			 */
1352			inb.s_addr = htonl(in.s_addr);
1353			sp = fin->fin_data[0];
1354			dp = fin->fin_data[1];
1355			fin->fin_data[0] = fin->fin_data[1];
1356			fin->fin_data[1] = htons(port);
1357			natl = nat_inlookup(fin, flags & ~FI_WILDP,
1358					    (u_int)fin->fin_p, fin->fin_dst,
1359					    inb, 1);
1360			fin->fin_data[0] = sp;
1361			fin->fin_data[1] = dp;
1362
1363			/*
1364			 * Has the search wrapped around and come back to the
1365			 * start ?
1366			 */
1367			if ((natl != NULL) &&
1368			    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1369			    (np->in_nip != 0) && (st_ip == np->in_nip))
1370				goto badnat;
1371			l++;
1372		} while (natl != NULL);
1373
1374		if (np->in_space > 0)
1375			np->in_space--;
1376
1377		/* Setup the NAT table */
1378		nat->nat_inip = fin->fin_src;
1379		nat->nat_outip.s_addr = htonl(in.s_addr);
1380		nat->nat_oip = fin->fin_dst;
1381		if (nat->nat_hm == NULL)
1382			nat->nat_hm = nat_hostmap(np, fin->fin_src,
1383						  nat->nat_outip);
1384
1385		sum1 = LONG_SUM(ntohl(fin->fin_saddr)) + ntohs(sport);
1386		sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1387
1388		if (flags & IPN_TCPUDP) {
1389			nat->nat_inport = sport;
1390			nat->nat_outport = port;	/* sport */
1391			nat->nat_oport = dport;
1392		}
1393	} else {
1394		/*
1395		 * Otherwise, it's an inbound packet. Most likely, we don't
1396		 * want to rewrite source ports and source addresses. Instead,
1397		 * we want to rewrite to a fixed internal address and fixed
1398		 * internal port.
1399		 */
1400		if (np->in_flags & IPN_SPLIT) {
1401			in.s_addr = np->in_nip;
1402			if (np->in_inip == htonl(in.s_addr))
1403				np->in_nip = ntohl(np->in_inmsk);
1404			else {
1405				np->in_nip = ntohl(np->in_inip);
1406				if (np->in_flags & IPN_ROUNDR) {
1407					nat_delrdr(np);
1408					nat_addrdr(np);
1409				}
1410			}
1411		} else {
1412			in.s_addr = ntohl(np->in_inip);
1413			if (np->in_flags & IPN_ROUNDR) {
1414				nat_delrdr(np);
1415				nat_addrdr(np);
1416			}
1417		}
1418		if (!np->in_pnext)
1419			nport = dport;
1420		else {
1421			/*
1422			 * Whilst not optimized for the case where
1423			 * pmin == pmax, the gain is not significant.
1424			 */
1425			if (np->in_pmin != np->in_pmax) {
1426				nport = ntohs(dport) - ntohs(np->in_pmin) +
1427					ntohs(np->in_pnext);
1428				nport = ntohs(nport);
1429			} else
1430				nport = np->in_pnext;
1431		}
1432
1433		/*
1434		 * When the redirect-to address is set to 0.0.0.0, just
1435		 * assume a blank `forwarding' of the packet.
1436		 */
1437		if (in.s_addr == 0)
1438			in.s_addr = ntohl(fin->fin_daddr);
1439
1440		nat->nat_inip.s_addr = htonl(in.s_addr);
1441		nat->nat_outip = fin->fin_dst;
1442		nat->nat_oip = fin->fin_src;
1443
1444		sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
1445		sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1446
1447		if (flags & IPN_TCPUDP) {
1448			nat->nat_inport = nport;
1449			nat->nat_outport = dport;
1450			nat->nat_oport = sport;
1451		}
1452	}
1453
1454	CALC_SUMD(sum1, sum2, sumd);
1455	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1456#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1457	if ((flags & IPN_TCP) && dohwcksum &&
1458	    (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1459		if (direction == NAT_OUTBOUND)
1460			sum1 = LONG_SUM(ntohl(in.s_addr));
1461		else
1462			sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1463		sum1 += LONG_SUM(ntohl(fin->fin_daddr));
1464		sum1 += IPPROTO_TCP;
1465		sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1466		nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1467	} else
1468#endif
1469		nat->nat_sumd[1] = nat->nat_sumd[0];
1470
1471	if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1472		if (direction == NAT_OUTBOUND)
1473			sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1474		else
1475			sum1 = LONG_SUM(ntohl(fin->fin_daddr));
1476
1477		sum2 = LONG_SUM(in.s_addr);
1478
1479		CALC_SUMD(sum1, sum2, sumd);
1480		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1481	} else
1482		nat->nat_ipsumd = nat->nat_sumd[0];
1483
1484	in.s_addr = htonl(in.s_addr);
1485
1486	strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1487
1488	nat->nat_me = natsave;
1489	nat->nat_dir = direction;
1490	nat->nat_ifp = fin->fin_ifp;
1491	nat->nat_ptr = np;
1492	nat->nat_p = fin->fin_p;
1493	nat->nat_bytes = 0;
1494	nat->nat_pkts = 0;
1495	nat->nat_mssclamp = np->in_mssclamp;
1496	nat->nat_fr = fin->fin_fr;
1497	if (nat->nat_fr != NULL) {
1498		ATOMIC_INC32(nat->nat_fr->fr_ref);
1499	}
1500	if (direction == NAT_OUTBOUND) {
1501		if (flags & IPN_TCPUDP)
1502			tcp->th_sport = port;
1503	} else {
1504		if (flags & IPN_TCPUDP)
1505			tcp->th_dport = nport;
1506	}
1507
1508	nat_insert(nat);
1509
1510	if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1511	    (tcp != NULL && dport == np->in_dport)))
1512		(void) appr_new(fin, ip, nat);
1513
1514	np->in_use++;
1515#ifdef	IPFILTER_LOG
1516	nat_log(nat, (u_int)np->in_redir);
1517#endif
1518	return nat;
1519badnat:
1520	nat_stats.ns_badnat++;
1521	if ((hm = nat->nat_hm) != NULL)
1522		nat_hostmapdel(hm);
1523	KFREE(nat);
1524	return NULL;
1525}
1526
1527
1528/*
1529 * Insert a NAT entry into the hash tables for searching and add it to the
1530 * list of active NAT entries.  Adjust global counters when complete.
1531 */
1532void	nat_insert(nat)
1533nat_t	*nat;
1534{
1535	u_int hv1, hv2;
1536	nat_t **natp;
1537
1538	MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1539
1540	nat->nat_age = fr_defnatage;
1541	nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1542	if (nat->nat_ifname[0] !='\0') {
1543		nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1544	}
1545
1546	nat->nat_next = nat_instances;
1547	nat_instances = nat;
1548
1549	if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) {
1550		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1551				  0xffffffff);
1552		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
1553				  ipf_nattable_sz);
1554		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1555				  0xffffffff);
1556		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
1557				 ipf_nattable_sz);
1558	} else {
1559		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_inip.s_addr,
1560				  ipf_nattable_sz);
1561		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_outip.s_addr,
1562				  ipf_nattable_sz);
1563	}
1564
1565	natp = &nat_table[0][hv1];
1566	if (*natp)
1567		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
1568	nat->nat_phnext[0] = natp;
1569	nat->nat_hnext[0] = *natp;
1570	*natp = nat;
1571
1572	natp = &nat_table[1][hv2];
1573	if (*natp)
1574		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
1575	nat->nat_phnext[1] = natp;
1576	nat->nat_hnext[1] = *natp;
1577	*natp = nat;
1578
1579	nat_stats.ns_added++;
1580	nat_stats.ns_inuse++;
1581}
1582
1583
1584nat_t *nat_icmplookup(ip, fin, dir)
1585ip_t *ip;
1586fr_info_t *fin;
1587int dir;
1588{
1589	icmphdr_t *icmp;
1590	tcphdr_t *tcp = NULL;
1591	ip_t *oip;
1592	int flags = 0, type, minlen;
1593
1594	icmp = (icmphdr_t *)fin->fin_dp;
1595	/*
1596	 * Does it at least have the return (basic) IP header ?
1597	 * Only a basic IP header (no options) should be with an ICMP error
1598	 * header.
1599	 */
1600	if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1601		return NULL;
1602	type = icmp->icmp_type;
1603	/*
1604	 * If it's not an error type, then return.
1605	 */
1606	if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1607	    (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1608	    (type != ICMP_PARAMPROB))
1609		return NULL;
1610
1611	oip = (ip_t *)((char *)fin->fin_dp + 8);
1612	minlen = (oip->ip_hl << 2);
1613	if (minlen < sizeof(ip_t))
1614		return NULL;
1615	if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1616		return NULL;
1617	/*
1618	 * Is the buffer big enough for all of it ?  It's the size of the IP
1619	 * header claimed in the encapsulated part which is of concern.  It
1620	 * may be too big to be in this buffer but not so big that it's
1621	 * outside the ICMP packet, leading to TCP deref's causing problems.
1622	 * This is possible because we don't know how big oip_hl is when we
1623	 * do the pullup early in fr_check() and thus can't gaurantee it is
1624	 * all here now.
1625	 */
1626#ifdef  _KERNEL
1627	{
1628	mb_t *m;
1629
1630# if SOLARIS
1631	m = fin->fin_qfm;
1632	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
1633		return NULL;
1634# else
1635	m = *(mb_t **)fin->fin_mp;
1636	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
1637	    (char *)ip + m->m_len)
1638		return NULL;
1639# endif
1640	}
1641#endif
1642
1643	if (oip->ip_p == IPPROTO_TCP)
1644		flags = IPN_TCP;
1645	else if (oip->ip_p == IPPROTO_UDP)
1646		flags = IPN_UDP;
1647	if (flags & IPN_TCPUDP) {
1648		u_short	data[2];
1649		nat_t *nat;
1650
1651		minlen += 8;		/* + 64bits of data to get ports */
1652		if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1653			return NULL;
1654
1655		data[0] = fin->fin_data[0];
1656		data[1] = fin->fin_data[1];
1657		tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1658		fin->fin_data[0] = ntohs(tcp->th_dport);
1659		fin->fin_data[1] = ntohs(tcp->th_sport);
1660
1661		if (dir == NAT_INBOUND) {
1662			nat = nat_inlookup(fin, flags, (u_int)oip->ip_p,
1663					    oip->ip_dst, oip->ip_src, 0);
1664		} else {
1665			nat = nat_outlookup(fin, flags, (u_int)oip->ip_p,
1666					    oip->ip_dst, oip->ip_src, 0);
1667		}
1668		fin->fin_data[0] = data[0];
1669		fin->fin_data[1] = data[1];
1670		return nat;
1671	}
1672	if (dir == NAT_INBOUND)
1673		return nat_inlookup(fin, 0, (u_int)oip->ip_p,
1674				    oip->ip_dst, oip->ip_src, 0);
1675	else
1676		return nat_outlookup(fin, 0, (u_int)oip->ip_p,
1677				    oip->ip_dst, oip->ip_src, 0);
1678}
1679
1680
1681/*
1682 * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP
1683 * packet gets correctly recognised.
 *
 * Finds the NAT entry for the packet quoted inside an ICMP error message
 * (using nat_icmplookup()) and rewrites that quoted packet in place so that
 * its address, and for TCP/UDP its port, reflect the NAT entry.  The quoted
 * IP header checksum, any quoted UDP/TCP checksum that is present and the
 * outer ICMP checksum are adjusted to match.
 *
 * ip     - IP header of the ICMP packet itself
 * fin    - packet information; fin_dp is taken as the start of the ICMP header
 * nflags - output; set to IPN_ICMPERR once a NAT entry has been found
 * dir    - NAT_INBOUND or NAT_OUTBOUND, handed on to nat_icmplookup()
 *
 * Returns the NAT entry, or NULL if the packet is short, is a fragment with
 * a non-zero offset, is not IPv4, or nat_icmplookup() finds nothing.
1684 */
1685nat_t *nat_icmp(ip, fin, nflags, dir)
1686ip_t *ip;
1687fr_info_t *fin;
1688u_int *nflags;
1689int dir;
1690{
1691	u_32_t sum1, sum2, sumd, sumd2 = 0;
1692	struct in_addr in;
1693	int flags, dlen;
1694	icmphdr_t *icmp;
1695	udphdr_t *udp;
1696	tcphdr_t *tcp;
1697	nat_t *nat;
1698	ip_t *oip;
1699
1700	if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0))
1701		return NULL;
1702	/*
1703	 * nat_icmplookup() will return NULL for `defective' packets.
1704	 */
1705	if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1706		return NULL;
1707
1708	flags = 0;
1709	sumd2 = 0;
1710	*nflags = IPN_ICMPERR;
1711	icmp = (icmphdr_t *)fin->fin_dp;
1712	oip = (ip_t *)&icmp->icmp_ip;
1713	if (oip->ip_p == IPPROTO_TCP)
1714		flags = IPN_TCP;
1715	else if (oip->ip_p == IPPROTO_UDP)
1716		flags = IPN_UDP;
	/*
	 * udp and tcp both point at the first byte after the quoted IP
	 * header; dlen is how many bytes of the packet remain from there.
	 */
1717	udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1718	dlen = ip->ip_len - ((char *)udp - (char *)ip);
1719	/*
1720	 * XXX - what if this is bogus hl and we go off the end ?
1721	 * In this case, nat_icmplookup() will have returned NULL.
1722	 */
1723	tcp = (tcphdr_t *)udp;
1724
1725	/*
1726	 * Need to adjust ICMP header to include the real IP#'s and
1727	 * port #'s.  Only apply a checksum change relative to the
1728	 * IP address change as it will be modified again in ip_natout
1729	 * for both address and port.  Two checksum changes are
1730	 * necessary for the two header address changes.  Be careful
1731	 * to only modify the checksum once for the port # and twice
1732	 * for the IP#.
1733	 */
1734
1735	/*
1736	 * Step 1
1737	 * Fix the IP addresses in the offending IP packet. You also need
1738	 * to adjust the IP header checksum of that offending IP packet
1739	 * and the ICMP checksum of the ICMP error message itself.
1740	 *
1741	 * Unfortunately, for UDP and TCP, the IP addresses are also contained
1742	 * in the pseudo header that is used to compute the UDP resp. TCP
1743	 * checksum. So, we must compensate that as well. Even worse, the
1744	 * change in the UDP and TCP checksums require yet another
1745	 * adjustment of the ICMP checksum of the ICMP error message.
1746	 *
1747	 */
1748
	/*
	 * If the quoted destination is the remote host of the NAT entry,
	 * the quoted source is replaced with the inside address; otherwise
	 * the quoted destination is replaced with the mapped address.  sum1
	 * holds the folded old address, sum2 (below) the new one.
	 */
1749	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
1750		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1751		in = nat->nat_inip;
1752		oip->ip_src = in;
1753	} else {
1754		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1755		in = nat->nat_outip;
1756		oip->ip_dst = in;
1757	}
1758
1759	sum2 = LONG_SUM(ntohl(in.s_addr));
1760
1761	CALC_SUMD(sum1, sum2, sumd);
1762
1763	/*
1764	 * Fix IP checksum of the offending IP packet to adjust for
1765	 * the change in the IP address.
1766	 *
1767	 * Normally, you would expect that the ICMP checksum of the
1768	 * ICMP error message needs to be adjusted as well for the
1769	 * IP address change in oip.
1770	 * However, this is a NOP, because the ICMP checksum is
1771	 * calculated over the complete ICMP packet, which includes the
1772	 * changed oip IP addresses and oip->ip_sum. However, these
1773	 * two changes cancel each other out (if the delta for
1774	 * the IP address is x, then the delta for ip_sum is minus x),
1775	 * so no change in the icmp_cksum is necessary.
1776	 *
1777	 * Be careful that nat_dir refers to the direction of the
1778	 * offending IP packet (oip), not to its ICMP response (icmp)
1779	 */
1780	fix_datacksum(&oip->ip_sum, sumd);
1781	/* Fix icmp cksum : IP Addr + Cksum */
1782
1783	/*
1784	 * Fix UDP pseudo header checksum to compensate for the
1785	 * IP address change.
1786	 */
1787	if ((oip->ip_p == IPPROTO_UDP) && (dlen >= 8) && udp->uh_sum) {
1788		/*
1789		 * The UDP checksum is optional, only adjust it
1790		 * if it has been set.
1791		 */
1792		sum1 = ntohs(udp->uh_sum);
1793		fix_datacksum(&udp->uh_sum, sumd);
1794		sum2 = ntohs(udp->uh_sum);
1795
1796		/*
1797		 * Fix ICMP checksum to compensate the UDP
1798		 * checksum adjustment.
1799		 */
1800		sumd2 = sumd << 1;
1801		CALC_SUMD(sum1, sum2, sumd);
1802		sumd2 += sumd;
1803	}
1804
1805	/*
1806	 * Fix TCP pseudo header checksum to compensate for the
1807	 * IP address change. Before we can do the change, we
1808	 * must make sure that oip is sufficient large to hold
1809	 * the TCP checksum (normally it does not!).
1810	 */
1811	else if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
1812		sum1 = ntohs(tcp->th_sum);
1813		fix_datacksum(&tcp->th_sum, sumd);
1814		sum2 = ntohs(tcp->th_sum);
1815
1816		/*
1817		 * Fix ICMP checksum to compensate the TCP
1818		 * checksum adjustment.
1819		 */
1820		sumd2 = sumd << 1;
1821		CALC_SUMD(sum1, sum2, sumd);
1822		sumd2 += sumd;
1823	} else {
		/*
		 * No quoted transport checksum was adjusted above.  Note
		 * that sumd2 is only folded into icmp_cksum inside the
		 * TCP/UDP block below, so for any other quoted protocol
		 * the value computed here is not applied.
		 */
1824		sumd2 = (sumd >> 16);
1825		if (nat->nat_dir == NAT_OUTBOUND)
1826			sumd2 = ~sumd2;
1827		else
1828			sumd2 = ~sumd2 + 1;
1829	}
1830
1831	if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
1832		/*
1833		 * Step 2 :
1834		 * For offending TCP/UDP IP packets, translate the ports as
1835		 * well, based on the NAT specification. Of course such
1836		 * a change must be reflected in the ICMP checksum as well.
1837		 *
1838		 * Advance notice : Now it becomes complicated :-)
1839		 *
1840		 * Since the port fields are part of the TCP/UDP checksum
1841		 * of the offending IP packet, you need to adjust that checksum
1842		 * as well... but, if you change, you must change the icmp
1843		 * checksum *again*, to reflect that change.
1844		 *
1845		 * To further complicate: the TCP checksum is not in the first
1846		 * 8 bytes of the offending ip packet, so it most likely is not
1847		 * available. Some OSses like Solaris return enough bytes to
1848		 * include the TCP checksum. So we have to check if the
1849		 * ip->ip_len actually holds the TCP checksum of the oip!
1850		 */
		/*
		 * The port fields are read through the tcp pointer for UDP
		 * as well; dlen >= 4 ensures both 16 bit ports are present.
		 */
1851		if (nat->nat_oport == tcp->th_dport) {
1852			if (tcp->th_sport != nat->nat_inport) {
1853				/*
1854				 * Fix ICMP checksum to compensate port
1855				 * adjustment.
1856				 */
1857				sum1 = ntohs(nat->nat_inport);
1858				sum2 = ntohs(tcp->th_sport);
1859				tcp->th_sport = nat->nat_inport;
1860
1861				/*
1862				 * Fix udp checksum to compensate port
1863				 * adjustment.  NOTE : the offending IP packet
1864				 * flows the other direction compared to the
1865				 * ICMP message.
1866				 *
1867				 * The UDP checksum is optional, only adjust
1868				 * it if it has been set.
1869				 */
1870				if ((oip->ip_p == IPPROTO_UDP) &&
1871				    (dlen >= 8) && udp->uh_sum) {
1872					sumd = sum1 - sum2;
1873					sumd2 += sumd;
1874
1875					sum1 = ntohs(udp->uh_sum);
1876					fix_datacksum(&udp->uh_sum, sumd);
1877					sum2 = ntohs(udp->uh_sum);
1878
1879					/*
1880					 * Fix ICMP checksum to compensate
1881					 * UDP checksum adjustment.
1882					 */
1883					CALC_SUMD(sum1, sum2, sumd);
1884					sumd2 += sumd;
1885				}
1886
1887				/*
1888				 * Fix tcp checksum (if present) to compensate
1889				 * port adjustment. NOTE : the offending IP
1890				 * packet flows the other direction compared to
1891				 * the ICMP message.
1892				 */
1893				if (oip->ip_p == IPPROTO_TCP) {
1894					if (dlen >= 18) {
1895						sumd = sum1 - sum2;
1896						sumd2 += sumd;
1897
1898						sum1 = ntohs(tcp->th_sum);
1899						fix_datacksum(&tcp->th_sum,
1900							      sumd);
1901						sum2 = ntohs(tcp->th_sum);
1902
1903						/*
1904						 * Fix ICMP checksum to
1905						 * compensate TCP checksum
1906						 * adjustment.
1907						 */
1908						CALC_SUMD(sum1, sum2, sumd);
1909						sumd2 += sumd;
1910					} else {
1911						sumd = sum2 - sum1 + 1;
1912						sumd2 += sumd;
1913					}
1914				}
1915			}
1916		} else if (tcp->th_dport != nat->nat_outport) {
1917			/*
1918			 * Fix ICMP checksum to compensate port
1919			 * adjustment.
1920			 */
1921			sum1 = ntohs(nat->nat_outport);
1922			sum2 = ntohs(tcp->th_dport);
1923			tcp->th_dport = nat->nat_outport;
1924
1925			/*
1926			 * Fix udp checksum to compensate port
1927			 * adjustment.   NOTE : the offending IP
1928			 * packet flows the other direction compared
1929			 * to the ICMP message.
1930			 *
1931			 * The UDP checksum is optional, only adjust
1932			 * it if it has been set.
1933			 */
1934			if ((oip->ip_p == IPPROTO_UDP) &&
1935			    (dlen >= 8) && udp->uh_sum) {
1936				sumd = sum1 - sum2;
1937				sumd2 += sumd;
1938
1939				sum1 = ntohs(udp->uh_sum);
1940				fix_datacksum(&udp->uh_sum, sumd);
1941				sum2 = ntohs(udp->uh_sum);
1942
1943				/*
1944				 * Fix ICMP checksum to compensate
1945				 * UDP checksum adjustment.
1946				 */
1947				CALC_SUMD(sum1, sum2, sumd);
1948			}
1949
1950			/*
1951			 * Fix tcp checksum (if present) to compensate
1952			 * port adjustment. NOTE : the offending IP
1953			 * packet flows the other direction compared to
1954			 * the ICMP message.
1955			 */
1956			if (oip->ip_p == IPPROTO_TCP) {
1957				if (dlen >= 18) {
1958					sumd = sum1 - sum2;
1959					sumd2 += sumd;
1960
1961					sum1 = ntohs(tcp->th_sum);
1962					fix_datacksum(&tcp->th_sum, sumd);
1963					sum2 = ntohs(tcp->th_sum);
1964
1965					/*
1966					 * Fix ICMP checksum to compensate
1967					 * TCP checksum adjustment.
1968					 */
1969					CALC_SUMD(sum1, sum2, sumd);
1970				} else {
1971					sumd = sum2 - sum1;
1972					if (nat->nat_dir == NAT_OUTBOUND)
1973						sumd++;
1974				}
1975			}
			/*
			 * Unlike the source port case above, the final delta
			 * for this branch is added here, once.
			 * NOTE(review): for a quoted UDP packet without a
			 * checksum (or with dlen < 8) neither inner block
			 * assigns sumd, so this presumably adds the value
			 * left over from the address fix in step 1 - confirm
			 * that this is intended.
			 */
1976			sumd2 += sumd;
1977		}
		/*
		 * Fold the accumulated delta down to 16 bits and apply it
		 * to the checksum of the ICMP message itself.
		 */
1978		if (sumd2) {
1979			sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1980			sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1981			fix_incksum(fin, &icmp->icmp_cksum, sumd2);
1982		}
1983	}
	/* An error quoting an ICMP packet gives the entry the ICMP age. */
1984	if (oip->ip_p == IPPROTO_ICMP)
1985		nat->nat_age = fr_defnaticmpage;
1986	return nat;
1987}
1988
1989
1990/*
1991 * NB: these lookups don't lock access to the list; they assume that has
1992 * already been done!
1993 */
1994/*
1995 * Lookup a nat entry based on the mapped destination ip address/port and
1996 * real source address/port.  We use this lookup when receiving a packet,
1997 * we're looking for a table entry, based on the destination address.
1998 * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
1999 */
2000nat_t *nat_inlookup(fin, flags, p, src, mapdst, rw)
2001fr_info_t *fin;
2002register u_int flags, p;
2003struct in_addr src , mapdst;
2004int rw;
2005{
2006	register u_short sport, dport;
2007	register nat_t *nat;
2008	register int nflags;
2009	register u_32_t dst;
2010	ipnat_t *ipn;
2011	void *ifp;
2012	u_int hv;
2013
2014	if (fin != NULL)
2015		ifp = fin->fin_ifp;
2016	else
2017		ifp = NULL;
2018	dst = mapdst.s_addr;
2019	if (flags & IPN_TCPUDP) {
2020		sport = htons(fin->fin_data[0]);
2021		dport = htons(fin->fin_data[1]);
2022	} else {
2023		sport = 0;
2024		dport = 0;
2025	}
2026
2027	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
2028	hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
2029	nat = nat_table[1][hv];
2030	for (; nat; nat = nat->nat_hnext[1]) {
2031		nflags = nat->nat_flags;
2032		if ((!ifp || ifp == nat->nat_ifp) &&
2033		    nat->nat_oip.s_addr == src.s_addr &&
2034		    nat->nat_outip.s_addr == dst &&
2035		    ((p == 0) || (p == nat->nat_p))) {
2036			switch (p)
2037			{
2038			case IPPROTO_TCP :
2039			case IPPROTO_UDP :
2040				if (nat->nat_oport != sport)
2041					continue;
2042				if (nat->nat_outport != dport)
2043					continue;
2044				break;
2045			default :
2046				break;
2047			}
2048
2049			ipn = nat->nat_ptr;
2050			if ((ipn != NULL) && (nat->nat_aps != NULL))
2051				if (appr_match(fin, nat) != 0)
2052					continue;
2053			return nat;
2054		}
2055	}
2056	if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2057		return NULL;
2058	if (!rw) {
2059		RWLOCK_EXIT(&ipf_nat);
2060	}
2061	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
2062	hv = NAT_HASH_FN(src.s_addr, dst, ipf_nattable_sz);
2063	if (!rw) {
2064		WRITE_ENTER(&ipf_nat);
2065	}
2066	nat = nat_table[1][hv];
2067	for (; nat; nat = nat->nat_hnext[1]) {
2068		nflags = nat->nat_flags;
2069		if (ifp && ifp != nat->nat_ifp)
2070			continue;
2071		if (!(nflags & FI_WILDP))
2072			continue;
2073		if (nat->nat_oip.s_addr != src.s_addr ||
2074		    nat->nat_outip.s_addr != dst)
2075			continue;
2076		if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
2077		    ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) {
2078			nat_tabmove(fin, nat);
2079			break;
2080		}
2081	}
2082	if (!rw) {
2083		MUTEX_DOWNGRADE(&ipf_nat);
2084	}
2085	return nat;
2086}
2087
2088
2089/*
2090 * This function is only called for TCP/UDP NAT table entries where the
2091 * original was placed in the table without hashing on the ports and we now
2092 * want to include hashing on port numbers.
2093 */
2094static void nat_tabmove(fin, nat)
2095fr_info_t *fin;
2096nat_t *nat;
2097{
2098	register u_short sport, dport;
2099	u_int hv, nflags;
2100	nat_t **natp;
2101
2102	nflags = nat->nat_flags;
2103
2104	sport = ntohs(fin->fin_data[0]);
2105	dport = ntohs(fin->fin_data[1]);
2106
2107	/*
2108	 * Remove the NAT entry from the old location
2109	 */
2110	if (nat->nat_hnext[0])
2111		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
2112	*nat->nat_phnext[0] = nat->nat_hnext[0];
2113
2114	if (nat->nat_hnext[1])
2115		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
2116	*nat->nat_phnext[1] = nat->nat_hnext[1];
2117
2118	/*
2119	 * Add into the NAT table in the new position
2120	 */
2121	hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, 0xffffffff);
2122	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2123	natp = &nat_table[0][hv];
2124	if (*natp)
2125		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2126	nat->nat_phnext[0] = natp;
2127	nat->nat_hnext[0] = *natp;
2128	*natp = nat;
2129
2130	hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, 0xffffffff);
2131	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2132	natp = &nat_table[1][hv];
2133	if (*natp)
2134		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2135	nat->nat_phnext[1] = natp;
2136	nat->nat_hnext[1] = *natp;
2137	*natp = nat;
2138}
2139
2140
2141/*
2142 * Lookup a nat entry based on the source 'real' ip address/port and
2143 * destination address/port.  We use this lookup when sending a packet out,
2144 * we're looking for a table entry, based on the source address.
2145 * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2146 */
2147nat_t *nat_outlookup(fin, flags, p, src, dst, rw)
2148fr_info_t *fin;
2149register u_int flags, p;
2150struct in_addr src , dst;
2151int rw;
2152{
2153	register u_short sport, dport;
2154	register nat_t *nat;
2155	register int nflags;
2156	ipnat_t *ipn;
2157	u_32_t srcip;
2158	void *ifp;
2159	u_int hv;
2160
2161	ifp = fin->fin_ifp;
2162	srcip = src.s_addr;
2163	if (flags & IPN_TCPUDP) {
2164		sport = ntohs(fin->fin_data[0]);
2165		dport = ntohs(fin->fin_data[1]);
2166	} else {
2167		sport = 0;
2168		dport = 0;
2169	}
2170
2171	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
2172	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
2173	nat = nat_table[0][hv];
2174	for (; nat; nat = nat->nat_hnext[0]) {
2175		nflags = nat->nat_flags;
2176
2177		if ((!ifp || ifp == nat->nat_ifp) &&
2178		    nat->nat_inip.s_addr == srcip &&
2179		    nat->nat_oip.s_addr == dst.s_addr &&
2180		    ((p == 0) || (p == nat->nat_p))) {
2181			switch (p)
2182			{
2183			case IPPROTO_TCP :
2184			case IPPROTO_UDP :
2185				if (nat->nat_oport != dport)
2186					continue;
2187				if (nat->nat_inport != sport)
2188					continue;
2189				break;
2190			default :
2191				break;
2192			}
2193
2194			ipn = nat->nat_ptr;
2195			if ((ipn != NULL) && (nat->nat_aps != NULL))
2196				if (appr_match(fin, nat) != 0)
2197					continue;
2198			return nat;
2199		}
2200	}
2201	if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2202		return NULL;
2203	if (!rw) {
2204		RWLOCK_EXIT(&ipf_nat);
2205	}
2206
2207	hv = NAT_HASH_FN(dst.s_addr, srcip, ipf_nattable_sz);
2208	if (!rw) {
2209		WRITE_ENTER(&ipf_nat);
2210	}
2211	nat = nat_table[0][hv];
2212	for (; nat; nat = nat->nat_hnext[0]) {
2213		nflags = nat->nat_flags;
2214		if (ifp && ifp != nat->nat_ifp)
2215			continue;
2216		if (!(nflags & FI_WILDP))
2217			continue;
2218		if ((nat->nat_inip.s_addr != srcip) ||
2219		    (nat->nat_oip.s_addr != dst.s_addr))
2220			continue;
2221		if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
2222		    ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
2223			nat_tabmove(fin, nat);
2224			break;
2225		}
2226	}
2227	if (!rw) {
2228		MUTEX_DOWNGRADE(&ipf_nat);
2229	}
2230	return nat;
2231}
2232
2233
2234/*
2235 * Lookup the NAT tables to search for a matching redirect
2236 */
2237nat_t *nat_lookupredir(np)
2238register natlookup_t *np;
2239{
2240	nat_t *nat;
2241	fr_info_t fi;
2242
2243	bzero((char *)&fi, sizeof(fi));
2244	fi.fin_data[0] = ntohs(np->nl_inport);
2245	fi.fin_data[1] = ntohs(np->nl_outport);
2246
2247	/*
2248	 * If nl_inip is non null, this is a lookup based on the real
2249	 * ip address. Else, we use the fake.
2250	 */
2251	if ((nat = nat_outlookup(&fi, np->nl_flags, 0, np->nl_inip,
2252				 np->nl_outip, 0))) {
2253		np->nl_realip = nat->nat_outip;
2254		np->nl_realport = nat->nat_outport;
2255	}
2256	return nat;
2257}
2258
2259
2260static int nat_match(fin, np, ip)
2261fr_info_t *fin;
2262ipnat_t *np;
2263ip_t *ip;
2264{
2265	frtuc_t *ft;
2266
2267	if (ip->ip_v != 4)
2268		return 0;
2269
2270	if (np->in_p && fin->fin_p != np->in_p)
2271		return 0;
2272	if (fin->fin_out) {
2273		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2274			return 0;
2275		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
2276		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
2277			return 0;
2278		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
2279		    ^ ((np->in_flags & IPN_NOTDST) != 0))
2280			return 0;
2281	} else {
2282		if (!(np->in_redir & NAT_REDIRECT))
2283			return 0;
2284		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
2285		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
2286			return 0;
2287		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
2288		    ^ ((np->in_flags & IPN_NOTDST) != 0))
2289			return 0;
2290	}
2291
2292	ft = &np->in_tuc;
2293	if (!(fin->fin_fl & FI_TCPUDP) ||
2294	    (fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) {
2295		if (ft->ftu_scmp || ft->ftu_dcmp)
2296			return 0;
2297		return 1;
2298	}
2299
2300	return fr_tcpudpchk(ft, fin);
2301}
2302
2303
2304/*
2305 * Packets going out on the external interface go through this.
2306 * Here, the source address requires alteration, if anything.
2307 */
2308int ip_natout(ip, fin)
2309ip_t *ip;
2310fr_info_t *fin;
2311{
2312	register ipnat_t *np = NULL;
2313	register u_32_t ipa;
2314	tcphdr_t *tcp = NULL;
2315	u_short sport = 0, dport = 0, *csump = NULL;
2316	int natadd = 1, i, icmpset = 1;
2317	u_int nflags = 0, hv, msk;
2318	struct ifnet *ifp;
2319	frentry_t *fr;
2320	void *sifp;
2321	u_32_t iph;
2322	nat_t *nat;
2323
2324	if (nat_list == NULL || (fr_nat_lock))
2325		return 0;
2326
2327	if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
2328	    fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) {
2329		sifp = fin->fin_ifp;
2330		fin->fin_ifp = fr->fr_tif.fd_ifp;
2331	} else
2332		sifp = fin->fin_ifp;
2333	ifp = fin->fin_ifp;
2334
2335	if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2336		if (fin->fin_p == IPPROTO_TCP)
2337			nflags = IPN_TCP;
2338		else if (fin->fin_p == IPPROTO_UDP)
2339			nflags = IPN_UDP;
2340		if ((nflags & IPN_TCPUDP)) {
2341			tcp = (tcphdr_t *)fin->fin_dp;
2342			sport = tcp->th_sport;
2343			dport = tcp->th_dport;
2344		}
2345	}
2346
2347	ipa = fin->fin_saddr;
2348
2349	READ_ENTER(&ipf_nat);
2350
2351	if ((fin->fin_p == IPPROTO_ICMP) &&
2352	    (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
2353		icmpset = 1;
2354	else if ((fin->fin_fl & FI_FRAG) &&
2355	    (nat = ipfr_nat_knownfrag(ip, fin)))
2356		natadd = 0;
2357	else if ((nat = nat_outlookup(fin, nflags|FI_WILDP|FI_WILDA,
2358				      (u_int)fin->fin_p, fin->fin_src,
2359				      fin->fin_dst, 0))) {
2360		nflags = nat->nat_flags;
2361		if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2362			if ((nflags & FI_W_SPORT) &&
2363			    (nat->nat_inport != sport))
2364				nat->nat_inport = sport;
2365			if ((nflags & FI_W_DPORT) &&
2366			    (nat->nat_oport != dport))
2367				nat->nat_oport = dport;
2368
2369			if (nat->nat_outport == 0)
2370				nat->nat_outport = sport;
2371			nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
2372			nflags = nat->nat_flags;
2373			nat_stats.ns_wilds--;
2374		}
2375	} else {
2376		RWLOCK_EXIT(&ipf_nat);
2377
2378		msk = 0xffffffff;
2379		i = 32;
2380
2381		WRITE_ENTER(&ipf_nat);
2382		/*
2383		 * If there is no current entry in the nat table for this IP#,
2384		 * create one for it (if there is a matching rule).
2385		 */
2386maskloop:
2387		iph = ipa & htonl(msk);
2388		hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
2389		for (np = nat_rules[hv]; np; np = np->in_mnext)
2390		{
2391			if (np->in_ifp && (np->in_ifp != ifp))
2392				continue;
2393			if ((np->in_flags & IPN_RF) &&
2394			    !(np->in_flags & nflags))
2395				continue;
2396			if (np->in_flags & IPN_FILTER) {
2397				if (!nat_match(fin, np, ip))
2398					continue;
2399			} else if ((ipa & np->in_inmsk) != np->in_inip)
2400				continue;
2401			if (*np->in_plabel && !appr_ok(ip, tcp, np))
2402				continue;
2403			nat = nat_new(fin, ip, np, NULL,
2404				      (u_int)nflags, NAT_OUTBOUND);
2405			if (nat != NULL) {
2406				np->in_hits++;
2407				break;
2408			}
2409		}
2410		if ((np == NULL) && (i > 0)) {
2411			do {
2412				i--;
2413				msk <<= 1;
2414			} while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
2415			if (i >= 0)
2416				goto maskloop;
2417		}
2418		MUTEX_DOWNGRADE(&ipf_nat);
2419	}
2420
2421	/*
2422	 * NOTE: ipf_nat must now only be held as a read lock
2423	 */
2424	if (nat) {
2425		np = nat->nat_ptr;
2426		if (natadd && (fin->fin_fl & FI_FRAG) && np)
2427			ipfr_nat_newfrag(ip, fin, nat);
2428		MUTEX_ENTER(&nat->nat_lock);
2429		if (fin->fin_p != IPPROTO_TCP) {
2430			if (np && np->in_age[1])
2431				nat->nat_age = np->in_age[1];
2432			else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2433				nat->nat_age = fr_defnaticmpage;
2434			else
2435				nat->nat_age = fr_defnatage;
2436		}
2437		nat->nat_bytes += ip->ip_len;
2438		nat->nat_pkts++;
2439		MUTEX_EXIT(&nat->nat_lock);
2440
2441		/*
2442		 * Fix up checksums, not by recalculating them, but
2443		 * simply computing adjustments.
2444		 */
2445		if (nflags == IPN_ICMPERR) {
2446			u_32_t s1, s2, sumd;
2447
2448			s1 = LONG_SUM(ntohl(fin->fin_saddr));
2449			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
2450			CALC_SUMD(s1, s2, sumd);
2451			fix_outcksum(fin, &ip->ip_sum, sumd);
2452		}
2453#if (SOLARIS || defined(__sgi)) || !defined(_KERNEL)
2454		else {
2455			if (nat->nat_dir == NAT_OUTBOUND)
2456				fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2457			else
2458				fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2459		}
2460#endif
2461		/*
2462		 * Only change the packet contents, not what is filtered upon.
2463		 */
2464		ip->ip_src = nat->nat_outip;
2465
2466		if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2467
2468			if ((nat->nat_outport != 0) && (tcp != NULL)) {
2469				tcp->th_sport = nat->nat_outport;
2470				fin->fin_data[0] = ntohs(tcp->th_sport);
2471			}
2472
2473			if (fin->fin_p == IPPROTO_TCP) {
2474				csump = &tcp->th_sum;
2475				MUTEX_ENTER(&nat->nat_lock);
2476				fr_tcp_age(&nat->nat_age,
2477					   nat->nat_tcpstate, fin, 1, 0);
2478				if (nat->nat_age < fr_defnaticmpage)
2479					nat->nat_age = fr_defnaticmpage;
2480#ifdef LARGE_NAT
2481				else if ((!np || !np->in_age[1]) &&
2482				         (nat->nat_age > fr_defnatage))
2483					nat->nat_age = fr_defnatage;
2484#endif
2485				/*
2486				 * Increase this because we may have
2487				 * "keep state" following this too and
2488				 * packet storms can occur if this is
2489				 * removed too quickly.
2490				 */
2491				if (nat->nat_age == fr_tcpclosed)
2492					nat->nat_age = fr_tcplastack;
2493
2494                                /*
2495                                 * Do a MSS CLAMPING on a SYN packet,
2496                                 * only deal IPv4 for now.
2497                                 */
2498                                if (nat->nat_mssclamp &&
2499                                    (tcp->th_flags & TH_SYN) != 0)
2500                                        nat_mssclamp(tcp, nat->nat_mssclamp,
2501						     fin, csump);
2502
2503				MUTEX_EXIT(&nat->nat_lock);
2504			} else if (fin->fin_p == IPPROTO_UDP) {
2505				udphdr_t *udp = (udphdr_t *)tcp;
2506
2507				if (udp->uh_sum)
2508					csump = &udp->uh_sum;
2509			}
2510
2511			if (csump) {
2512				if (nat->nat_dir == NAT_OUTBOUND)
2513					fix_outcksum(fin, csump,
2514						     nat->nat_sumd[1]);
2515				else
2516					fix_incksum(fin, csump,
2517						    nat->nat_sumd[1]);
2518			}
2519		}
2520
2521		if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2522		     (tcp != NULL && dport == np->in_dport))) {
2523			i = appr_check(ip, fin, nat);
2524			if (i == 0)
2525				i = 1;
2526			else if (i == -1)
2527				nat->nat_drop[1]++;
2528		} else
2529			i = 1;
2530		ATOMIC_INCL(nat_stats.ns_mapped[1]);
2531		RWLOCK_EXIT(&ipf_nat);	/* READ */
2532		fin->fin_ifp = sifp;
2533		return i;
2534	}
2535	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
2536	fin->fin_ifp = sifp;
2537	return 0;
2538}
2539
2540
2541/*
2542 * Packets coming in from the external interface go through this.
2543 * Here, the destination address requires alteration, if anything.
2544 */
2545int ip_natin(ip, fin)
2546ip_t *ip;
2547fr_info_t *fin;
2548{
2549	register struct in_addr src;
2550	register struct in_addr in;
2551	register ipnat_t *np;
2552	u_short sport = 0, dport = 0, *csump = NULL;
2553	u_int nflags = 0, natadd = 1, hv, msk;
2554	struct ifnet *ifp = fin->fin_ifp;
2555	tcphdr_t *tcp = NULL;
2556	int i, icmpset = 0;
2557	nat_t *nat;
2558	u_32_t iph;
2559
2560	if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
2561		return 0;
2562
2563	if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2564		if (fin->fin_p == IPPROTO_TCP)
2565			nflags = IPN_TCP;
2566		else if (fin->fin_p == IPPROTO_UDP)
2567			nflags = IPN_UDP;
2568		if ((nflags & IPN_TCPUDP)) {
2569			tcp = (tcphdr_t *)fin->fin_dp;
2570			sport = tcp->th_sport;
2571			dport = tcp->th_dport;
2572		}
2573	}
2574
2575	in = fin->fin_dst;
2576	/* make sure the source address is to be redirected */
2577	src = fin->fin_src;
2578
2579	READ_ENTER(&ipf_nat);
2580
2581	if ((fin->fin_p == IPPROTO_ICMP) &&
2582	    (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
2583		icmpset = 1;
2584	else if ((fin->fin_fl & FI_FRAG) &&
2585		 (nat = ipfr_nat_knownfrag(ip, fin)))
2586		natadd = 0;
2587	else if ((nat = nat_inlookup(fin, nflags|FI_WILDP|FI_WILDA,
2588				     (u_int)fin->fin_p, fin->fin_src, in, 0))) {
2589		nflags = nat->nat_flags;
2590		if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2591			if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
2592				nat->nat_oport = sport;
2593			if ((nat->nat_outport != dport) &&
2594				 (nflags & FI_W_SPORT))
2595				nat->nat_outport = dport;
2596			nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2597			nflags = nat->nat_flags;
2598			nat_stats.ns_wilds--;
2599		}
2600	} else {
2601		RWLOCK_EXIT(&ipf_nat);
2602
2603		msk = 0xffffffff;
2604		i = 32;
2605
2606		WRITE_ENTER(&ipf_nat);
2607		/*
2608		 * If there is no current entry in the nat table for this IP#,
2609		 * create one for it (if there is a matching rule).
2610		 */
2611maskloop:
2612		iph = in.s_addr & htonl(msk);
2613		hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2614		for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2615			if ((np->in_ifp && (np->in_ifp != ifp)) ||
2616			    (np->in_p && (np->in_p != fin->fin_p)) ||
2617			    (np->in_flags && !(nflags & np->in_flags)))
2618				continue;
2619			if (np->in_flags & IPN_FILTER) {
2620				if (!nat_match(fin, np, ip))
2621					continue;
2622			} else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2623				continue;
2624			if ((!np->in_pmin || (np->in_flags & IPN_FILTER) ||
2625			     ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2626			      (ntohs(dport) >= ntohs(np->in_pmin)))))
2627				if ((nat = nat_new(fin, ip, np, NULL, nflags,
2628						    NAT_INBOUND))) {
2629					np->in_hits++;
2630					break;
2631				}
2632		}
2633
2634		if ((np == NULL) && (i > 0)) {
2635			do {
2636				i--;
2637				msk <<= 1;
2638			} while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2639			if (i >= 0)
2640				goto maskloop;
2641		}
2642		MUTEX_DOWNGRADE(&ipf_nat);
2643	}
2644
2645	/*
2646	 * NOTE: ipf_nat must now only be held as a read lock
2647	 */
2648	if (nat) {
2649		np = nat->nat_ptr;
2650		fin->fin_fr = nat->nat_fr;
2651		if (natadd && (fin->fin_fl & FI_FRAG) && np)
2652			ipfr_nat_newfrag(ip, fin, nat);
2653		if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2654		     (tcp != NULL && sport == np->in_dport))) {
2655			i = appr_check(ip, fin, nat);
2656			if (i == -1) {
2657				nat->nat_drop[0]++;
2658				RWLOCK_EXIT(&ipf_nat);
2659				return i;
2660			}
2661		}
2662
2663		MUTEX_ENTER(&nat->nat_lock);
2664		if (fin->fin_p != IPPROTO_TCP) {
2665			if (np && np->in_age[0])
2666				nat->nat_age = np->in_age[0];
2667			else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2668				nat->nat_age = fr_defnaticmpage;
2669			else
2670				nat->nat_age = fr_defnatage;
2671		}
2672		nat->nat_bytes += ip->ip_len;
2673		nat->nat_pkts++;
2674		MUTEX_EXIT(&nat->nat_lock);
2675
2676		/*
2677		 * Fix up checksums, not by recalculating them, but
2678		 * simply computing adjustments.
2679		 */
2680		if (nat->nat_dir == NAT_OUTBOUND)
2681			fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2682		else
2683			fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2684
2685		ip->ip_dst = nat->nat_inip;
2686		fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2687
2688		if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2689
2690			if ((nat->nat_inport != 0) && (tcp != NULL)) {
2691				tcp->th_dport = nat->nat_inport;
2692				fin->fin_data[1] = ntohs(tcp->th_dport);
2693			}
2694
2695			if (fin->fin_p == IPPROTO_TCP) {
2696				csump = &tcp->th_sum;
2697				MUTEX_ENTER(&nat->nat_lock);
2698				fr_tcp_age(&nat->nat_age,
2699					   nat->nat_tcpstate, fin, 0, 0);
2700				if (nat->nat_age < fr_defnaticmpage)
2701					nat->nat_age = fr_defnaticmpage;
2702#ifdef LARGE_NAT
2703				else if ((!np || !np->in_age[0]) &&
2704					 (nat->nat_age > fr_defnatage))
2705					nat->nat_age = fr_defnatage;
2706#endif
2707				/*
2708				 * Increase this because we may have
2709				 * "keep state" following this too and
2710				 * packet storms can occur if this is
2711				 * removed too quickly.
2712				 */
2713				if (nat->nat_age == fr_tcpclosed)
2714					nat->nat_age = fr_tcplastack;
2715                                /*
2716                                 * Do a MSS CLAMPING on a SYN packet,
2717                                 * only deal IPv4 for now.
2718                                 */
2719                                if (nat->nat_mssclamp &&
2720                                    (tcp->th_flags & TH_SYN) != 0)
2721                                        nat_mssclamp(tcp, nat->nat_mssclamp,
2722						     fin, csump);
2723
2724				MUTEX_EXIT(&nat->nat_lock);
2725			} else if (fin->fin_p == IPPROTO_UDP) {
2726				udphdr_t *udp = (udphdr_t *)tcp;
2727
2728				if (udp->uh_sum)
2729					csump = &udp->uh_sum;
2730			}
2731
2732			if (csump) {
2733				if (nat->nat_dir == NAT_OUTBOUND)
2734					fix_incksum(fin, csump,
2735						    nat->nat_sumd[0]);
2736				else
2737					fix_outcksum(fin, csump,
2738						    nat->nat_sumd[0]);
2739			}
2740		}
2741		ATOMIC_INCL(nat_stats.ns_mapped[0]);
2742		RWLOCK_EXIT(&ipf_nat);			/* READ */
2743		return 1;
2744	}
2745	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
2746	return 0;
2747}
2748
2749
2750/*
2751 * Free all memory used by NAT structures allocated at runtime.
2752 */
2753void ip_natunload()
2754{
2755	WRITE_ENTER(&ipf_nat);
2756	(void) nat_clearlist();
2757	(void) nat_flushtable();
2758	RWLOCK_EXIT(&ipf_nat);
2759
2760	if (nat_table[0] != NULL) {
2761		KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2762		nat_table[0] = NULL;
2763	}
2764	if (nat_table[1] != NULL) {
2765		KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2766		nat_table[1] = NULL;
2767	}
2768	if (nat_rules != NULL) {
2769		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2770		nat_rules = NULL;
2771	}
2772	if (rdr_rules != NULL) {
2773		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2774		rdr_rules = NULL;
2775	}
2776	if (maptable != NULL) {
2777		KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2778		maptable = NULL;
2779	}
2780}
2781
2782
2783/*
2784 * Slowly expire held state for NAT entries.  Timeouts are set in
2785 * expectation of this being called twice per second.
2786 */
2787void ip_natexpire()
2788{
2789	register struct nat *nat, **natp;
2790#if defined(_KERNEL) && !SOLARIS
2791	int s;
2792#endif
2793
2794	SPL_NET(s);
2795	WRITE_ENTER(&ipf_nat);
2796	for (natp = &nat_instances; (nat = *natp); ) {
2797		nat->nat_age--;
2798		if (nat->nat_age) {
2799			natp = &nat->nat_next;
2800			continue;
2801		}
2802		*natp = nat->nat_next;
2803#ifdef	IPFILTER_LOG
2804		nat_log(nat, NL_EXPIRE);
2805#endif
2806		nat_delete(nat);
2807		nat_stats.ns_expire++;
2808	}
2809	RWLOCK_EXIT(&ipf_nat);
2810	SPL_X(s);
2811}
2812
2813
2814/*
2815 */
2816void ip_natsync(ifp)
2817void *ifp;
2818{
2819	register ipnat_t *n;
2820	register nat_t *nat;
2821	register u_32_t sum1, sum2, sumd;
2822	struct in_addr in;
2823	ipnat_t *np;
2824	void *ifp2;
2825#if defined(_KERNEL) && !SOLARIS
2826	int s;
2827#endif
2828
2829	/*
2830	 * Change IP addresses for NAT sessions for any protocol except TCP
2831	 * since it will break the TCP connection anyway.
2832	 */
2833	SPL_NET(s);
2834	WRITE_ENTER(&ipf_nat);
2835	for (nat = nat_instances; nat; nat = nat->nat_next)
2836		if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2837		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2838		    (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2839			ifp2 = nat->nat_ifp;
2840			/*
2841			 * Change the map-to address to be the same as the
2842			 * new one.
2843			 */
2844			sum1 = nat->nat_outip.s_addr;
2845			if (fr_ifpaddr(4, ifp2, &in) != -1)
2846				nat->nat_outip = in;
2847			sum2 = nat->nat_outip.s_addr;
2848
2849			if (sum1 == sum2)
2850				continue;
2851			/*
2852			 * Readjust the checksum adjustment to take into
2853			 * account the new IP#.
2854			 */
2855			CALC_SUMD(sum1, sum2, sumd);
2856			/* XXX - dont change for TCP when solaris does
2857			 * hardware checksumming.
2858			 */
2859			sumd += nat->nat_sumd[0];
2860			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2861			nat->nat_sumd[1] = nat->nat_sumd[0];
2862		}
2863
2864	for (n = nat_list; (n != NULL); n = n->in_next)
2865		if (n->in_ifp == ifp) {
2866			n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2867			if (!n->in_ifp)
2868				n->in_ifp = (void *)-1;
2869		}
2870	RWLOCK_EXIT(&ipf_nat);
2871	SPL_X(s);
2872}
2873
2874
#ifdef	IPFILTER_LOG
/*
 * Build a natlog record describing session nat and hand it to ipllog()
 * on the IPL_LOGNAT log.
 *
 * nat  - session being reported
 * type - event type stored in the record's nl_type field
 *
 * nl_rule is the position of the session's rule in nat_list, or -1 when the
 * session has no rule, the rule is not on the list, or LARGE_NAT is defined
 * (in which case the list is not searched).
 */
void nat_log(nat, type)
struct nat *nat;
u_int type;
{
	struct ipnat *np;
	struct natlog natl;
	void *items[1];
	size_t sizes[1];
	int rulen, types[1];

	natl.nl_type = type;
	natl.nl_p = nat->nat_p;
	natl.nl_rule = -1;

	natl.nl_inip = nat->nat_inip;
	natl.nl_inport = nat->nat_inport;
	natl.nl_outip = nat->nat_outip;
	natl.nl_outport = nat->nat_outport;
	natl.nl_origip = nat->nat_oip;
	natl.nl_origport = nat->nat_oport;

	natl.nl_bytes = nat->nat_bytes;
	natl.nl_pkts = nat->nat_pkts;

#ifndef LARGE_NAT
	if (nat->nat_ptr != NULL) {
		rulen = 0;
		for (np = nat_list; np != NULL; np = np->in_next) {
			if (np == nat->nat_ptr) {
				natl.nl_rule = rulen;
				break;
			}
			rulen++;
		}
	}
#endif

	/* A single item: the record itself. */
	items[0] = &natl;
	sizes[0] = sizeof(natl);
	types[0] = 0;

	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
}
#endif
2913
2914
#if defined(__OpenBSD__)
/*
 * OpenBSD-only hook: calls frsync().  The ifp argument is not used.
 */
void nat_ifdetach(ifp)
void *ifp;
{
	frsync();
}
#endif
2923
2924
2925/*
2926 * Check for MSS option and clamp it if necessary.
2927 */
2928static void nat_mssclamp(tcp, maxmss, fin, csump)
2929tcphdr_t *tcp;
2930u_32_t maxmss;
2931fr_info_t *fin;
2932u_short *csump;
2933{
2934	u_char *cp, *ep, opt;
2935	int hlen, advance;
2936	u_32_t mss, sumd;
2937	u_short v;
2938
2939	hlen = tcp->th_off << 2;
2940	if (hlen > sizeof(*tcp)) {
2941		cp = (u_char *)tcp + sizeof(*tcp);
2942		ep = (u_char *)tcp + hlen;
2943
2944		while (cp < ep) {
2945			opt = cp[0];
2946			if (opt == TCPOPT_EOL)
2947				break;
2948			else if (opt == TCPOPT_NOP) {
2949				cp++;
2950				continue;
2951			}
2952
2953			if (&cp[1] >= ep)
2954				break;
2955			advance = cp[1];
2956			if (&cp[advance] > ep)
2957				break;
2958			switch (opt) {
2959			case TCPOPT_MAXSEG:
2960				if (advance != 4)
2961					break;
2962				bcopy(&cp[2], &v, sizeof(v));
2963				mss = ntohs(v);
2964				if (mss > maxmss) {
2965					v = htons(maxmss);
2966					bcopy(&v, &cp[2], sizeof(v));
2967					CALC_SUMD(mss, maxmss, sumd);
2968					fix_outcksum(fin, csump, sumd);
2969				}
2970				break;
2971			default:
2972				/* ignore unknown options */
2973				break;
2974			}
2975
2976			cp += advance;
2977		}
2978	}
2979}
2980