ip_nat.c revision 55929
1/*
2 * Copyright (C) 1995-1998 by Darren Reed.
3 *
4 * Redistribution and use in source and binary forms are permitted
5 * provided that this notice is preserved and due credit is given
6 * to the original author and the contributors.
7 *
8 * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
9 */
10#if !defined(lint)
11static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
12/*static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.2.2.11 1999/12/17 13:05:40 darrenr Exp $";*/
13static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 55929 2000-01-13 19:01:33Z guido $";
14#endif
15
16#if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL)
17#define _KERNEL
18#endif
19
20#include <sys/errno.h>
21#include <sys/types.h>
22#include <sys/param.h>
23#include <sys/time.h>
24#include <sys/file.h>
25#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
26    defined(_KERNEL)
27# include "opt_ipfilter_log.h"
28#endif
29#if !defined(_KERNEL) && !defined(KERNEL)
30# include <stdio.h>
31# include <string.h>
32# include <stdlib.h>
33#endif
34#if ((defined(KERNEL) && (__FreeBSD_version >= 220000)) || \
35     (defined(_KERNEL) && (__FreeBSD_version >= 40013)))
36# include <sys/filio.h>
37# include <sys/fcntl.h>
38#else
39# include <sys/ioctl.h>
40#endif
41#include <sys/fcntl.h>
42#include <sys/uio.h>
43#ifndef linux
44# include <sys/protosw.h>
45#endif
46#include <sys/socket.h>
47#if defined(_KERNEL) && !defined(linux)
48# include <sys/systm.h>
49#endif
50#if !defined(__SVR4) && !defined(__svr4__)
51# ifndef linux
52#  include <sys/mbuf.h>
53# endif
54#else
55# include <sys/filio.h>
56# include <sys/byteorder.h>
57# ifdef _KERNEL
58#  include <sys/dditypes.h>
59# endif
60# include <sys/stream.h>
61# include <sys/kmem.h>
62#endif
63#if __FreeBSD_version >= 300000
64# include <sys/queue.h>
65#endif
66#include <net/if.h>
67#if __FreeBSD_version >= 300000
68# include <net/if_var.h>
69# if defined(_KERNEL) && !defined(IPFILTER_LKM)
70#  include "opt_ipfilter.h"
71# endif
72#endif
73#ifdef sun
74# include <net/af.h>
75#endif
76#include <net/route.h>
77#include <netinet/in.h>
78#include <netinet/in_systm.h>
79#include <netinet/ip.h>
80
81#ifdef __sgi
82# ifdef IFF_DRVRLOCK /* IRIX6 */
83#include <sys/hashing.h>
84#include <netinet/in_var.h>
85# endif
86#endif
87
88#ifdef RFC1825
89# include <vpn/md5.h>
90# include <vpn/ipsec.h>
91extern struct ifnet vpnif;
92#endif
93
94#ifndef linux
95# include <netinet/ip_var.h>
96#endif
97#include <netinet/tcp.h>
98#include <netinet/udp.h>
99#include <netinet/ip_icmp.h>
100#include "netinet/ip_compat.h"
101#include <netinet/tcpip.h>
102#include "netinet/ip_fil.h"
103#include "netinet/ip_proxy.h"
104#include "netinet/ip_nat.h"
105#include "netinet/ip_frag.h"
106#include "netinet/ip_state.h"
107#if (__FreeBSD_version >= 300000)
108# include <sys/malloc.h>
109#endif
110#ifndef	MIN
111# define	MIN(a,b)	(((a)<(b))?(a):(b))
112#endif
113#undef	SOCKADDR_IN
114#define	SOCKADDR_IN	struct sockaddr_in
115
116nat_t	**nat_table[2] = { NULL, NULL },
117	*nat_instances = NULL;
118ipnat_t	*nat_list = NULL;
119u_int	ipf_nattable_sz = NAT_TABLE_SZ;
120u_int	ipf_natrules_sz = NAT_SIZE;
121u_int	ipf_rdrrules_sz = RDR_SIZE;
122u_32_t	nat_masks = 0;
123u_32_t	rdr_masks = 0;
124ipnat_t	**nat_rules = NULL;
125ipnat_t	**rdr_rules = NULL;
126
127u_long	fr_defnatage = DEF_NAT_AGE,
128	fr_defnaticmpage = 6;		/* 3 seconds */
129natstat_t nat_stats;
130#if	(SOLARIS || defined(__sgi)) && defined(_KERNEL)
131extern	kmutex_t	ipf_rw;
132extern	KRWLOCK_T	ipf_nat;
133#endif
134
135static	int	nat_flushtable __P((void));
136static	int	nat_clearlist __P((void));
137static	void	nat_delete __P((struct nat *));
138static	void	nat_delrdr __P((struct ipnat *));
139static	void	nat_delnat __P((struct ipnat *));
140
141
142int nat_init()
143{
144	KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
145	if (nat_table[0] != NULL)
146		bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
147	else
148		return -1;
149
150	KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
151	if (nat_table[1] != NULL)
152		bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
153	else
154		return -1;
155
156	KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
157	if (nat_rules != NULL)
158		bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
159	else
160		return -1;
161
162	KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
163	if (rdr_rules != NULL)
164		bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
165	else
166		return -1;
167	return 0;
168}
169
170
171void nat_delrdr(n)
172ipnat_t *n;
173{
174	ipnat_t **n1;
175	u_32_t iph;
176	u_int hv;
177
178	iph = n->in_outip & n->in_outmsk;
179	hv = NAT_HASH_FN(iph, ipf_rdrrules_sz);
180	for (n1 = &rdr_rules[hv]; *n1 && (*n1 != n); n1 = &(*n1)->in_rnext)
181		;
182	if (*n1)
183		*n1 = n->in_rnext;
184}
185
186
187static void nat_delnat(n)
188ipnat_t *n;
189{
190	ipnat_t **n1;
191	u_32_t iph;
192	u_int hv;
193
194	iph = n->in_inip & n->in_inmsk;
195	hv = NAT_HASH_FN(iph, ipf_natrules_sz);
196	for (n1 = &nat_rules[hv]; *n1 && (*n1 != n); n1 = &(*n1)->in_mnext)
197		;
198	if (*n1)
199		*n1 = n->in_mnext;
200}
201
202
203void fix_outcksum(sp, n , len)
204u_short *sp;
205u_32_t n;
206int len;
207{
208	register u_short sumshort;
209	register u_32_t sum1;
210
211	if (!n)
212		return;
213#if SOLARIS2 >= 6
214	else if (n & NAT_HW_CKSUM) {
215		*sp = n & 0xffff;
216		return;
217	}
218#endif
219	sum1 = (~ntohs(*sp)) & 0xffff;
220	sum1 += (n);
221	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
222	/* Again */
223	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
224	sumshort = ~(u_short)sum1;
225	*(sp) = htons(sumshort);
226}
227
228
229void fix_incksum(sp, n , len)
230u_short *sp;
231u_32_t n;
232int len;
233{
234	register u_short sumshort;
235	register u_32_t sum1;
236
237	if (!n)
238		return;
239#if SOLARIS2 >= 6
240	else if (n & NAT_HW_CKSUM) {
241		*sp = n & 0xffff;
242		return;
243	}
244#endif
245#ifdef sparc
246	sum1 = (~(*sp)) & 0xffff;
247#else
248	sum1 = (~ntohs(*sp)) & 0xffff;
249#endif
250	sum1 += ~(n) & 0xffff;
251	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
252	/* Again */
253	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
254	sumshort = ~(u_short)sum1;
255	*(sp) = htons(sumshort);
256}
257
258
259/*
260 * How the NAT is organised and works.
261 *
262 * Inside (interface y) NAT       Outside (interface x)
263 * -------------------- -+- -------------------------------------
 * Packet going          |   out, processed by ip_natout() for x
265 * ------------>         |   ------------>
266 * src=10.1.1.1          |   src=192.1.1.1
267 *                       |
268 *                       |   in, processed by ip_natin() for x
269 * <------------         |   <------------
270 * dst=10.1.1.1          |   dst=192.1.1.1
271 * -------------------- -+- -------------------------------------
272 * ip_natout() - changes ip_src and if required, sport
273 *             - creates a new mapping, if required.
274 * ip_natin()  - changes ip_dst and if required, dport
275 *
276 * In the NAT table, internal source is recorded as "in" and externally
277 * seen as "out".
278 */
279
280/*
281 * Handle ioctls which manipulate the NAT.
282 */
283int nat_ioctl(data, cmd, mode)
284#if defined(__NetBSD__) || defined(__OpenBSD__)
285u_long cmd;
286#else
287int cmd;
288#endif
289caddr_t data;
290int mode;
291{
292	register ipnat_t *nat, *nt, *n = NULL, **np = NULL;
293	int error = 0, ret, k;
294	ipnat_t natd;
295	u_32_t i, j;
296#if defined(_KERNEL) && !SOLARIS
297	int s;
298#endif
299
300#if (BSD >= 199306) && defined(_KERNEL)
301	if ((securelevel >= 2) && (mode & FWRITE))
302		return EPERM;
303#endif
304
305	nat = NULL;     /* XXX gcc -Wuninitialized */
306	KMALLOC(nt, ipnat_t *);
307	if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT))
308		IRCOPY(data, (char *)&natd, sizeof(natd));
309
310	/*
311	 * For add/delete, look to see if the NAT entry is already present
312	 */
313	SPL_NET(s);
314	WRITE_ENTER(&ipf_nat);
315	if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
316		nat = &natd;
317		nat->in_flags &= IPN_USERFLAGS;
318		if ((nat->in_redir & NAT_MAPBLK) == 0) {
319			nat->in_inip &= nat->in_inmsk;
320			if ((nat->in_flags & IPN_RANGE) == 0)
321				nat->in_outip &= nat->in_outmsk;
322		}
323		for (np = &nat_list; (n = *np); np = &n->in_next)
324			if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
325					IPN_CMPSIZ))
326				break;
327	}
328
329	switch (cmd)
330	{
331#ifdef  IPFILTER_LOG
332	case SIOCIPFFB :
333		if (!(mode & FWRITE))
334			error = EPERM;
335		else
336			*(int *)data = ipflog_clear(IPL_LOGNAT);
337		break;
338#endif
339	case SIOCADNAT :
340		if (!(mode & FWRITE)) {
341			error = EPERM;
342			break;
343		}
344		if (n) {
345			error = EEXIST;
346			break;
347		}
348		if (nt == NULL) {
349			error = ENOMEM;
350			break;
351		}
352		n = nt;
353		nt = NULL;
354		bcopy((char *)nat, (char *)n, sizeof(*n));
355		n->in_ifp = (void *)GETUNIT(n->in_ifname);
356		if (!n->in_ifp)
357			n->in_ifp = (void *)-1;
358		if (n->in_plabel[0] != '\0') {
359			n->in_apr = appr_match(n->in_p, n->in_plabel);
360			if (!n->in_apr) {
361				error = ENOENT;
362				break;
363			}
364		}
365		n->in_next = NULL;
366		*np = n;
367
368		if (n->in_redir & NAT_REDIRECT) {
369			u_int hv;
370
371			k = countbits(n->in_outmsk);
372			if ((k >= 0) && (k != 32))
373				rdr_masks |= 1 << k;
374			j = (n->in_outip & n->in_outmsk);
375			hv = NAT_HASH_FN(j, ipf_rdrrules_sz);
376			np = rdr_rules + hv;
377			while (*np != NULL)
378				np = &(*np)->in_rnext;
379			n->in_rnext = NULL;
380			*np = n;
381		}
382		if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
383			u_int hv;
384
385			k = countbits(n->in_inmsk);
386			if ((k >= 0) && (k != 32))
387				nat_masks |= 1 << k;
388			j = (n->in_inip & n->in_inmsk);
389			hv = NAT_HASH_FN(j, ipf_natrules_sz);
390			np = nat_rules + hv;
391			while (*np != NULL)
392				np = &(*np)->in_mnext;
393			n->in_mnext = NULL;
394			*np = n;
395		}
396
397		n->in_use = 0;
398		if (n->in_redir & NAT_MAPBLK)
399			n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
400		else if (n->in_flags & IPN_AUTOPORTMAP)
401			n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
402		else if (n->in_flags & IPN_RANGE)
403			n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
404		else
405			n->in_space = ~ntohl(n->in_outmsk);
406		/*
407		 * Calculate the number of valid IP addresses in the output
408		 * mapping range.  In all cases, the range is inclusive of
409		 * the start and ending IP addresses.
410		 * If to a CIDR address, lose 2: broadcast + network address
411		 *                               (so subtract 1)
412		 * If to a range, add one.
413		 * If to a single IP address, set to 1.
414		 */
415		if (n->in_space) {
416			if ((n->in_flags & IPN_RANGE) != 0)
417				n->in_space += 1;
418			else
419				n->in_space -= 1;
420		} else
421			n->in_space = 1;
422		if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
423		    ((n->in_flags & IPN_RANGE) == 0))
424			n->in_nip = ntohl(n->in_outip) + 1;
425		else
426			n->in_nip = ntohl(n->in_outip);
427		if (n->in_redir & NAT_MAP) {
428			n->in_pnext = ntohs(n->in_pmin);
429			/*
430			 * Multiply by the number of ports made available.
431			 */
432			if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
433				n->in_space *= (ntohs(n->in_pmax) -
434						ntohs(n->in_pmin) + 1);
435				/*
436				 * Because two different sources can map to
437				 * different destinations but use the same
438				 * local IP#/port #.
439				 * If the result is smaller than in_space, then
440				 * we may have wrapped around 32bits.
441				 */
442				i = n->in_inmsk;
443				if ((i != 0) && (i != 0xffffffff)) {
444					j = n->in_space * (~ntohl(i) + 1);
445					if (j >= n->in_space)
446						n->in_space = j;
447					else
448						n->in_space = 0xffffffff;
449				}
450			}
451			/*
452			 * If no protocol is specified, multiple by 256.
453			 */
454			if ((n->in_flags & IPN_TCPUDP) == 0) {
455					j = n->in_space * 256;
456					if (j >= n->in_space)
457						n->in_space = j;
458					else
459						n->in_space = 0xffffffff;
460			}
461		}
462		/* Otherwise, these fields are preset */
463		n = NULL;
464		nat_stats.ns_rules++;
465		break;
466	case SIOCRMNAT :
467		if (!(mode & FWRITE)) {
468			error = EPERM;
469			n = NULL;
470			break;
471		}
472		if (!n) {
473			error = ESRCH;
474			break;
475		}
476		if (n->in_redir & NAT_REDIRECT)
477			nat_delrdr(n);
478		if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
479			nat_delnat(n);
480		if (nat_list == NULL) {
481			nat_masks = 0;
482			rdr_masks = 0;
483		}
484		*np = n->in_next;
485		if (!n->in_use) {
486			if (n->in_apr)
487				appr_free(n->in_apr);
488			KFREE(n);
489			nat_stats.ns_rules--;
490		} else {
491			n->in_flags |= IPN_DELETE;
492			n->in_next = NULL;
493		}
494		n = NULL;
495		break;
496	case SIOCGNATS :
497		MUTEX_DOWNGRADE(&ipf_nat);
498		nat_stats.ns_table[0] = nat_table[0];
499		nat_stats.ns_table[1] = nat_table[1];
500		nat_stats.ns_list = nat_list;
501		nat_stats.ns_nattab_sz = ipf_nattable_sz;
502		nat_stats.ns_rultab_sz = ipf_natrules_sz;
503		nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
504		nat_stats.ns_instances = nat_instances;
505		nat_stats.ns_apslist = ap_sess_list;
506		IWCOPY((char *)&nat_stats, (char *)data, sizeof(nat_stats));
507		break;
508	case SIOCGNATL :
509	    {
510		natlookup_t nl;
511
512		MUTEX_DOWNGRADE(&ipf_nat);
513		IRCOPY((char *)data, (char *)&nl, sizeof(nl));
514
515		if (nat_lookupredir(&nl)) {
516			IWCOPY((char *)&nl, (char *)data, sizeof(nl));
517		} else
518			error = ESRCH;
519		break;
520	    }
521	case SIOCFLNAT :
522		if (!(mode & FWRITE)) {
523			error = EPERM;
524			break;
525		}
526		ret = nat_flushtable();
527		MUTEX_DOWNGRADE(&ipf_nat);
528		IWCOPY((caddr_t)&ret, data, sizeof(ret));
529		break;
530	case SIOCCNATL :
531		if (!(mode & FWRITE)) {
532			error = EPERM;
533			break;
534		}
535		ret = nat_clearlist();
536		MUTEX_DOWNGRADE(&ipf_nat);
537		IWCOPY((caddr_t)&ret, data, sizeof(ret));
538		break;
539	case FIONREAD :
540#ifdef	IPFILTER_LOG
541		MUTEX_DOWNGRADE(&ipf_nat);
542		IWCOPY((caddr_t)&iplused[IPL_LOGNAT], (caddr_t)data,
543		       sizeof(iplused[IPL_LOGNAT]));
544#endif
545		break;
546	default :
547		error = EINVAL;
548		break;
549	}
550	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
551	SPL_X(s);
552	if (nt)
553		KFREE(nt);
554	return error;
555}
556
557
558/*
559 * Delete a nat entry from the various lists and table.
560 */
561static void nat_delete(natd)
562struct nat *natd;
563{
564	register struct nat **natp, *nat;
565	struct ipnat *ipn;
566
567	for (natp = natd->nat_hstart[0]; (nat = *natp);
568	     natp = &nat->nat_hnext[0])
569		if (nat == natd) {
570			*natp = nat->nat_hnext[0];
571			break;
572		}
573
574	for (natp = natd->nat_hstart[1]; (nat = *natp);
575	     natp = &nat->nat_hnext[1])
576		if (nat == natd) {
577			*natp = nat->nat_hnext[1];
578			break;
579		}
580
581	if (natd->nat_fr != NULL) {
582		ATOMIC_DEC(natd->nat_fr->fr_ref);
583	}
584	/*
585	 * If there is an active reference from the nat entry to its parent
586	 * rule, decrement the rule's reference count and free it too if no
587	 * longer being used.
588	 */
589	ipn = natd->nat_ptr;
590	if (ipn != NULL) {
591		ipn->in_space++;
592		ipn->in_use--;
593		if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
594			if (ipn->in_apr)
595				appr_free(ipn->in_apr);
596			KFREE(ipn);
597			nat_stats.ns_rules--;
598		}
599	}
600
601	/*
602	 * If there's a fragment table entry too for this nat entry, then
603	 * dereference that as well.
604	 */
605	ipfr_forget((void *)natd);
606	aps_free(natd->nat_aps);
607	nat_stats.ns_inuse--;
608	KFREE(natd);
609}
610
611
612/*
613 * nat_flushtable - clear the NAT table of all mapping entries.
614 */
615static int nat_flushtable()
616{
617	register nat_t *nat, **natp;
618	register int j = 0;
619
620	/*
621	 * ALL NAT mappings deleted, so lets just make the deletions
622	 * quicker.
623	 */
624	if (nat_table[0] != NULL)
625		bzero((char *)nat_table[0],
626		      sizeof(nat_table[0]) * ipf_nattable_sz);
627	if (nat_table[1] != NULL)
628		bzero((char *)nat_table[1],
629		      sizeof(nat_table[1]) * ipf_nattable_sz);
630
631	for (natp = &nat_instances; (nat = *natp); ) {
632		*natp = nat->nat_next;
633		nat_delete(nat);
634		j++;
635	}
636	nat_stats.ns_inuse = 0;
637	return j;
638}
639
640
641/*
642 * nat_clearlist - delete all rules in the active NAT mapping list.
643 */
644static int nat_clearlist()
645{
646	register ipnat_t *n, **np = &nat_list;
647	int i = 0;
648
649	if (nat_rules != NULL)
650		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
651	if (rdr_rules != NULL)
652		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
653
654	while ((n = *np)) {
655		*np = n->in_next;
656		if (!n->in_use) {
657			if (n->in_apr)
658				appr_free(n->in_apr);
659			KFREE(n);
660			nat_stats.ns_rules--;
661		} else {
662			n->in_flags |= IPN_DELETE;
663			n->in_next = NULL;
664		}
665		i++;
666	}
667	nat_masks = 0;
668	rdr_masks = 0;
669	return i;
670}
671
672
673/*
674 * Create a new NAT table entry.
675 * NOTE: assumes write lock on ipf_nat has been obtained already.
676 */
677nat_t *nat_new(np, ip, fin, flags, direction)
678ipnat_t *np;
679ip_t *ip;
680fr_info_t *fin;
681u_int flags;
682int direction;
683{
684	register u_32_t sum1, sum2, sumd, l;
685	u_short port = 0, sport = 0, dport = 0, nport = 0;
686	nat_t *nat, **natp, *natl = NULL;
687	struct in_addr in, inb;
688	tcphdr_t *tcp = NULL;
689	u_short nflags;
690	u_int hv;
691#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
692	qif_t *qf = fin->fin_qif;
693#endif
694
695	nflags = flags & np->in_flags;
696	if (flags & IPN_TCPUDP) {
697		tcp = (tcphdr_t *)fin->fin_dp;
698		sport = tcp->th_sport;
699		dport = tcp->th_dport;
700	}
701
702	/* Give me a new nat */
703	KMALLOC(nat, nat_t *);
704	if (nat == NULL)
705		return NULL;
706
707	bzero((char *)nat, sizeof(*nat));
708	nat->nat_flags = flags;
709	/*
710	 * Search the current table for a match.
711	 */
712	if (direction == NAT_OUTBOUND) {
713		/*
714		 * Values at which the search for a free resouce starts.
715		 */
716		u_32_t st_ip;
717		u_short st_port;
718
719		/*
720		 * If it's an outbound packet which doesn't match any existing
721		 * record, then create a new port
722		 */
723		l = 0;
724		st_ip = np->in_nip;
725		st_port = np->in_pnext;
726
727		do {
728			port = 0;
729			in.s_addr = np->in_nip;
730			if (l == 0) {
731				natl = nat_maplookup(fin->fin_ifp, flags,
732						     ip->ip_src, ip->ip_dst);
733				if (natl != NULL) {
734					in = natl->nat_outip;
735#ifndef sparc
736					in.s_addr = ntohl(in.s_addr);
737#endif
738				}
739			}
740
741			if ((np->in_outmsk == 0xffffffff) &&
742			    (np->in_pnext == 0)) {
743				if (l > 0) {
744					KFREE(nat);
745					return NULL;
746				}
747			}
748
749			if (np->in_redir & NAT_MAPBLK) {
750				if ((l >= np->in_ppip) || ((l > 0) &&
751				     !(flags & IPN_TCPUDP))) {
752					KFREE(nat);
753					return NULL;
754				}
755				/*
756				 * map-block - Calculate destination address.
757				 */
758				in.s_addr = ntohl(ip->ip_src.s_addr);
759				in.s_addr &= ntohl(~np->in_inmsk);
760				inb.s_addr = in.s_addr;
761				in.s_addr /= np->in_ippip;
762				in.s_addr &= ntohl(~np->in_outmsk);
763				in.s_addr += ntohl(np->in_outip);
764				/*
765				 * Calculate destination port.
766				 */
767				if ((flags & IPN_TCPUDP) &&
768				    (np->in_ppip != 0)) {
769					port = ntohs(sport) + l;
770					port %= np->in_ppip;
771					port += np->in_ppip *
772						(inb.s_addr % np->in_ippip);
773					port += MAPBLK_MINPORT;
774					port = htons(port);
775				}
776			} else if (!in.s_addr &&
777				   (np->in_outmsk == 0xffffffff)) {
778				/*
779				 * 0/32 - use the interface's IP address.
780				 */
781				if ((l > 0) ||
782				    fr_ifpaddr(fin->fin_ifp, &in) == -1) {
783					KFREE(nat);
784					return NULL;
785				}
786				in.s_addr = ntohl(in.s_addr);
787			} else if (!in.s_addr && !np->in_outmsk) {
788				/*
789				 * 0/0 - use the original source address/port.
790				 */
791				if (l > 0) {
792					KFREE(nat);
793					return NULL;
794				}
795				in.s_addr = ntohl(ip->ip_src.s_addr);
796			} else if ((np->in_outmsk != 0xffffffff) &&
797				   (np->in_pnext == 0) &&
798				   ((l > 0) || (natl == NULL)))
799				np->in_nip++;
800			natl = NULL;
801
802			if ((nflags & IPN_TCPUDP) &&
803			    ((np->in_redir & NAT_MAPBLK) == 0) &&
804			    (np->in_flags & IPN_AUTOPORTMAP)) {
805				if ((l > 0) && (l % np->in_ppip == 0)) {
806					if (l > np->in_space) {
807						KFREE(nat);
808						return NULL;
809					} else if ((l > np->in_ppip) &&
810						   np->in_outmsk != 0xffffffff)
811						np->in_nip++;
812				}
813				if (np->in_ppip != 0) {
814					port = ntohs(sport);
815					port += (l % np->in_ppip);
816					port %= np->in_ppip;
817					port += np->in_ppip *
818						(ntohl(ip->ip_src.s_addr) %
819						 np->in_ippip);
820					port += MAPBLK_MINPORT;
821					port = htons(port);
822				}
823			} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
824				   (nflags & IPN_TCPUDP) &&
825				   (np->in_pnext != 0)) {
826				port = htons(np->in_pnext++);
827				if (np->in_pnext > ntohs(np->in_pmax)) {
828					np->in_pnext = ntohs(np->in_pmin);
829					if (np->in_outmsk != 0xffffffff)
830						np->in_nip++;
831				}
832			}
833
834			if (np->in_flags & IPN_RANGE) {
835				if (np->in_nip >= ntohl(np->in_outmsk))
836					np->in_nip = ntohl(np->in_outip);
837			} else {
838				if ((np->in_outmsk != 0xffffffff) &&
839				    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
840				    ntohl(np->in_outip))
841					np->in_nip = ntohl(np->in_outip) + 1;
842			}
843
844			if (!port && (flags & IPN_TCPUDP))
845				port = sport;
846
847			/*
848			 * Here we do a lookup of the connection as seen from
849			 * the outside.  If an IP# pair already exists, try
850			 * again.  So if you have A->B becomes C->B, you can
851			 * also have D->E become C->E but not D->B causing
852			 * another C->B.  Also take protocol and ports into
853			 * account when determining whether a pre-existing
854			 * NAT setup will cause an external conflict where
855			 * this is appropriate.
856			 */
857			inb.s_addr = htonl(in.s_addr);
858			natl = nat_inlookup(fin->fin_ifp, flags & ~FI_WILD,
859					    (u_int)ip->ip_p, ip->ip_dst, inb,
860					    (port << 16) | dport);
861
862			/*
863			 * Has the search wrapped around and come back to the
864			 * start ?
865			 */
866			if ((natl != NULL) &&
867			    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
868			    (np->in_nip != 0) && (st_ip == np->in_nip)) {
869				KFREE(nat);
870				return NULL;
871			}
872			l++;
873		} while (natl != NULL);
874
875		if (np->in_space > 0)
876			np->in_space--;
877
878		/* Setup the NAT table */
879		nat->nat_inip = ip->ip_src;
880		nat->nat_outip.s_addr = htonl(in.s_addr);
881		nat->nat_oip = ip->ip_dst;
882
883		sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr)) + ntohs(sport);
884		sum2 = LONG_SUM(in.s_addr) + ntohs(port);
885
886		if (flags & IPN_TCPUDP) {
887			nat->nat_inport = sport;
888			nat->nat_outport = port;	/* sport */
889			nat->nat_oport = dport;
890		}
891	} else {
892		/*
893		 * Otherwise, it's an inbound packet. Most likely, we don't
894		 * want to rewrite source ports and source addresses. Instead,
895		 * we want to rewrite to a fixed internal address and fixed
896		 * internal port.
897		 */
898		in.s_addr = ntohl(np->in_inip);
899		if (!(nport = np->in_pnext))
900			nport = dport;
901
902		/*
903		 * When the redirect-to address is set to 0.0.0.0, just
904		 * assume a blank `forwarding' of the packet.  We don't
905		 * setup any translation for this either.
906		 */
907		if ((in.s_addr == 0) && (nport == dport)) {
908			KFREE(nat);
909			return NULL;
910		}
911
912		nat->nat_inip.s_addr = htonl(in.s_addr);
913		nat->nat_outip = ip->ip_dst;
914		nat->nat_oip = ip->ip_src;
915
916		sum1 = LONG_SUM(ntohl(ip->ip_dst.s_addr)) + ntohs(dport);
917		sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
918
919		if (flags & IPN_TCPUDP) {
920			nat->nat_inport = nport;
921			nat->nat_outport = dport;
922			nat->nat_oport = sport;
923		}
924	}
925
926	CALC_SUMD(sum1, sum2, sumd);
927	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
928#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
929	if ((flags == IPN_TCP) && dohwcksum &&
930	    (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
931		if (direction == NAT_OUTBOUND)
932			sum1 = LONG_SUM(ntohl(in.s_addr));
933		else
934			sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
935		sum1 += LONG_SUM(ntohl(ip->ip_dst.s_addr));
936		sum1 += 30;
937		sum1 = (sum1 & 0xffff) + (sum1 >> 16);
938		nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
939	} else
940#endif
941		nat->nat_sumd[1] = nat->nat_sumd[0];
942
943	if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
944		if (direction == NAT_OUTBOUND)
945			sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
946		else
947			sum1 = LONG_SUM(ntohl(ip->ip_dst.s_addr));
948
949		sum2 = LONG_SUM(in.s_addr);
950
951		CALC_SUMD(sum1, sum2, sumd);
952		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
953	} else
954		nat->nat_ipsumd = nat->nat_sumd[0];
955
956	in.s_addr = htonl(in.s_addr);
957	nat->nat_next = nat_instances;
958	nat_instances = nat;
959	hv = NAT_HASH_FN(nat->nat_inip.s_addr, ipf_nattable_sz);
960	natp = &nat_table[0][hv];
961	nat->nat_hstart[0] = natp;
962	nat->nat_hnext[0] = *natp;
963	*natp = nat;
964	hv = NAT_HASH_FN(nat->nat_outip.s_addr, ipf_nattable_sz);
965	natp = &nat_table[1][hv];
966	nat->nat_hstart[1] = natp;
967	nat->nat_hnext[1] = *natp;
968	*natp = nat;
969	nat->nat_dir = direction;
970	nat->nat_ifp = fin->fin_ifp;
971	nat->nat_ptr = np;
972	nat->nat_p = ip->ip_p;
973	nat->nat_bytes = 0;
974	nat->nat_pkts = 0;
975	nat->nat_age = fr_defnatage;
976	nat->nat_fr = fin->fin_fr;
977	if (nat->nat_fr != NULL) {
978		ATOMIC_INC(nat->nat_fr->fr_ref);
979	}
980	if (direction == NAT_OUTBOUND) {
981		if (flags & IPN_TCPUDP)
982			tcp->th_sport = port;
983	} else {
984		if (flags & IPN_TCPUDP)
985			tcp->th_dport = nport;
986	}
987	nat_stats.ns_added++;
988	nat_stats.ns_inuse++;
989	np->in_use++;
990	return nat;
991}
992
993
994nat_t *nat_icmpinlookup(ip, fin)
995ip_t *ip;
996fr_info_t *fin;
997{
998	icmphdr_t *icmp;
999	tcphdr_t *tcp = NULL;
1000	ip_t *oip;
1001	int flags = 0, type;
1002
1003	icmp = (icmphdr_t *)fin->fin_dp;
1004	/*
1005	 * Does it at least have the return (basic) IP header ?
1006	 * Only a basic IP header (no options) should be with an ICMP error
1007	 * header.
1008	 */
1009	if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1010		return NULL;
1011	type = icmp->icmp_type;
1012	/*
1013	 * If it's not an error type, then return.
1014	 */
1015	if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1016	    (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1017	    (type != ICMP_PARAMPROB))
1018		return NULL;
1019
1020	oip = (ip_t *)((char *)fin->fin_dp + 8);
1021	if (ip->ip_len < ICMPERR_MAXPKTLEN + ((oip->ip_hl - 5) << 2))
1022		return NULL;
1023	if (oip->ip_p == IPPROTO_TCP)
1024		flags = IPN_TCP;
1025	else if (oip->ip_p == IPPROTO_UDP)
1026		flags = IPN_UDP;
1027	if (flags & IPN_TCPUDP) {
1028		tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1029		return nat_inlookup(fin->fin_ifp, flags, (u_int)oip->ip_p,
1030				    oip->ip_dst, oip->ip_src,
1031				    (tcp->th_sport << 16) | tcp->th_dport);
1032	}
1033	return nat_inlookup(fin->fin_ifp, 0, (u_int)oip->ip_p, oip->ip_dst,
1034			    oip->ip_src, 0);
1035}
1036
1037
1038/*
1039 * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP
1040 * packet gets correctly recognised.
1041 */
1042nat_t *nat_icmpin(ip, fin, nflags)
1043ip_t *ip;
1044fr_info_t *fin;
1045u_int *nflags;
1046{
1047	u_32_t sum1, sum2, sumd;
1048	struct in_addr in;
1049	icmphdr_t *icmp;
1050	nat_t *nat;
1051	ip_t *oip;
1052	int flags = 0;
1053
1054	if (!(nat = nat_icmpinlookup(ip, fin)))
1055		return NULL;
1056	*nflags = IPN_ICMPERR;
1057	icmp = (icmphdr_t *)fin->fin_dp;
1058	oip = (ip_t *)&icmp->icmp_ip;
1059	if (oip->ip_p == IPPROTO_TCP)
1060		flags = IPN_TCP;
1061	else if (oip->ip_p == IPPROTO_UDP)
1062		flags = IPN_UDP;
1063	/*
1064	 * Need to adjust ICMP header to include the real IP#'s and
1065	 * port #'s.  Only apply a checksum change relative to the
1066	 * IP address change is it will be modified again in ip_natout
1067	 * for both address and port.  Two checksum changes are
1068	 * necessary for the two header address changes.  Be careful
1069	 * to only modify the checksum once for the port # and twice
1070	 * for the IP#.
1071	 */
1072	if (nat->nat_dir == NAT_OUTBOUND) {
1073		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1074		in = nat->nat_inip;
1075		oip->ip_src = in;
1076	} else {
1077		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1078		in = nat->nat_outip;
1079		oip->ip_dst = in;
1080	}
1081
1082	sum2 = LONG_SUM(ntohl(in.s_addr));
1083
1084	CALC_SUMD(sum1, sum2, sumd);
1085
1086	if (nat->nat_dir == NAT_OUTBOUND) {
1087		fix_incksum(&oip->ip_sum, sumd, 0);
1088
1089		sumd += (sumd & 0xffff);
1090		while (sumd > 0xffff)
1091			sumd = (sumd & 0xffff) + (sumd >> 16);
1092		fix_outcksum(&icmp->icmp_cksum, sumd, 0);
1093	} else {
1094		fix_outcksum(&oip->ip_sum, sumd, 0);
1095
1096		sumd += (sumd & 0xffff);
1097		while (sumd > 0xffff)
1098			sumd = (sumd & 0xffff) + (sumd >> 16);
1099		fix_incksum(&icmp->icmp_cksum, sumd, 0);
1100	}
1101
1102
1103	if ((flags & IPN_TCPUDP) != 0) {
1104		tcphdr_t *tcp;
1105
1106		/* XXX - what if this is bogus hl and we go off the end ? */
1107		tcp = (tcphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1108
1109		if (nat->nat_dir == NAT_OUTBOUND) {
1110			if (tcp->th_sport != nat->nat_inport) {
1111				sum1 = ntohs(tcp->th_sport);
1112				sum2 = ntohs(nat->nat_inport);
1113				CALC_SUMD(sum1, sum2, sumd);
1114				tcp->th_sport = nat->nat_inport;
1115				fix_outcksum(&icmp->icmp_cksum, sumd, 0);
1116			}
1117		} else {
1118			if (tcp->th_dport != nat->nat_outport) {
1119				sum1 = ntohs(tcp->th_dport);
1120				sum2 = ntohs(nat->nat_outport);
1121				CALC_SUMD(sum1, sum2, sumd);
1122				tcp->th_dport = nat->nat_outport;
1123				fix_incksum(&icmp->icmp_cksum, sumd, 0);
1124			}
1125		}
1126	}
1127	nat->nat_age = fr_defnaticmpage;
1128	return nat;
1129}
1130
1131
1132/*
 * NB: these lookups don't lock access to the list; they assume that has
 * already been done!
1135 */
1136/*
1137 * Lookup a nat entry based on the mapped destination ip address/port and
1138 * real source address/port.  We use this lookup when receiving a packet,
1139 * we're looking for a table entry, based on the destination address.
1140 * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
1141 */
1142nat_t *nat_inlookup(ifp, flags, p, src, mapdst, ports)
1143void *ifp;
1144register u_int flags, p;
1145struct in_addr src , mapdst;
1146u_32_t ports;
1147{
1148	register u_short sport, mapdport;
1149	register nat_t *nat;
1150	register int nflags;
1151	u_int hv;
1152
1153	mapdport = ports >> 16;
1154	sport = ports & 0xffff;
1155	flags &= IPN_TCPUDP;
1156
1157	hv = NAT_HASH_FN(mapdst.s_addr, ipf_nattable_sz);
1158	nat = nat_table[1][hv];
1159	for (; nat; nat = nat->nat_hnext[1]) {
1160		nflags = nat->nat_flags;
1161		if ((!ifp || ifp == nat->nat_ifp) &&
1162		    nat->nat_oip.s_addr == src.s_addr &&
1163		    nat->nat_outip.s_addr == mapdst.s_addr &&
1164		    (((p == 0) && (flags == (nat->nat_flags & IPN_TCPUDP)))
1165		     || (p == nat->nat_p)) && (!flags ||
1166		     (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
1167		      ((nat->nat_outport == mapdport) ||
1168		       (nflags & FI_W_SPORT)))))
1169			return nat;
1170	}
1171	return NULL;
1172}
1173
1174
1175/*
1176 * Lookup a nat entry based on the source 'real' ip address/port and
1177 * destination address/port.  We use this lookup when sending a packet out,
1178 * we're looking for a table entry, based on the source address.
1179 * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
1180 */
1181nat_t *nat_outlookup(ifp, flags, p, src, dst, ports)
1182void *ifp;
1183register u_int flags, p;
1184struct in_addr src , dst;
1185u_32_t ports;
1186{
1187	register u_short sport, dport;
1188	register nat_t *nat;
1189	register int nflags;
1190	u_int hv;
1191
1192	sport = ports & 0xffff;
1193	dport = ports >> 16;
1194	flags &= IPN_TCPUDP;
1195
1196	hv = NAT_HASH_FN(src.s_addr, ipf_nattable_sz);
1197	nat = nat_table[0][hv];
1198	for (; nat; nat = nat->nat_hnext[0]) {
1199		nflags = nat->nat_flags;
1200
1201		if ((!ifp || ifp == nat->nat_ifp) &&
1202		    nat->nat_inip.s_addr == src.s_addr &&
1203		    nat->nat_oip.s_addr == dst.s_addr &&
1204		    (((p == 0) && (flags == (nat->nat_flags & IPN_TCPUDP)))
1205		     || (p == nat->nat_p)) && (!flags ||
1206		     ((nat->nat_inport == sport || nflags & FI_W_SPORT) &&
1207		      (nat->nat_oport == dport || nflags & FI_W_DPORT))))
1208			return nat;
1209	}
1210	return NULL;
1211}
1212
1213
1214/*
1215 * check if an ip address has already been allocated for a given mapping that
1216 * is not doing port based translation.
1217 */
1218nat_t *nat_maplookup(ifp, flags, src, dst)
1219void *ifp;
1220register u_int flags;
1221struct in_addr src , dst;
1222{
1223	register nat_t *nat;
1224	register int oflags;
1225	u_int hv;
1226
1227	hv = NAT_HASH_FN(src.s_addr, ipf_nattable_sz);
1228	nat = nat_table[0][hv];
1229	for (; nat; nat = nat->nat_hnext[0]) {
1230		oflags = (flags & IPN_TCPUDP) & nat->nat_ptr->in_flags;
1231		if (oflags != 0)
1232			continue;
1233
1234		if ((!ifp || ifp == nat->nat_ifp) &&
1235		    nat->nat_inip.s_addr == src.s_addr &&
1236		    nat->nat_oip.s_addr == dst.s_addr)
1237			return nat;
1238	}
1239	return NULL;
1240}
1241
1242
1243/*
1244 * Lookup the NAT tables to search for a matching redirect
1245 */
1246nat_t *nat_lookupredir(np)
1247register natlookup_t *np;
1248{
1249	u_32_t ports;
1250	nat_t *nat;
1251
1252	ports = (np->nl_outport << 16) | np->nl_inport;
1253	/*
1254	 * If nl_inip is non null, this is a lookup based on the real
1255	 * ip address. Else, we use the fake.
1256	 */
1257	if ((nat = nat_outlookup(NULL, np->nl_flags, 0, np->nl_inip,
1258				 np->nl_outip, ports))) {
1259		np->nl_realip = nat->nat_outip;
1260		np->nl_realport = nat->nat_outport;
1261	}
1262	return nat;
1263}
1264
1265
1266/*
1267 * Packets going out on the external interface go through this.
1268 * Here, the source address requires alteration, if anything.
1269 */
1270int ip_natout(ip, fin)
1271ip_t *ip;
1272fr_info_t *fin;
1273{
1274	register ipnat_t *np = NULL;
1275	register u_32_t ipa;
1276	tcphdr_t *tcp = NULL;
1277	u_short nflags = 0, sport = 0, dport = 0, *csump = NULL;
1278	struct ifnet *ifp;
1279	int natadd = 1;
1280	frentry_t *fr;
1281	u_int hv, msk;
1282	u_32_t iph;
1283	nat_t *nat;
1284	int i;
1285
1286	if (nat_list == NULL)
1287		return 0;
1288
1289	if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
1290	    fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1)
1291		ifp = fr->fr_tif.fd_ifp;
1292	else
1293		ifp = fin->fin_ifp;
1294
1295	if (!(ip->ip_off & IP_OFFMASK) && !(fin->fin_fi.fi_fl & FI_SHORT)) {
1296		if (ip->ip_p == IPPROTO_TCP)
1297			nflags = IPN_TCP;
1298		else if (ip->ip_p == IPPROTO_UDP)
1299			nflags = IPN_UDP;
1300		if ((nflags & IPN_TCPUDP)) {
1301			tcp = (tcphdr_t *)fin->fin_dp;
1302			sport = tcp->th_sport;
1303			dport = tcp->th_dport;
1304		}
1305	}
1306
1307	ipa = ip->ip_src.s_addr;
1308
1309	READ_ENTER(&ipf_nat);
1310	if ((ip->ip_off & (IP_OFFMASK|IP_MF)) &&
1311	    (nat = ipfr_nat_knownfrag(ip, fin)))
1312		natadd = 0;
1313	else if ((nat = nat_outlookup(ifp, nflags, (u_int)ip->ip_p, ip->ip_src,
1314				      ip->ip_dst, (dport << 16) | sport))) {
1315		nflags = nat->nat_flags;
1316		if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
1317			if ((nflags & FI_W_SPORT) &&
1318			    (nat->nat_inport != sport))
1319				nat->nat_inport = sport;
1320			else if ((nflags & FI_W_DPORT) &&
1321				 (nat->nat_oport != dport))
1322				nat->nat_oport = dport;
1323			if (nat->nat_outport == 0)
1324				nat->nat_outport = sport;
1325			nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
1326			nflags = nat->nat_flags;
1327		}
1328	} else {
1329		RWLOCK_EXIT(&ipf_nat);
1330		WRITE_ENTER(&ipf_nat);
1331		/*
1332		 * If there is no current entry in the nat table for this IP#,
1333		 * create one for it (if there is a matching rule).
1334		 */
1335		msk = 0xffffffff;
1336		i = 32;
1337maskloop:
1338		iph = ipa & htonl(msk);
1339		hv = NAT_HASH_FN(iph, ipf_natrules_sz);
1340		for (np = nat_rules[hv]; np; np = np->in_mnext)
1341		{
1342			if ((np->in_ifp == ifp) && np->in_space &&
1343			    (!(np->in_flags & IPN_RF) ||
1344			     (np->in_flags & nflags)) &&
1345			    ((ipa & np->in_inmsk) == np->in_inip) &&
1346			    ((np->in_redir & (NAT_MAP|NAT_MAPBLK)) ||
1347			     (np->in_pnext == sport))) {
1348				if (*np->in_plabel && !appr_ok(ip, tcp, np))
1349					continue;
1350				/*
1351				 * If it's a redirection, then we don't want to
1352				 * create new outgoing port stuff.
1353				 * Redirections are only for incoming
1354				 * connections.
1355				 */
1356				if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
1357					continue;
1358				if ((nat = nat_new(np, ip, fin, (u_int)nflags,
1359						    NAT_OUTBOUND))) {
1360					np->in_hits++;
1361#ifdef	IPFILTER_LOG
1362					nat_log(nat, (u_int)np->in_redir);
1363#endif
1364					break;
1365				}
1366			}
1367		}
1368		if ((np == NULL) && (i > 0)) {
1369			do {
1370				i--;
1371				msk <<= 1;
1372			} while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
1373			if (i >= 0)
1374				goto maskloop;
1375		}
1376		MUTEX_DOWNGRADE(&ipf_nat);
1377	}
1378
1379	if (nat) {
1380		np = nat->nat_ptr;
1381		if (natadd && fin->fin_fi.fi_fl & FI_FRAG)
1382			ipfr_nat_newfrag(ip, fin, 0, nat);
1383		ip->ip_src = nat->nat_outip;
1384		MUTEX_ENTER(&ipf_rw);
1385		nat->nat_age = fr_defnatage;
1386		nat->nat_bytes += ip->ip_len;
1387		nat->nat_pkts++;
1388		MUTEX_EXIT(&ipf_rw);
1389
1390		/*
1391		 * Fix up checksums, not by recalculating them, but
1392		 * simply computing adjustments.
1393		 */
1394#if SOLARIS || defined(__sgi)
1395		if (nat->nat_dir == NAT_OUTBOUND)
1396			fix_outcksum(&ip->ip_sum, nat->nat_ipsumd, 0);
1397		else
1398			fix_incksum(&ip->ip_sum, nat->nat_ipsumd, 0);
1399#endif
1400
1401		if (!(ip->ip_off & IP_OFFMASK) &&
1402		    !(fin->fin_fi.fi_fl & FI_SHORT)) {
1403
1404			if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
1405				tcp->th_sport = nat->nat_outport;
1406				fin->fin_data[0] = ntohs(tcp->th_sport);
1407			}
1408
1409			if (ip->ip_p == IPPROTO_TCP) {
1410				csump = &tcp->th_sum;
1411				MUTEX_ENTER(&ipf_rw);
1412				fr_tcp_age(&nat->nat_age,
1413					   nat->nat_tcpstate, ip, fin, 1);
1414				if (nat->nat_age < fr_defnaticmpage)
1415					nat->nat_age = fr_defnaticmpage;
1416#ifdef LARGE_NAT
1417				else if (nat->nat_age > DEF_NAT_AGE)
1418					nat->nat_age = DEF_NAT_AGE;
1419#endif
1420				/*
1421				 * Increase this because we may have
1422				 * "keep state" following this too and
1423				 * packet storms can occur if this is
1424				 * removed too quickly.
1425				 */
1426				if (nat->nat_age == fr_tcpclosed)
1427					nat->nat_age = fr_tcplastack;
1428				MUTEX_EXIT(&ipf_rw);
1429			} else if (ip->ip_p == IPPROTO_UDP) {
1430				udphdr_t *udp = (udphdr_t *)tcp;
1431
1432				if (udp->uh_sum)
1433					csump = &udp->uh_sum;
1434			}
1435			if (csump) {
1436				if (nat->nat_dir == NAT_OUTBOUND)
1437					fix_outcksum(csump, nat->nat_sumd[1],
1438						     ip->ip_len);
1439				else
1440					fix_incksum(csump, nat->nat_sumd[1],
1441						     ip->ip_len);
1442			}
1443		}
1444		if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1445		     (tcp != NULL && dport == np->in_dport)))
1446			(void) appr_check(ip, fin, nat);
1447		ATOMIC_INC(nat_stats.ns_mapped[1]);
1448		RWLOCK_EXIT(&ipf_nat);	/* READ */
1449		return 1;
1450	}
1451	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
1452	return 0;
1453}
1454
1455
1456/*
1457 * Packets coming in from the external interface go through this.
1458 * Here, the destination address requires alteration, if anything.
1459 */
1460int ip_natin(ip, fin)
1461ip_t *ip;
1462fr_info_t *fin;
1463{
1464	register struct in_addr src;
1465	register struct in_addr in;
1466	register ipnat_t *np;
1467	u_int nflags = 0, natadd = 1, hv, msk;
1468	struct ifnet *ifp = fin->fin_ifp;
1469	tcphdr_t *tcp = NULL;
1470	u_short sport = 0, dport = 0, *csump = NULL;
1471	nat_t *nat;
1472	u_32_t iph;
1473	int i;
1474
1475	if (nat_list == NULL)
1476		return 0;
1477
1478	if (!(ip->ip_off & IP_OFFMASK) && !(fin->fin_fi.fi_fl & FI_SHORT)) {
1479		if (ip->ip_p == IPPROTO_TCP)
1480			nflags = IPN_TCP;
1481		else if (ip->ip_p == IPPROTO_UDP)
1482			nflags = IPN_UDP;
1483		if ((nflags & IPN_TCPUDP)) {
1484			tcp = (tcphdr_t *)fin->fin_dp;
1485			dport = tcp->th_dport;
1486			sport = tcp->th_sport;
1487		}
1488	}
1489
1490	in = ip->ip_dst;
1491	/* make sure the source address is to be redirected */
1492	src = ip->ip_src;
1493
1494	READ_ENTER(&ipf_nat);
1495
1496	if ((ip->ip_p == IPPROTO_ICMP) && (nat = nat_icmpin(ip, fin, &nflags)))
1497		;
1498	else if ((ip->ip_off & IP_OFFMASK) &&
1499		 (nat = ipfr_nat_knownfrag(ip, fin)))
1500		natadd = 0;
1501	else if ((nat = nat_inlookup(fin->fin_ifp, nflags, (u_int)ip->ip_p,
1502				     ip->ip_src, in, (dport << 16) | sport))) {
1503		nflags = nat->nat_flags;
1504		if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
1505			if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
1506				nat->nat_oport = sport;
1507			else if ((nat->nat_outport != dport) &&
1508				 (nflags & FI_W_SPORT))
1509				nat->nat_outport = dport;
1510			nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
1511			nflags = nat->nat_flags;
1512		}
1513	} else {
1514		RWLOCK_EXIT(&ipf_nat);
1515		WRITE_ENTER(&ipf_nat);
1516		/*
1517		 * If there is no current entry in the nat table for this IP#,
1518		 * create one for it (if there is a matching rule).
1519		 */
1520		msk = 0xffffffff;
1521		i = 32;
1522maskloop:
1523		iph = in.s_addr & htonl(msk);
1524		hv = NAT_HASH_FN(iph, ipf_rdrrules_sz);
1525		for (np = rdr_rules[hv]; np; np = np->in_rnext)
1526			if ((np->in_ifp == ifp) &&
1527			    (!np->in_flags || (nflags & np->in_flags)) &&
1528			    ((in.s_addr & np->in_outmsk) == np->in_outip) &&
1529			    ((src.s_addr & np->in_srcmsk) == np->in_srcip) &&
1530			    (np->in_redir & NAT_REDIRECT) &&
1531			    (!np->in_pmin || np->in_pmin == dport) &&
1532			    (!np->in_p || np->in_p == ip->ip_p)) {
1533				if ((nat = nat_new(np, ip, fin, nflags,
1534						    NAT_INBOUND))) {
1535					np->in_hits++;
1536#ifdef	IPFILTER_LOG
1537					nat_log(nat, (u_int)np->in_redir);
1538#endif
1539					break;
1540				}
1541			}
1542		if ((np == NULL) && (i > 0)) {
1543			do {
1544				i--;
1545				msk <<= 1;
1546			} while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
1547			if (i >= 0)
1548				goto maskloop;
1549		}
1550		MUTEX_DOWNGRADE(&ipf_nat);
1551	}
1552	if (nat) {
1553		np = nat->nat_ptr;
1554		fin->fin_fr = nat->nat_fr;
1555		if (natadd && fin->fin_fi.fi_fl & FI_FRAG)
1556			ipfr_nat_newfrag(ip, fin, 0, nat);
1557		if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1558		    (tcp != NULL && sport == np->in_dport)))
1559			(void) appr_check(ip, fin, nat);
1560
1561		MUTEX_ENTER(&ipf_rw);
1562		if (nflags != IPN_ICMPERR)
1563			nat->nat_age = fr_defnatage;
1564
1565		nat->nat_bytes += ip->ip_len;
1566		nat->nat_pkts++;
1567		MUTEX_EXIT(&ipf_rw);
1568		ip->ip_dst = nat->nat_inip;
1569		fin->fin_fi.fi_dst = nat->nat_inip;
1570
1571		/*
1572		 * Fix up checksums, not by recalculating them, but
1573		 * simply computing adjustments.
1574		 */
1575#if SOLARIS || defined(__sgi)
1576		if (nat->nat_dir == NAT_OUTBOUND)
1577			fix_incksum(&ip->ip_sum, nat->nat_ipsumd, 0);
1578		else
1579			fix_outcksum(&ip->ip_sum, nat->nat_ipsumd, 0);
1580#endif
1581		if (!(ip->ip_off & IP_OFFMASK) &&
1582		    !(fin->fin_fi.fi_fl & FI_SHORT)) {
1583
1584			if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
1585				tcp->th_dport = nat->nat_inport;
1586				fin->fin_data[1] = ntohs(tcp->th_dport);
1587			}
1588
1589			if (ip->ip_p == IPPROTO_TCP) {
1590				csump = &tcp->th_sum;
1591				MUTEX_ENTER(&ipf_rw);
1592				fr_tcp_age(&nat->nat_age,
1593					   nat->nat_tcpstate, ip, fin, 0);
1594				if (nat->nat_age < fr_defnaticmpage)
1595					nat->nat_age = fr_defnaticmpage;
1596#ifdef LARGE_NAT
1597				else if (nat->nat_age > DEF_NAT_AGE)
1598					nat->nat_age = DEF_NAT_AGE;
1599#endif
1600				/*
1601				 * Increase this because we may have
1602				 * "keep state" following this too and
1603				 * packet storms can occur if this is
1604				 * removed too quickly.
1605				 */
1606				if (nat->nat_age == fr_tcpclosed)
1607					nat->nat_age = fr_tcplastack;
1608				MUTEX_EXIT(&ipf_rw);
1609			} else if (ip->ip_p == IPPROTO_UDP) {
1610				udphdr_t *udp = (udphdr_t *)tcp;
1611
1612				if (udp->uh_sum)
1613					csump = &udp->uh_sum;
1614			}
1615			if (csump) {
1616				if (nat->nat_dir == NAT_OUTBOUND)
1617					fix_incksum(csump, nat->nat_sumd[0], 0);
1618				else
1619					fix_outcksum(csump, nat->nat_sumd[0], 0);
1620			}
1621		}
1622		ATOMIC_INC(nat_stats.ns_mapped[0]);
1623		RWLOCK_EXIT(&ipf_nat);			/* READ */
1624		return 1;
1625	}
1626	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
1627	return 0;
1628}
1629
1630
1631/*
1632 * Free all memory used by NAT structures allocated at runtime.
1633 */
1634void ip_natunload()
1635{
1636	WRITE_ENTER(&ipf_nat);
1637	(void) nat_clearlist();
1638	(void) nat_flushtable();
1639	RWLOCK_EXIT(&ipf_nat);
1640
1641	if (nat_table[0] != NULL) {
1642		KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
1643		nat_table[0] = NULL;
1644	}
1645	if (nat_table[1] != NULL) {
1646		KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
1647		nat_table[1] = NULL;
1648	}
1649	if (nat_rules != NULL) {
1650		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
1651		nat_rules = NULL;
1652	}
1653	if (rdr_rules != NULL) {
1654		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
1655		rdr_rules = NULL;
1656	}
1657}
1658
1659
1660/*
1661 * Slowly expire held state for NAT entries.  Timeouts are set in
1662 * expectation of this being called twice per second.
1663 */
1664void ip_natexpire()
1665{
1666	register struct nat *nat, **natp;
1667#if defined(_KERNEL) && !SOLARIS
1668	int s;
1669#endif
1670
1671	SPL_NET(s);
1672	WRITE_ENTER(&ipf_nat);
1673	for (natp = &nat_instances; (nat = *natp); ) {
1674		nat->nat_age--;
1675		if (nat->nat_age) {
1676			natp = &nat->nat_next;
1677			continue;
1678		}
1679		*natp = nat->nat_next;
1680#ifdef	IPFILTER_LOG
1681		nat_log(nat, NL_EXPIRE);
1682#endif
1683		nat_delete(nat);
1684		nat_stats.ns_expire++;
1685	}
1686	RWLOCK_EXIT(&ipf_nat);
1687	SPL_X(s);
1688}
1689
1690
1691/*
1692 */
1693void ip_natsync(ifp)
1694void *ifp;
1695{
1696	register ipnat_t *n;
1697	register nat_t *nat;
1698	register u_32_t sum1, sum2, sumd;
1699	struct in_addr in;
1700	ipnat_t *np;
1701	void *ifp2;
1702#if defined(_KERNEL) && !SOLARIS
1703	int s;
1704#endif
1705
1706	/*
1707	 * Change IP addresses for NAT sessions for any protocol except TCP
1708	 * since it will break the TCP connection anyway.
1709	 */
1710	SPL_NET(s);
1711	WRITE_ENTER(&ipf_nat);
1712	for (nat = nat_instances; nat; nat = nat->nat_next)
1713		if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
1714		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
1715		    (np->in_outmsk == 0xffffffff) && !np->in_nip) {
1716			ifp2 = nat->nat_ifp;
1717			/*
1718			 * Change the map-to address to be the same as the
1719			 * new one.
1720			 */
1721			sum1 = nat->nat_outip.s_addr;
1722			if (fr_ifpaddr(ifp2, &in) != -1)
1723				nat->nat_outip = in;
1724			sum2 = nat->nat_outip.s_addr;
1725
1726			if (sum1 == sum2)
1727				continue;
1728			/*
1729			 * Readjust the checksum adjustment to take into
1730			 * account the new IP#.
1731			 */
1732			CALC_SUMD(sum1, sum2, sumd);
1733			/* XXX - dont change for TCP when solaris does
1734			 * hardware checksumming.
1735			 */
1736			sumd += nat->nat_sumd[0];
1737			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1738			nat->nat_sumd[1] = nat->nat_sumd[0];
1739		}
1740
1741	for (n = nat_list; (n != NULL); n = n->in_next)
1742		if (n->in_ifp == ifp) {
1743			n->in_ifp = (void *)GETUNIT(n->in_ifname);
1744			if (!n->in_ifp)
1745				n->in_ifp = (void *)-1;
1746		}
1747	RWLOCK_EXIT(&ipf_nat);
1748	SPL_X(s);
1749}
1750
1751
#ifdef	IPFILTER_LOG
/*
 * Emit a log record for a NAT entry.
 *
 * A struct natlog is filled in from the entry's addresses, ports and
 * packet/byte counters together with the caller-supplied record type, and
 * handed to ipllog() for the IPL_LOGNAT device.  Unless LARGE_NAT is
 * defined, nl_rule is set to the position of the entry's rule on nat_list;
 * it stays -1 when the entry has no rule or the rule is not on the list.
 */
void nat_log(nat, type)
struct nat *nat;
u_int type;
{
	struct ipnat *np;
	struct natlog natl;
	void *items[1];
	size_t sizes[1];
	int rulen, types[1];

	natl.nl_type = type;
	natl.nl_rule = -1;

	natl.nl_origip = nat->nat_oip;
	natl.nl_inip = nat->nat_inip;
	natl.nl_outip = nat->nat_outip;

	natl.nl_origport = nat->nat_oport;
	natl.nl_inport = nat->nat_inport;
	natl.nl_outport = nat->nat_outport;

	natl.nl_bytes = nat->nat_bytes;
	natl.nl_pkts = nat->nat_pkts;

#ifndef LARGE_NAT
	if (nat->nat_ptr != NULL) {
		/* rule number = index of the rule on nat_list */
		rulen = 0;
		np = nat_list;
		while (np) {
			if (np == nat->nat_ptr) {
				natl.nl_rule = rulen;
				break;
			}
			np = np->in_next;
			rulen++;
		}
	}
#endif

	/* a single item: the record itself */
	types[0] = 0;
	sizes[0] = sizeof(natl);
	items[0] = &natl;

	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
}
#endif
1789