ip_nat.c revision 173181
1/*	$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 173181 2007-10-30 15:23:27Z darrenr $	*/
2
3/*
4 * Copyright (C) 1995-2003 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 */
8#if defined(KERNEL) || defined(_KERNEL)
9# undef KERNEL
10# undef _KERNEL
11# define        KERNEL	1
12# define        _KERNEL	1
13#endif
14#include <sys/errno.h>
15#include <sys/types.h>
16#include <sys/param.h>
17#include <sys/time.h>
18#include <sys/file.h>
19#if defined(_KERNEL) && defined(__NetBSD_Version__) && \
20    (__NetBSD_Version__ >= 399002000)
21# include <sys/kauth.h>
22#endif
23#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24    defined(_KERNEL)
25#if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
26#  include "opt_ipfilter_log.h"
27# else
28#  include "opt_ipfilter.h"
29# endif
30#endif
31#if !defined(_KERNEL)
32# include <stdio.h>
33# include <string.h>
34# include <stdlib.h>
35# define _KERNEL
36# ifdef __OpenBSD__
37struct file;
38# endif
39# include <sys/uio.h>
40# undef _KERNEL
41#endif
42#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
43# include <sys/filio.h>
44# include <sys/fcntl.h>
45#else
46# include <sys/ioctl.h>
47#endif
48#if !defined(AIX)
49# include <sys/fcntl.h>
50#endif
51#if !defined(linux)
52# include <sys/protosw.h>
53#endif
54#include <sys/socket.h>
55#if defined(_KERNEL)
56# include <sys/systm.h>
57# if !defined(__SVR4) && !defined(__svr4__)
58#  include <sys/mbuf.h>
59# endif
60#endif
61#if defined(__SVR4) || defined(__svr4__)
62# include <sys/filio.h>
63# include <sys/byteorder.h>
64# ifdef _KERNEL
65#  include <sys/dditypes.h>
66# endif
67# include <sys/stream.h>
68# include <sys/kmem.h>
69#endif
70#if __FreeBSD_version >= 300000
71# include <sys/queue.h>
72#endif
73#include <net/if.h>
74#if __FreeBSD_version >= 300000
75# include <net/if_var.h>
76# if defined(_KERNEL) && !defined(IPFILTER_LKM)
77#  include "opt_ipfilter.h"
78# endif
79#endif
80#ifdef sun
81# include <net/af.h>
82#endif
83#include <net/route.h>
84#include <netinet/in.h>
85#include <netinet/in_systm.h>
86#include <netinet/ip.h>
87
88#ifdef RFC1825
89# include <vpn/md5.h>
90# include <vpn/ipsec.h>
91extern struct ifnet vpnif;
92#endif
93
94#if !defined(linux)
95# include <netinet/ip_var.h>
96#endif
97#include <netinet/tcp.h>
98#include <netinet/udp.h>
99#include <netinet/ip_icmp.h>
100#include "netinet/ip_compat.h"
101#include <netinet/tcpip.h>
102#include "netinet/ip_fil.h"
103#include "netinet/ip_nat.h"
104#include "netinet/ip_frag.h"
105#include "netinet/ip_state.h"
106#include "netinet/ip_proxy.h"
107#ifdef	IPFILTER_SYNC
108#include "netinet/ip_sync.h"
109#endif
110#if (__FreeBSD_version >= 300000)
111# include <sys/malloc.h>
112#endif
113/* END OF INCLUDES */
114
115#undef	SOCKADDR_IN
116#define	SOCKADDR_IN	struct sockaddr_in
117
118#if !defined(lint)
119static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
120static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 173181 2007-10-30 15:23:27Z darrenr $";
121/* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */
122#endif
123
124
125/* ======================================================================== */
126/* How the NAT is organised and works.                                      */
127/*                                                                          */
128/* Inside (interface y) NAT       Outside (interface x)                     */
129/* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
131/* ------------>         |   ------------>                                  */
132/* src=10.1.1.1          |   src=192.1.1.1                                  */
133/*                       |                                                  */
134/*                       |   in, processed by fr_checknatin() for x         */
135/* <------------         |   <------------                                  */
136/* dst=10.1.1.1          |   dst=192.1.1.1                                  */
137/* -------------------- -+- -------------------------------------           */
138/* fr_checknatout() - changes ip_src and if required, sport                 */
139/*             - creates a new mapping, if required.                        */
140/* fr_checknatin()  - changes ip_dst and if required, dport                 */
141/*                                                                          */
142/* In the NAT table, internal source is recorded as "in" and externally     */
143/* seen as "out".                                                           */
144/* ======================================================================== */
145
146
147nat_t	**nat_table[2] = { NULL, NULL },
148	*nat_instances = NULL;
149ipnat_t	*nat_list = NULL;
150u_int	ipf_nattable_max = NAT_TABLE_MAX;
151u_int	ipf_nattable_sz = NAT_TABLE_SZ;
152u_int	ipf_natrules_sz = NAT_SIZE;
153u_int	ipf_rdrrules_sz = RDR_SIZE;
154u_int	ipf_hostmap_sz = HOSTMAP_SIZE;
155u_int	fr_nat_maxbucket = 0,
156	fr_nat_maxbucket_reset = 1;
157u_32_t	nat_masks = 0;
158u_32_t	rdr_masks = 0;
159u_long	nat_last_force_flush = 0;
160ipnat_t	**nat_rules = NULL;
161ipnat_t	**rdr_rules = NULL;
162hostmap_t	**ipf_hm_maptable  = NULL;
163hostmap_t	*ipf_hm_maplist  = NULL;
164ipftq_t	nat_tqb[IPF_TCP_NSTATES];
165ipftq_t	nat_udptq;
166ipftq_t	nat_icmptq;
167ipftq_t	nat_iptq;
168ipftq_t	*nat_utqe = NULL;
169int	fr_nat_doflush = 0;
170#ifdef  IPFILTER_LOG
171int	nat_logging = 1;
172#else
173int	nat_logging = 0;
174#endif
175
176u_long	fr_defnatage = DEF_NAT_AGE,
177	fr_defnatipage = 120,		/* 60 seconds */
178	fr_defnaticmpage = 6;		/* 3 seconds */
179natstat_t nat_stats;
180int	fr_nat_lock = 0;
181int	fr_nat_init = 0;
182#if SOLARIS && !defined(_INET_IP_STACK_H)
183extern	int		pfil_delayed_copy;
184#endif
185
186static	int	nat_flush_entry __P((void *));
187static	int	nat_flushtable __P((void));
188static	int	nat_clearlist __P((void));
189static	void	nat_addnat __P((struct ipnat *));
190static	void	nat_addrdr __P((struct ipnat *));
191static	void	nat_delrdr __P((struct ipnat *));
192static	void	nat_delnat __P((struct ipnat *));
193static	int	fr_natgetent __P((caddr_t, int));
194static	int	fr_natgetsz __P((caddr_t, int));
195static	int	fr_natputent __P((caddr_t, int));
196static	int	nat_extraflush __P((int));
197static	int	nat_gettable __P((char *));
198static	void	nat_tabmove __P((nat_t *));
199static	int	nat_match __P((fr_info_t *, ipnat_t *));
200static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
201static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
202static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
203				    struct in_addr, struct in_addr, u_32_t));
204static	int	nat_icmpquerytype4 __P((int));
205static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
206static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
207static	int	nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
208				      tcphdr_t *, nat_t **, int));
209static	int	nat_resolverule __P((ipnat_t *));
210static	nat_t	*fr_natclone __P((fr_info_t *, nat_t *));
211static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
212static	int	nat_wildok __P((nat_t *, int, int, int, int));
213static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *));
214static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *));
215
216
217/* ------------------------------------------------------------------------ */
218/* Function:    fr_natinit                                                  */
219/* Returns:     int - 0 == success, -1 == failure                           */
220/* Parameters:  Nil                                                         */
221/*                                                                          */
222/* Initialise all of the NAT locks, tables and other structures.            */
223/* ------------------------------------------------------------------------ */
224int fr_natinit()
225{
226	int i;
227
228	KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
229	if (nat_table[0] != NULL)
230		bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
231	else
232		return -1;
233
234	KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
235	if (nat_table[1] != NULL)
236		bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
237	else
238		return -2;
239
240	KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
241	if (nat_rules != NULL)
242		bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
243	else
244		return -3;
245
246	KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
247	if (rdr_rules != NULL)
248		bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
249	else
250		return -4;
251
252	KMALLOCS(ipf_hm_maptable, hostmap_t **, \
253		 sizeof(hostmap_t *) * ipf_hostmap_sz);
254	if (ipf_hm_maptable != NULL)
255		bzero((char *)ipf_hm_maptable,
256		      sizeof(hostmap_t *) * ipf_hostmap_sz);
257	else
258		return -5;
259	ipf_hm_maplist = NULL;
260
261	KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
262		 ipf_nattable_sz * sizeof(u_long));
263	if (nat_stats.ns_bucketlen[0] == NULL)
264		return -6;
265	bzero((char *)nat_stats.ns_bucketlen[0],
266	      ipf_nattable_sz * sizeof(u_long));
267
268	KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
269		 ipf_nattable_sz * sizeof(u_long));
270	if (nat_stats.ns_bucketlen[1] == NULL)
271		return -7;
272
273	bzero((char *)nat_stats.ns_bucketlen[1],
274	      ipf_nattable_sz * sizeof(u_long));
275
276	if (fr_nat_maxbucket == 0) {
277		for (i = ipf_nattable_sz; i > 0; i >>= 1)
278			fr_nat_maxbucket++;
279		fr_nat_maxbucket *= 2;
280	}
281
282	fr_sttab_init(nat_tqb);
283	/*
284	 * Increase this because we may have "keep state" following this too
285	 * and packet storms can occur if this is removed too quickly.
286	 */
287	nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
288	nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
289	nat_udptq.ifq_ttl = fr_defnatage;
290	nat_udptq.ifq_ref = 1;
291	nat_udptq.ifq_head = NULL;
292	nat_udptq.ifq_tail = &nat_udptq.ifq_head;
293	MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
294	nat_udptq.ifq_next = &nat_icmptq;
295	nat_icmptq.ifq_ttl = fr_defnaticmpage;
296	nat_icmptq.ifq_ref = 1;
297	nat_icmptq.ifq_head = NULL;
298	nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
299	MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
300	nat_icmptq.ifq_next = &nat_iptq;
301	nat_iptq.ifq_ttl = fr_defnatipage;
302	nat_iptq.ifq_ref = 1;
303	nat_iptq.ifq_head = NULL;
304	nat_iptq.ifq_tail = &nat_iptq.ifq_head;
305	MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
306	nat_iptq.ifq_next = NULL;
307
308	for (i = 0; i < IPF_TCP_NSTATES; i++) {
309		if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
310			nat_tqb[i].ifq_ttl = fr_defnaticmpage;
311#ifdef LARGE_NAT
312		else if (nat_tqb[i].ifq_ttl > fr_defnatage)
313			nat_tqb[i].ifq_ttl = fr_defnatage;
314#endif
315	}
316
317	/*
318	 * Increase this because we may have "keep state" following
319	 * this too and packet storms can occur if this is removed
320	 * too quickly.
321	 */
322	nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
323
324	RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
325	RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
326	MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
327	MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
328
329	fr_nat_init = 1;
330
331	return 0;
332}
333
334
335/* ------------------------------------------------------------------------ */
336/* Function:    nat_addrdr                                                  */
337/* Returns:     Nil                                                         */
338/* Parameters:  n(I) - pointer to NAT rule to add                           */
339/*                                                                          */
340/* Adds a redirect rule to the hash table of redirect rules and the list of */
341/* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
342/* use by redirect rules.                                                   */
343/* ------------------------------------------------------------------------ */
344static void nat_addrdr(n)
345ipnat_t *n;
346{
347	ipnat_t **np;
348	u_32_t j;
349	u_int hv;
350	int k;
351
352	k = count4bits(n->in_outmsk);
353	if ((k >= 0) && (k != 32))
354		rdr_masks |= 1 << k;
355	j = (n->in_outip & n->in_outmsk);
356	hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
357	np = rdr_rules + hv;
358	while (*np != NULL)
359		np = &(*np)->in_rnext;
360	n->in_rnext = NULL;
361	n->in_prnext = np;
362	n->in_hv = hv;
363	*np = n;
364}
365
366
367/* ------------------------------------------------------------------------ */
368/* Function:    nat_addnat                                                  */
369/* Returns:     Nil                                                         */
370/* Parameters:  n(I) - pointer to NAT rule to add                           */
371/*                                                                          */
372/* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
373/* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
374/* redirect rules.                                                          */
375/* ------------------------------------------------------------------------ */
376static void nat_addnat(n)
377ipnat_t *n;
378{
379	ipnat_t **np;
380	u_32_t j;
381	u_int hv;
382	int k;
383
384	k = count4bits(n->in_inmsk);
385	if ((k >= 0) && (k != 32))
386		nat_masks |= 1 << k;
387	j = (n->in_inip & n->in_inmsk);
388	hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
389	np = nat_rules + hv;
390	while (*np != NULL)
391		np = &(*np)->in_mnext;
392	n->in_mnext = NULL;
393	n->in_pmnext = np;
394	n->in_hv = hv;
395	*np = n;
396}
397
398
399/* ------------------------------------------------------------------------ */
400/* Function:    nat_delrdr                                                  */
401/* Returns:     Nil                                                         */
402/* Parameters:  n(I) - pointer to NAT rule to delete                        */
403/*                                                                          */
404/* Removes a redirect rule from the hash table of redirect rules.           */
405/* ------------------------------------------------------------------------ */
406static void nat_delrdr(n)
407ipnat_t *n;
408{
409	if (n->in_rnext)
410		n->in_rnext->in_prnext = n->in_prnext;
411	*n->in_prnext = n->in_rnext;
412}
413
414
415/* ------------------------------------------------------------------------ */
416/* Function:    nat_delnat                                                  */
417/* Returns:     Nil                                                         */
418/* Parameters:  n(I) - pointer to NAT rule to delete                        */
419/*                                                                          */
420/* Removes a NAT map rule from the hash table of NAT map rules.             */
421/* ------------------------------------------------------------------------ */
422static void nat_delnat(n)
423ipnat_t *n;
424{
425	if (n->in_mnext != NULL)
426		n->in_mnext->in_pmnext = n->in_pmnext;
427	*n->in_pmnext = n->in_mnext;
428}
429
430
431/* ------------------------------------------------------------------------ */
432/* Function:    nat_hostmap                                                 */
433/* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
434/*                                else a pointer to the hostmapping to use  */
435/* Parameters:  np(I)   - pointer to NAT rule                               */
436/*              real(I) - real IP address                                   */
437/*              map(I)  - mapped IP address                                 */
438/*              port(I) - destination port number                           */
439/* Write Locks: ipf_nat                                                     */
440/*                                                                          */
441/* Check if an ip address has already been allocated for a given mapping    */
442/* that is not doing port based translation.  If is not yet allocated, then */
443/* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
444/* ------------------------------------------------------------------------ */
445static struct hostmap *nat_hostmap(np, src, dst, map, port)
446ipnat_t *np;
447struct in_addr src;
448struct in_addr dst;
449struct in_addr map;
450u_32_t port;
451{
452	hostmap_t *hm;
453	u_int hv;
454
455	hv = (src.s_addr ^ dst.s_addr);
456	hv += src.s_addr;
457	hv += dst.s_addr;
458	hv %= HOSTMAP_SIZE;
459	for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_next)
460		if ((hm->hm_srcip.s_addr == src.s_addr) &&
461		    (hm->hm_dstip.s_addr == dst.s_addr) &&
462		    ((np == NULL) || (np == hm->hm_ipnat)) &&
463		    ((port == 0) || (port == hm->hm_port))) {
464			hm->hm_ref++;
465			return hm;
466		}
467
468	if (np == NULL)
469		return NULL;
470
471	KMALLOC(hm, hostmap_t *);
472	if (hm) {
473		hm->hm_next = ipf_hm_maplist;
474		hm->hm_pnext = &ipf_hm_maplist;
475		if (ipf_hm_maplist != NULL)
476			ipf_hm_maplist->hm_pnext = &hm->hm_next;
477		ipf_hm_maplist = hm;
478		hm->hm_hnext = ipf_hm_maptable[hv];
479		hm->hm_phnext = ipf_hm_maptable + hv;
480		if (ipf_hm_maptable[hv] != NULL)
481			ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
482		ipf_hm_maptable[hv] = hm;
483		hm->hm_ipnat = np;
484		hm->hm_srcip = src;
485		hm->hm_dstip = dst;
486		hm->hm_mapip = map;
487		hm->hm_ref = 1;
488		hm->hm_port = port;
489	}
490	return hm;
491}
492
493
494/* ------------------------------------------------------------------------ */
495/* Function:    fr_hostmapdel                                               */
496/* Returns:     Nil                                                         */
497/* Parameters:  hmp(I) - pointer to hostmap structure pointer               */
498/* Write Locks: ipf_nat                                                     */
499/*                                                                          */
500/* Decrement the references to this hostmap structure by one.  If this      */
501/* reaches zero then remove it and free it.                                 */
502/* ------------------------------------------------------------------------ */
503void fr_hostmapdel(hmp)
504struct hostmap **hmp;
505{
506	struct hostmap *hm;
507
508	hm = *hmp;
509	*hmp = NULL;
510
511	hm->hm_ref--;
512	if (hm->hm_ref == 0) {
513		if (hm->hm_hnext)
514			hm->hm_hnext->hm_phnext = hm->hm_phnext;
515		*hm->hm_phnext = hm->hm_hnext;
516		if (hm->hm_next)
517			hm->hm_next->hm_pnext = hm->hm_pnext;
518		*hm->hm_pnext = hm->hm_next;
519		KFREE(hm);
520	}
521}
522
523
524/* ------------------------------------------------------------------------ */
525/* Function:    fix_outcksum                                                */
526/* Returns:     Nil                                                         */
527/* Parameters:  fin(I) - pointer to packet information                      */
528/*              sp(I)  - location of 16bit checksum to update               */
529/*              n((I)  - amount to adjust checksum by                       */
530/*                                                                          */
531/* Adjusts the 16bit checksum by "n" for packets going out.                 */
532/* ------------------------------------------------------------------------ */
533void fix_outcksum(fin, sp, n)
534fr_info_t *fin;
535u_short *sp;
536u_32_t n;
537{
538	u_short sumshort;
539	u_32_t sum1;
540
541	if (n == 0)
542		return;
543
544	if (n & NAT_HW_CKSUM) {
545		n &= 0xffff;
546		n += fin->fin_dlen;
547		n = (n & 0xffff) + (n >> 16);
548		*sp = n & 0xffff;
549		return;
550	}
551	sum1 = (~ntohs(*sp)) & 0xffff;
552	sum1 += (n);
553	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
554	/* Again */
555	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
556	sumshort = ~(u_short)sum1;
557	*(sp) = htons(sumshort);
558}
559
560
561/* ------------------------------------------------------------------------ */
562/* Function:    fix_incksum                                                 */
563/* Returns:     Nil                                                         */
564/* Parameters:  fin(I) - pointer to packet information                      */
565/*              sp(I)  - location of 16bit checksum to update               */
566/*              n((I)  - amount to adjust checksum by                       */
567/*                                                                          */
568/* Adjusts the 16bit checksum by "n" for packets going in.                  */
569/* ------------------------------------------------------------------------ */
570void fix_incksum(fin, sp, n)
571fr_info_t *fin;
572u_short *sp;
573u_32_t n;
574{
575	u_short sumshort;
576	u_32_t sum1;
577
578	if (n == 0)
579		return;
580
581	if (n & NAT_HW_CKSUM) {
582		n &= 0xffff;
583		n += fin->fin_dlen;
584		n = (n & 0xffff) + (n >> 16);
585		*sp = n & 0xffff;
586		return;
587	}
588	sum1 = (~ntohs(*sp)) & 0xffff;
589	sum1 += ~(n) & 0xffff;
590	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
591	/* Again */
592	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
593	sumshort = ~(u_short)sum1;
594	*(sp) = htons(sumshort);
595}
596
597
598/* ------------------------------------------------------------------------ */
599/* Function:    fix_datacksum                                               */
600/* Returns:     Nil                                                         */
601/* Parameters:  sp(I)  - location of 16bit checksum to update               */
602/*              n((I)  - amount to adjust checksum by                       */
603/*                                                                          */
604/* Fix_datacksum is used *only* for the adjustments of checksums in the     */
605/* data section of an IP packet.                                            */
606/*                                                                          */
607/* The only situation in which you need to do this is when NAT'ing an       */
608/* ICMP error message. Such a message, contains in its body the IP header   */
609/* of the original IP packet, that causes the error.                        */
610/*                                                                          */
611/* You can't use fix_incksum or fix_outcksum in that case, because for the  */
612/* kernel the data section of the ICMP error is just data, and no special   */
613/* processing like hardware cksum or ntohs processing have been done by the */
614/* kernel on the data section.                                              */
615/* ------------------------------------------------------------------------ */
616void fix_datacksum(sp, n)
617u_short *sp;
618u_32_t n;
619{
620	u_short sumshort;
621	u_32_t sum1;
622
623	if (n == 0)
624		return;
625
626	sum1 = (~ntohs(*sp)) & 0xffff;
627	sum1 += (n);
628	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
629	/* Again */
630	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
631	sumshort = ~(u_short)sum1;
632	*(sp) = htons(sumshort);
633}
634
635
636/* ------------------------------------------------------------------------ */
637/* Function:    fr_nat_ioctl                                                */
638/* Returns:     int - 0 == success, != 0 == failure                         */
639/* Parameters:  data(I) - pointer to ioctl data                             */
640/*              cmd(I)  - ioctl command integer                             */
641/*              mode(I) - file mode bits used with open                     */
642/*                                                                          */
643/* Processes an ioctl call made to operate on the IP Filter NAT device.     */
644/* ------------------------------------------------------------------------ */
645int fr_nat_ioctl(data, cmd, mode, uid, ctx)
646ioctlcmd_t cmd;
647caddr_t data;
648int mode, uid;
649void *ctx;
650{
651	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
652	int error = 0, ret, arg, getlock;
653	ipnat_t natd;
654	SPL_INT(s);
655
656#if (BSD >= 199306) && defined(_KERNEL)
657# if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
658	if ((mode & FWRITE) &&
659	     kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
660				     KAUTH_REQ_NETWORK_FIREWALL_FW,
661				     NULL, NULL, NULL)) {
662		return EPERM;
663	}
664# else
665	if ((securelevel >= 3) && (mode & FWRITE)) {
666		return EPERM;
667	}
668# endif
669#endif
670
671#if defined(__osf__) && defined(_KERNEL)
672	getlock = 0;
673#else
674	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
675#endif
676
677	nat = NULL;     /* XXX gcc -Wuninitialized */
678	if (cmd == (ioctlcmd_t)SIOCADNAT) {
679		KMALLOC(nt, ipnat_t *);
680	} else {
681		nt = NULL;
682	}
683
684	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
685		if (mode & NAT_SYSSPACE) {
686			bcopy(data, (char *)&natd, sizeof(natd));
687			error = 0;
688		} else {
689			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
690		}
691	}
692
693	if (error != 0)
694		goto done;
695
696	/*
697	 * For add/delete, look to see if the NAT entry is already present
698	 */
699	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
700		nat = &natd;
701		if (nat->in_v == 0)	/* For backward compat. */
702			nat->in_v = 4;
703		nat->in_flags &= IPN_USERFLAGS;
704		if ((nat->in_redir & NAT_MAPBLK) == 0) {
705			if ((nat->in_flags & IPN_SPLIT) == 0)
706				nat->in_inip &= nat->in_inmsk;
707			if ((nat->in_flags & IPN_IPRANGE) == 0)
708				nat->in_outip &= nat->in_outmsk;
709		}
710		MUTEX_ENTER(&ipf_natio);
711		for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
712			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
713					IPN_CMPSIZ) == 0) {
714				if (nat->in_redir == NAT_REDIRECT &&
715				    nat->in_pnext != n->in_pnext)
716					continue;
717				break;
718			}
719	}
720
721	switch (cmd)
722	{
723#ifdef  IPFILTER_LOG
724	case SIOCIPFFB :
725	{
726		int tmp;
727
728		if (!(mode & FWRITE))
729			error = EPERM;
730		else {
731			tmp = ipflog_clear(IPL_LOGNAT);
732			error = BCOPYOUT((char *)&tmp, (char *)data,
733					 sizeof(tmp));
734			if (error != 0)
735				error = EFAULT;
736		}
737		break;
738	}
739
740	case SIOCSETLG :
741		if (!(mode & FWRITE))
742			error = EPERM;
743		else {
744			error = BCOPYIN((char *)data, (char *)&nat_logging,
745					sizeof(nat_logging));
746			if (error != 0)
747				error = EFAULT;
748		}
749		break;
750
751	case SIOCGETLG :
752		error = BCOPYOUT((char *)&nat_logging, (char *)data,
753				 sizeof(nat_logging));
754		if (error != 0)
755			error = EFAULT;
756		break;
757
758	case FIONREAD :
759		arg = iplused[IPL_LOGNAT];
760		error = BCOPYOUT(&arg, data, sizeof(arg));
761		if (error != 0)
762			error = EFAULT;
763		break;
764#endif
765	case SIOCADNAT :
766		if (!(mode & FWRITE)) {
767			error = EPERM;
768		} else if (n != NULL) {
769			error = EEXIST;
770		} else if (nt == NULL) {
771			error = ENOMEM;
772		}
773		if (error != 0) {
774			MUTEX_EXIT(&ipf_natio);
775			break;
776		}
777		bcopy((char *)nat, (char *)nt, sizeof(*n));
778		error = nat_siocaddnat(nt, np, getlock);
779		MUTEX_EXIT(&ipf_natio);
780		if (error == 0)
781			nt = NULL;
782		break;
783
784	case SIOCRMNAT :
785		if (!(mode & FWRITE)) {
786			error = EPERM;
787			n = NULL;
788		} else if (n == NULL) {
789			error = ESRCH;
790		}
791
792		if (error != 0) {
793			MUTEX_EXIT(&ipf_natio);
794			break;
795		}
796		nat_siocdelnat(n, np, getlock);
797
798		MUTEX_EXIT(&ipf_natio);
799		n = NULL;
800		break;
801
802	case SIOCGNATS :
803		nat_stats.ns_table[0] = nat_table[0];
804		nat_stats.ns_table[1] = nat_table[1];
805		nat_stats.ns_list = nat_list;
806		nat_stats.ns_maptable = ipf_hm_maptable;
807		nat_stats.ns_maplist = ipf_hm_maplist;
808		nat_stats.ns_nattab_sz = ipf_nattable_sz;
809		nat_stats.ns_nattab_max = ipf_nattable_max;
810		nat_stats.ns_rultab_sz = ipf_natrules_sz;
811		nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
812		nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
813		nat_stats.ns_instances = nat_instances;
814		nat_stats.ns_apslist = ap_sess_list;
815		nat_stats.ns_ticks = fr_ticks;
816		error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
817		break;
818
819	case SIOCGNATL :
820	    {
821		natlookup_t nl;
822
823		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
824		if (error == 0) {
825			void *ptr;
826
827			if (getlock) {
828				READ_ENTER(&ipf_nat);
829			}
830			ptr = nat_lookupredir(&nl);
831			if (getlock) {
832				RWLOCK_EXIT(&ipf_nat);
833			}
834			if (ptr != NULL) {
835				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
836			} else {
837				error = ESRCH;
838			}
839		}
840		break;
841	    }
842
843	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
844		if (!(mode & FWRITE)) {
845			error = EPERM;
846			break;
847		}
848		if (getlock) {
849			WRITE_ENTER(&ipf_nat);
850		}
851
852		error = BCOPYIN(data, &arg, sizeof(arg));
853		if (error != 0)
854			error = EFAULT;
855		else {
856			if (arg == 0)
857				ret = nat_flushtable();
858			else if (arg == 1)
859				ret = nat_clearlist();
860			else
861				ret = nat_extraflush(arg);
862		}
863
864		if (getlock) {
865			RWLOCK_EXIT(&ipf_nat);
866		}
867		if (error == 0) {
868			error = BCOPYOUT(&ret, data, sizeof(ret));
869		}
870		break;
871
872	case SIOCPROXY :
873		error = appr_ioctl(data, cmd, mode, ctx);
874		break;
875
876	case SIOCSTLCK :
877		if (!(mode & FWRITE)) {
878			error = EPERM;
879		} else {
880			error = fr_lock(data, &fr_nat_lock);
881		}
882		break;
883
884	case SIOCSTPUT :
885		if ((mode & FWRITE) != 0) {
886			error = fr_natputent(data, getlock);
887		} else {
888			error = EACCES;
889		}
890		break;
891
892	case SIOCSTGSZ :
893		if (fr_nat_lock) {
894			error = fr_natgetsz(data, getlock);
895		} else
896			error = EACCES;
897		break;
898
899	case SIOCSTGET :
900		if (fr_nat_lock) {
901			error = fr_natgetent(data, getlock);
902		} else
903			error = EACCES;
904		break;
905
906	case SIOCGENITER :
907	    {
908		ipfgeniter_t iter;
909		ipftoken_t *token;
910
911		SPL_SCHED(s);
912		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
913		if (error == 0) {
914			token = ipf_findtoken(iter.igi_type, uid, ctx);
915			if (token != NULL) {
916				error  = nat_iterator(token, &iter);
917			}
918			RWLOCK_EXIT(&ipf_tokens);
919		}
920		SPL_X(s);
921		break;
922	    }
923
924	case SIOCIPFDELTOK :
925		error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
926		if (error == 0) {
927			SPL_SCHED(s);
928			error = ipf_deltoken(arg, uid, ctx);
929			SPL_X(s);
930		} else {
931			error = EFAULT;
932		}
933		break;
934
935	case SIOCGTQTAB :
936		error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
937		break;
938
939	case SIOCGTABL :
940		error = nat_gettable(data);
941		break;
942
943	default :
944		error = EINVAL;
945		break;
946	}
947done:
948	if (nt != NULL)
949		KFREE(nt);
950	return error;
951}
952
953
954/* ------------------------------------------------------------------------ */
955/* Function:    nat_siocaddnat                                              */
956/* Returns:     int - 0 == success, != 0 == failure                         */
957/* Parameters:  n(I)       - pointer to new NAT rule                        */
958/*              np(I)      - pointer to where to insert new NAT rule        */
959/*              getlock(I) - flag indicating if lock on ipf_nat is held     */
960/* Mutex Locks: ipf_natio                                                   */
961/*                                                                          */
962/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
963/* from information passed to the kernel, then add it  to the appropriate   */
964/* NAT rule table(s).                                                       */
965/* ------------------------------------------------------------------------ */
966static int nat_siocaddnat(n, np, getlock)
967ipnat_t *n, **np;
968int getlock;
969{
970	int error = 0, i, j;
971
972	if (nat_resolverule(n) != 0)
973		return ENOENT;
974
975	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
976		return EINVAL;
977
978	n->in_use = 0;
979	if (n->in_redir & NAT_MAPBLK)
980		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
981	else if (n->in_flags & IPN_AUTOPORTMAP)
982		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
983	else if (n->in_flags & IPN_IPRANGE)
984		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
985	else if (n->in_flags & IPN_SPLIT)
986		n->in_space = 2;
987	else if (n->in_outmsk != 0)
988		n->in_space = ~ntohl(n->in_outmsk);
989	else
990		n->in_space = 1;
991
992	/*
993	 * Calculate the number of valid IP addresses in the output
994	 * mapping range.  In all cases, the range is inclusive of
995	 * the start and ending IP addresses.
996	 * If to a CIDR address, lose 2: broadcast + network address
997	 *                               (so subtract 1)
998	 * If to a range, add one.
999	 * If to a single IP address, set to 1.
1000	 */
1001	if (n->in_space) {
1002		if ((n->in_flags & IPN_IPRANGE) != 0)
1003			n->in_space += 1;
1004		else
1005			n->in_space -= 1;
1006	} else
1007		n->in_space = 1;
1008
1009	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1010	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1011		n->in_nip = ntohl(n->in_outip) + 1;
1012	else if ((n->in_flags & IPN_SPLIT) &&
1013		 (n->in_redir & NAT_REDIRECT))
1014		n->in_nip = ntohl(n->in_inip);
1015	else
1016		n->in_nip = ntohl(n->in_outip);
1017	if (n->in_redir & NAT_MAP) {
1018		n->in_pnext = ntohs(n->in_pmin);
1019		/*
1020		 * Multiply by the number of ports made available.
1021		 */
1022		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1023			n->in_space *= (ntohs(n->in_pmax) -
1024					ntohs(n->in_pmin) + 1);
1025			/*
1026			 * Because two different sources can map to
1027			 * different destinations but use the same
1028			 * local IP#/port #.
1029			 * If the result is smaller than in_space, then
1030			 * we may have wrapped around 32bits.
1031			 */
1032			i = n->in_inmsk;
1033			if ((i != 0) && (i != 0xffffffff)) {
1034				j = n->in_space * (~ntohl(i) + 1);
1035				if (j >= n->in_space)
1036					n->in_space = j;
1037				else
1038					n->in_space = 0xffffffff;
1039			}
1040		}
1041		/*
1042		 * If no protocol is specified, multiple by 256 to allow for
1043		 * at least one IP:IP mapping per protocol.
1044		 */
1045		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1046				j = n->in_space * 256;
1047				if (j >= n->in_space)
1048					n->in_space = j;
1049				else
1050					n->in_space = 0xffffffff;
1051		}
1052	}
1053
1054	/* Otherwise, these fields are preset */
1055
1056	if (getlock) {
1057		WRITE_ENTER(&ipf_nat);
1058	}
1059	n->in_next = NULL;
1060	*np = n;
1061
1062	if (n->in_age[0] != 0)
1063		n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1064
1065	if (n->in_age[1] != 0)
1066		n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
1067
1068	if (n->in_redir & NAT_REDIRECT) {
1069		n->in_flags &= ~IPN_NOTDST;
1070		nat_addrdr(n);
1071	}
1072	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1073		n->in_flags &= ~IPN_NOTSRC;
1074		nat_addnat(n);
1075	}
1076	MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1077
1078	n = NULL;
1079	nat_stats.ns_rules++;
1080#if SOLARIS && !defined(_INET_IP_STACK_H)
1081	pfil_delayed_copy = 0;
1082#endif
1083	if (getlock) {
1084		RWLOCK_EXIT(&ipf_nat);			/* WRITE */
1085	}
1086
1087	return error;
1088}
1089
1090
1091/* ------------------------------------------------------------------------ */
1092/* Function:    nat_resolvrule                                              */
1093/* Returns:     Nil                                                         */
1094/* Parameters:  n(I)  - pointer to NAT rule                                 */
1095/*                                                                          */
1096/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1097/* from information passed to the kernel, then add it  to the appropriate   */
1098/* NAT rule table(s).                                                       */
1099/* ------------------------------------------------------------------------ */
1100static int nat_resolverule(n)
1101ipnat_t *n;
1102{
1103	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1104	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1105
1106	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1107	if (n->in_ifnames[1][0] == '\0') {
1108		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1109		n->in_ifps[1] = n->in_ifps[0];
1110	} else {
1111		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
1112	}
1113
1114	if (n->in_plabel[0] != '\0') {
1115		n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1116		if (n->in_apr == NULL)
1117			return -1;
1118	}
1119	return 0;
1120}
1121
1122
1123/* ------------------------------------------------------------------------ */
1124/* Function:    nat_siocdelnat                                              */
1125/* Returns:     int - 0 == success, != 0 == failure                         */
1126/* Parameters:  n(I)       - pointer to new NAT rule                        */
1127/*              np(I)      - pointer to where to insert new NAT rule        */
1128/*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1129/* Mutex Locks: ipf_natio                                                   */
1130/*                                                                          */
1131/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1132/* from information passed to the kernel, then add it  to the appropriate   */
1133/* NAT rule table(s).                                                       */
1134/* ------------------------------------------------------------------------ */
1135static void nat_siocdelnat(n, np, getlock)
1136ipnat_t *n, **np;
1137int getlock;
1138{
1139	if (getlock) {
1140		WRITE_ENTER(&ipf_nat);
1141	}
1142	if (n->in_redir & NAT_REDIRECT)
1143		nat_delrdr(n);
1144	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1145		nat_delnat(n);
1146	if (nat_list == NULL) {
1147		nat_masks = 0;
1148		rdr_masks = 0;
1149	}
1150
1151	if (n->in_tqehead[0] != NULL) {
1152		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1153			fr_freetimeoutqueue(n->in_tqehead[1]);
1154		}
1155	}
1156
1157	if (n->in_tqehead[1] != NULL) {
1158		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1159			fr_freetimeoutqueue(n->in_tqehead[1]);
1160		}
1161	}
1162
1163	*np = n->in_next;
1164
1165	if (n->in_use == 0) {
1166		if (n->in_apr)
1167			appr_free(n->in_apr);
1168		MUTEX_DESTROY(&n->in_lock);
1169		KFREE(n);
1170		nat_stats.ns_rules--;
1171#if SOLARIS && !defined(_INET_IP_STACK_H)
1172		if (nat_stats.ns_rules == 0)
1173			pfil_delayed_copy = 1;
1174#endif
1175	} else {
1176		n->in_flags |= IPN_DELETE;
1177		n->in_next = NULL;
1178	}
1179	if (getlock) {
1180		RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
1181	}
1182}
1183
1184
1185/* ------------------------------------------------------------------------ */
1186/* Function:    fr_natgetsz                                                 */
1187/* Returns:     int - 0 == success, != 0 is the error value.                */
1188/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1189/*                        get the size of.                                  */
1190/*                                                                          */
1191/* Handle SIOCSTGSZ.                                                        */
1192/* Return the size of the nat list entry to be copied back to user space.   */
1193/* The size of the entry is stored in the ng_sz field and the enture natget */
1194/* structure is copied back to the user.                                    */
1195/* ------------------------------------------------------------------------ */
1196static int fr_natgetsz(data, getlock)
1197caddr_t data;
1198int getlock;
1199{
1200	ap_session_t *aps;
1201	nat_t *nat, *n;
1202	natget_t ng;
1203
1204	if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1205		return EFAULT;
1206
1207	if (getlock) {
1208		READ_ENTER(&ipf_nat);
1209	}
1210
1211	nat = ng.ng_ptr;
1212	if (!nat) {
1213		nat = nat_instances;
1214		ng.ng_sz = 0;
1215		/*
1216		 * Empty list so the size returned is 0.  Simple.
1217		 */
1218		if (nat == NULL) {
1219			if (getlock) {
1220				RWLOCK_EXIT(&ipf_nat);
1221			}
1222			if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1223				return EFAULT;
1224			return 0;
1225		}
1226	} else {
1227		/*
1228		 * Make sure the pointer we're copying from exists in the
1229		 * current list of entries.  Security precaution to prevent
1230		 * copying of random kernel data.
1231		 */
1232		for (n = nat_instances; n; n = n->nat_next)
1233			if (n == nat)
1234				break;
1235		if (n == NULL) {
1236			if (getlock) {
1237				RWLOCK_EXIT(&ipf_nat);
1238			}
1239			return ESRCH;
1240		}
1241	}
1242
1243	/*
1244	 * Incluse any space required for proxy data structures.
1245	 */
1246	ng.ng_sz = sizeof(nat_save_t);
1247	aps = nat->nat_aps;
1248	if (aps != NULL) {
1249		ng.ng_sz += sizeof(ap_session_t) - 4;
1250		if (aps->aps_data != 0)
1251			ng.ng_sz += aps->aps_psiz;
1252	}
1253	if (getlock) {
1254		RWLOCK_EXIT(&ipf_nat);
1255	}
1256
1257	if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1258		return EFAULT;
1259	return 0;
1260}
1261
1262
1263/* ------------------------------------------------------------------------ */
1264/* Function:    fr_natgetent                                                */
1265/* Returns:     int - 0 == success, != 0 is the error value.                */
1266/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1267/*                        to NAT structure to copy out.                     */
1268/*                                                                          */
1269/* Handle SIOCSTGET.                                                        */
1270/* Copies out NAT entry to user space.  Any additional data held for a      */
1271/* proxy is also copied, as to is the NAT rule which was responsible for it */
1272/* ------------------------------------------------------------------------ */
1273static int fr_natgetent(data, getlock)
1274caddr_t data;
1275int getlock;
1276{
1277	int error, outsize;
1278	ap_session_t *aps;
1279	nat_save_t *ipn, ipns;
1280	nat_t *n, *nat;
1281
1282	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1283	if (error != 0)
1284		return error;
1285
1286	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1287		return EINVAL;
1288
1289	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1290	if (ipn == NULL)
1291		return ENOMEM;
1292
1293	if (getlock) {
1294		READ_ENTER(&ipf_nat);
1295	}
1296
1297	ipn->ipn_dsize = ipns.ipn_dsize;
1298	nat = ipns.ipn_next;
1299	if (nat == NULL) {
1300		nat = nat_instances;
1301		if (nat == NULL) {
1302			if (nat_instances == NULL)
1303				error = ENOENT;
1304			goto finished;
1305		}
1306	} else {
1307		/*
1308		 * Make sure the pointer we're copying from exists in the
1309		 * current list of entries.  Security precaution to prevent
1310		 * copying of random kernel data.
1311		 */
1312		for (n = nat_instances; n; n = n->nat_next)
1313			if (n == nat)
1314				break;
1315		if (n == NULL) {
1316			error = ESRCH;
1317			goto finished;
1318		}
1319	}
1320	ipn->ipn_next = nat->nat_next;
1321
1322	/*
1323	 * Copy the NAT structure.
1324	 */
1325	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1326
1327	/*
1328	 * If we have a pointer to the NAT rule it belongs to, save that too.
1329	 */
1330	if (nat->nat_ptr != NULL)
1331		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1332		      sizeof(ipn->ipn_ipnat));
1333
1334	/*
1335	 * If we also know the NAT entry has an associated filter rule,
1336	 * save that too.
1337	 */
1338	if (nat->nat_fr != NULL)
1339		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1340		      sizeof(ipn->ipn_fr));
1341
1342	/*
1343	 * Last but not least, if there is an application proxy session set
1344	 * up for this NAT entry, then copy that out too, including any
1345	 * private data saved along side it by the proxy.
1346	 */
1347	aps = nat->nat_aps;
1348	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1349	if (aps != NULL) {
1350		char *s;
1351
1352		if (outsize < sizeof(*aps)) {
1353			error = ENOBUFS;
1354			goto finished;
1355		}
1356
1357		s = ipn->ipn_data;
1358		bcopy((char *)aps, s, sizeof(*aps));
1359		s += sizeof(*aps);
1360		outsize -= sizeof(*aps);
1361		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1362			bcopy(aps->aps_data, s, aps->aps_psiz);
1363		else
1364			error = ENOBUFS;
1365	}
1366	if (error == 0) {
1367		if (getlock) {
1368			RWLOCK_EXIT(&ipf_nat);
1369			getlock = 0;
1370		}
1371		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1372	}
1373
1374finished:
1375	if (getlock) {
1376		RWLOCK_EXIT(&ipf_nat);
1377	}
1378	if (ipn != NULL) {
1379		KFREES(ipn, ipns.ipn_dsize);
1380	}
1381	return error;
1382}
1383
1384
1385/* ------------------------------------------------------------------------ */
1386/* Function:    fr_natputent                                                */
1387/* Returns:     int - 0 == success, != 0 is the error value.                */
1388/* Parameters:  data(I) -     pointer to natget structure with NAT          */
1389/*                            structure information to load into the kernel */
1390/*              getlock(I) - flag indicating whether or not a write lock    */
1391/*                           on ipf_nat is already held.                    */
1392/*                                                                          */
1393/* Handle SIOCSTPUT.                                                        */
1394/* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1395/* firewall rule data structures, if pointers to them indicate so.          */
1396/* ------------------------------------------------------------------------ */
1397static int fr_natputent(data, getlock)
1398caddr_t data;
1399int getlock;
1400{
1401	nat_save_t ipn, *ipnn;
1402	ap_session_t *aps;
1403	nat_t *n, *nat;
1404	frentry_t *fr;
1405	fr_info_t fin;
1406	ipnat_t *in;
1407	int error;
1408
1409	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1410	if (error != 0)
1411		return error;
1412
1413	/*
1414	 * Initialise early because of code at junkput label.
1415	 */
1416	in = NULL;
1417	aps = NULL;
1418	nat = NULL;
1419	ipnn = NULL;
1420	fr = NULL;
1421
1422	/*
1423	 * New entry, copy in the rest of the NAT entry if it's size is more
1424	 * than just the nat_t structure.
1425	 */
1426	if (ipn.ipn_dsize > sizeof(ipn)) {
1427		if (ipn.ipn_dsize > 81920) {
1428			error = ENOMEM;
1429			goto junkput;
1430		}
1431
1432		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1433		if (ipnn == NULL)
1434			return ENOMEM;
1435
1436		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1437		if (error != 0) {
1438			error = EFAULT;
1439			goto junkput;
1440		}
1441	} else
1442		ipnn = &ipn;
1443
1444	KMALLOC(nat, nat_t *);
1445	if (nat == NULL) {
1446		error = ENOMEM;
1447		goto junkput;
1448	}
1449
1450	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1451	/*
1452	 * Initialize all these so that nat_delete() doesn't cause a crash.
1453	 */
1454	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1455	nat->nat_tqe.tqe_pnext = NULL;
1456	nat->nat_tqe.tqe_next = NULL;
1457	nat->nat_tqe.tqe_ifq = NULL;
1458	nat->nat_tqe.tqe_parent = nat;
1459
1460	/*
1461	 * Restore the rule associated with this nat session
1462	 */
1463	in = ipnn->ipn_nat.nat_ptr;
1464	if (in != NULL) {
1465		KMALLOC(in, ipnat_t *);
1466		nat->nat_ptr = in;
1467		if (in == NULL) {
1468			error = ENOMEM;
1469			goto junkput;
1470		}
1471		bzero((char *)in, offsetof(struct ipnat, in_next6));
1472		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1473		in->in_use = 1;
1474		in->in_flags |= IPN_DELETE;
1475
1476		ATOMIC_INC(nat_stats.ns_rules);
1477
1478		if (nat_resolverule(in) != 0) {
1479			error = ESRCH;
1480			goto junkput;
1481		}
1482	}
1483
1484	/*
1485	 * Check that the NAT entry doesn't already exist in the kernel.
1486	 *
1487	 * For NAT_OUTBOUND, we're lookup for a duplicate MAP entry.  To do
1488	 * this, we check to see if the inbound combination of addresses and
1489	 * ports is already known.  Similar logic is applied for NAT_INBOUND.
1490	 *
1491	 */
1492	bzero((char *)&fin, sizeof(fin));
1493	fin.fin_p = nat->nat_p;
1494	if (nat->nat_dir == NAT_OUTBOUND) {
1495		fin.fin_ifp = nat->nat_ifps[0];
1496		fin.fin_data[0] = ntohs(nat->nat_oport);
1497		fin.fin_data[1] = ntohs(nat->nat_outport);
1498		if (getlock) {
1499			READ_ENTER(&ipf_nat);
1500		}
1501		n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1502				 nat->nat_oip, nat->nat_inip);
1503		if (getlock) {
1504			RWLOCK_EXIT(&ipf_nat);
1505		}
1506		if (n != NULL) {
1507			error = EEXIST;
1508			goto junkput;
1509		}
1510	} else if (nat->nat_dir == NAT_INBOUND) {
1511		fin.fin_ifp = nat->nat_ifps[0];
1512		fin.fin_data[0] = ntohs(nat->nat_outport);
1513		fin.fin_data[1] = ntohs(nat->nat_oport);
1514		if (getlock) {
1515			READ_ENTER(&ipf_nat);
1516		}
1517		n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1518				  nat->nat_outip, nat->nat_oip);
1519		if (getlock) {
1520			RWLOCK_EXIT(&ipf_nat);
1521		}
1522		if (n != NULL) {
1523			error = EEXIST;
1524			goto junkput;
1525		}
1526	} else {
1527		error = EINVAL;
1528		goto junkput;
1529	}
1530
1531	/*
1532	 * Restore ap_session_t structure.  Include the private data allocated
1533	 * if it was there.
1534	 */
1535	aps = nat->nat_aps;
1536	if (aps != NULL) {
1537		KMALLOC(aps, ap_session_t *);
1538		nat->nat_aps = aps;
1539		if (aps == NULL) {
1540			error = ENOMEM;
1541			goto junkput;
1542		}
1543		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1544		if (in != NULL)
1545			aps->aps_apr = in->in_apr;
1546		else
1547			aps->aps_apr = NULL;
1548		if (aps->aps_psiz != 0) {
1549			if (aps->aps_psiz > 81920) {
1550				error = ENOMEM;
1551				goto junkput;
1552			}
1553			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1554			if (aps->aps_data == NULL) {
1555				error = ENOMEM;
1556				goto junkput;
1557			}
1558			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1559			      aps->aps_psiz);
1560		} else {
1561			aps->aps_psiz = 0;
1562			aps->aps_data = NULL;
1563		}
1564	}
1565
1566	/*
1567	 * If there was a filtering rule associated with this entry then
1568	 * build up a new one.
1569	 */
1570	fr = nat->nat_fr;
1571	if (fr != NULL) {
1572		if ((nat->nat_flags & SI_NEWFR) != 0) {
1573			KMALLOC(fr, frentry_t *);
1574			nat->nat_fr = fr;
1575			if (fr == NULL) {
1576				error = ENOMEM;
1577				goto junkput;
1578			}
1579			ipnn->ipn_nat.nat_fr = fr;
1580			fr->fr_ref = 1;
1581			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1582			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1583
1584			fr->fr_ref = 1;
1585			fr->fr_dsize = 0;
1586			fr->fr_data = NULL;
1587			fr->fr_type = FR_T_NONE;
1588
1589			MUTEX_NUKE(&fr->fr_lock);
1590			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1591		} else {
1592			if (getlock) {
1593				READ_ENTER(&ipf_nat);
1594			}
1595			for (n = nat_instances; n; n = n->nat_next)
1596				if (n->nat_fr == fr)
1597					break;
1598
1599			if (n != NULL) {
1600				MUTEX_ENTER(&fr->fr_lock);
1601				fr->fr_ref++;
1602				MUTEX_EXIT(&fr->fr_lock);
1603			}
1604			if (getlock) {
1605				RWLOCK_EXIT(&ipf_nat);
1606			}
1607
1608			if (!n) {
1609				error = ESRCH;
1610				goto junkput;
1611			}
1612		}
1613	}
1614
1615	if (ipnn != &ipn) {
1616		KFREES(ipnn, ipn.ipn_dsize);
1617		ipnn = NULL;
1618	}
1619
1620	if (getlock) {
1621		WRITE_ENTER(&ipf_nat);
1622	}
1623	error = nat_insert(nat, nat->nat_rev);
1624	if ((error == 0) && (aps != NULL)) {
1625		aps->aps_next = ap_sess_list;
1626		ap_sess_list = aps;
1627	}
1628	if (getlock) {
1629		RWLOCK_EXIT(&ipf_nat);
1630	}
1631
1632	if (error == 0)
1633		return 0;
1634
1635	error = ENOMEM;
1636
1637junkput:
1638	if (fr != NULL)
1639		(void) fr_derefrule(&fr);
1640
1641	if ((ipnn != NULL) && (ipnn != &ipn)) {
1642		KFREES(ipnn, ipn.ipn_dsize);
1643	}
1644	if (nat != NULL) {
1645		if (aps != NULL) {
1646			if (aps->aps_data != NULL) {
1647				KFREES(aps->aps_data, aps->aps_psiz);
1648			}
1649			KFREE(aps);
1650		}
1651		if (in != NULL) {
1652			if (in->in_apr)
1653				appr_free(in->in_apr);
1654			KFREE(in);
1655		}
1656		KFREE(nat);
1657	}
1658	return error;
1659}
1660
1661
1662/* ------------------------------------------------------------------------ */
1663/* Function:    nat_delete                                                  */
1664/* Returns:     Nil                                                         */
1665/* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1666/*              logtype(I) - type of LOG record to create before deleting   */
1667/* Write Lock:  ipf_nat                                                     */
1668/*                                                                          */
1669/* Delete a nat entry from the various lists and table.  If NAT logging is  */
1670/* enabled then generate a NAT log record for this event.                   */
1671/* ------------------------------------------------------------------------ */
1672void nat_delete(nat, logtype)
1673struct nat *nat;
1674int logtype;
1675{
1676	struct ipnat *ipn;
1677	int removed = 0;
1678
1679	if (logtype != 0 && nat_logging != 0)
1680		nat_log(nat, logtype);
1681
1682	/*
1683	 * Take it as a general indication that all the pointers are set if
1684	 * nat_pnext is set.
1685	 */
1686	if (nat->nat_pnext != NULL) {
1687		removed = 1;
1688
1689		nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1690		nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1691
1692		*nat->nat_pnext = nat->nat_next;
1693		if (nat->nat_next != NULL) {
1694			nat->nat_next->nat_pnext = nat->nat_pnext;
1695			nat->nat_next = NULL;
1696		}
1697		nat->nat_pnext = NULL;
1698
1699		*nat->nat_phnext[0] = nat->nat_hnext[0];
1700		if (nat->nat_hnext[0] != NULL) {
1701			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1702			nat->nat_hnext[0] = NULL;
1703		}
1704		nat->nat_phnext[0] = NULL;
1705
1706		*nat->nat_phnext[1] = nat->nat_hnext[1];
1707		if (nat->nat_hnext[1] != NULL) {
1708			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1709			nat->nat_hnext[1] = NULL;
1710		}
1711		nat->nat_phnext[1] = NULL;
1712
1713		if ((nat->nat_flags & SI_WILDP) != 0)
1714			nat_stats.ns_wilds--;
1715	}
1716
1717	if (nat->nat_me != NULL) {
1718		*nat->nat_me = NULL;
1719		nat->nat_me = NULL;
1720	}
1721
1722	if (nat->nat_tqe.tqe_ifq != NULL)
1723		fr_deletequeueentry(&nat->nat_tqe);
1724
1725	if (logtype == NL_EXPIRE)
1726		nat_stats.ns_expire++;
1727
1728	MUTEX_ENTER(&nat->nat_lock);
1729	/*
1730	 * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1731	 * This happens when a nat'd packet is blocked and we want to throw
1732	 * away the NAT session.
1733	 */
1734	if (logtype == NL_DESTROY) {
1735		if (nat->nat_ref > 2) {
1736			nat->nat_ref -= 2;
1737			MUTEX_EXIT(&nat->nat_lock);
1738			if (removed)
1739				nat_stats.ns_orphans++;
1740			return;
1741		}
1742	} else if (nat->nat_ref > 1) {
1743		nat->nat_ref--;
1744		MUTEX_EXIT(&nat->nat_lock);
1745		if (removed)
1746			nat_stats.ns_orphans++;
1747		return;
1748	}
1749	MUTEX_EXIT(&nat->nat_lock);
1750
1751	/*
1752	 * At this point, nat_ref is 1, doing "--" would make it 0..
1753	 */
1754	nat->nat_ref = 0;
1755	if (!removed)
1756		nat_stats.ns_orphans--;
1757
1758#ifdef	IPFILTER_SYNC
1759	if (nat->nat_sync)
1760		ipfsync_del(nat->nat_sync);
1761#endif
1762
1763	if (nat->nat_fr != NULL)
1764		(void) fr_derefrule(&nat->nat_fr);
1765
1766	if (nat->nat_hm != NULL)
1767		fr_hostmapdel(&nat->nat_hm);
1768
1769	/*
1770	 * If there is an active reference from the nat entry to its parent
1771	 * rule, decrement the rule's reference count and free it too if no
1772	 * longer being used.
1773	 */
1774	ipn = nat->nat_ptr;
1775	if (ipn != NULL) {
1776		fr_ipnatderef(&ipn);
1777	}
1778
1779	MUTEX_DESTROY(&nat->nat_lock);
1780
1781	aps_free(nat->nat_aps);
1782	nat_stats.ns_inuse--;
1783
1784	/*
1785	 * If there's a fragment table entry too for this nat entry, then
1786	 * dereference that as well.  This is after nat_lock is released
1787	 * because of Tru64.
1788	 */
1789	fr_forgetnat((void *)nat);
1790
1791	KFREE(nat);
1792}
1793
1794
1795/* ------------------------------------------------------------------------ */
1796/* Function:    nat_flushtable                                              */
1797/* Returns:     int - number of NAT rules deleted                           */
1798/* Parameters:  Nil                                                         */
1799/*                                                                          */
1800/* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1801/* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1802/* ------------------------------------------------------------------------ */
1803/*
1804 * nat_flushtable - clear the NAT table of all mapping entries.
1805 */
1806static int nat_flushtable()
1807{
1808	nat_t *nat;
1809	int j = 0;
1810
1811	/*
1812	 * ALL NAT mappings deleted, so lets just make the deletions
1813	 * quicker.
1814	 */
1815	if (nat_table[0] != NULL)
1816		bzero((char *)nat_table[0],
1817		      sizeof(nat_table[0]) * ipf_nattable_sz);
1818	if (nat_table[1] != NULL)
1819		bzero((char *)nat_table[1],
1820		      sizeof(nat_table[1]) * ipf_nattable_sz);
1821
1822	while ((nat = nat_instances) != NULL) {
1823		nat_delete(nat, NL_FLUSH);
1824		j++;
1825	}
1826
1827	nat_stats.ns_inuse = 0;
1828	return j;
1829}
1830
1831
1832/* ------------------------------------------------------------------------ */
1833/* Function:    nat_clearlist                                               */
1834/* Returns:     int - number of NAT/RDR rules deleted                       */
1835/* Parameters:  Nil                                                         */
1836/*                                                                          */
1837/* Delete all rules in the current list of rules.  There is nothing elegant */
1838/* about this cleanup: simply free all entries on the list of rules and     */
1839/* clear out the tables used for hashed NAT rule lookups.                   */
1840/* ------------------------------------------------------------------------ */
1841static int nat_clearlist()
1842{
1843	ipnat_t *n, **np = &nat_list;
1844	int i = 0;
1845
1846	if (nat_rules != NULL)
1847		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1848	if (rdr_rules != NULL)
1849		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1850
1851	while ((n = *np) != NULL) {
1852		*np = n->in_next;
1853		if (n->in_use == 0) {
1854			if (n->in_apr != NULL)
1855				appr_free(n->in_apr);
1856			MUTEX_DESTROY(&n->in_lock);
1857			KFREE(n);
1858			nat_stats.ns_rules--;
1859		} else {
1860			n->in_flags |= IPN_DELETE;
1861			n->in_next = NULL;
1862		}
1863		i++;
1864	}
1865#if SOLARIS && !defined(_INET_IP_STACK_H)
1866	pfil_delayed_copy = 1;
1867#endif
1868	nat_masks = 0;
1869	rdr_masks = 0;
1870	return i;
1871}
1872
1873
1874/* ------------------------------------------------------------------------ */
1875/* Function:    nat_newmap                                                  */
1876/* Returns:     int - -1 == error, 0 == success                             */
1877/* Parameters:  fin(I) - pointer to packet information                      */
1878/*              nat(I) - pointer to NAT entry                               */
1879/*              ni(I)  - pointer to structure with misc. information needed */
1880/*                       to create new NAT entry.                           */
1881/*                                                                          */
1882/* Given an empty NAT structure, populate it with new information about a   */
1883/* new NAT session, as defined by the matching NAT rule.                    */
1884/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1885/* to the new IP address for the translation.                               */
1886/* ------------------------------------------------------------------------ */
1887static INLINE int nat_newmap(fin, nat, ni)
1888fr_info_t *fin;
1889nat_t *nat;
1890natinfo_t *ni;
1891{
1892	u_short st_port, dport, sport, port, sp, dp;
1893	struct in_addr in, inb;
1894	hostmap_t *hm;
1895	u_32_t flags;
1896	u_32_t st_ip;
1897	ipnat_t *np;
1898	nat_t *natl;
1899	int l;
1900
1901	/*
1902	 * If it's an outbound packet which doesn't match any existing
1903	 * record, then create a new port
1904	 */
1905	l = 0;
1906	hm = NULL;
1907	np = ni->nai_np;
1908	st_ip = np->in_nip;
1909	st_port = np->in_pnext;
1910	flags = ni->nai_flags;
1911	sport = ni->nai_sport;
1912	dport = ni->nai_dport;
1913
1914	/*
1915	 * Do a loop until we either run out of entries to try or we find
1916	 * a NAT mapping that isn't currently being used.  This is done
1917	 * because the change to the source is not (usually) being fixed.
1918	 */
1919	do {
1920		port = 0;
1921		in.s_addr = htonl(np->in_nip);
1922		if (l == 0) {
1923			/*
1924			 * Check to see if there is an existing NAT
1925			 * setup for this IP address pair.
1926			 */
1927			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1928					 in, 0);
1929			if (hm != NULL)
1930				in.s_addr = hm->hm_mapip.s_addr;
1931		} else if ((l == 1) && (hm != NULL)) {
1932			fr_hostmapdel(&hm);
1933		}
1934		in.s_addr = ntohl(in.s_addr);
1935
1936		nat->nat_hm = hm;
1937
1938		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1939			if (l > 0)
1940				return -1;
1941		}
1942
1943		if (np->in_redir == NAT_BIMAP &&
1944		    np->in_inmsk == np->in_outmsk) {
1945			/*
1946			 * map the address block in a 1:1 fashion
1947			 */
1948			in.s_addr = np->in_outip;
1949			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1950			in.s_addr = ntohl(in.s_addr);
1951
1952		} else if (np->in_redir & NAT_MAPBLK) {
1953			if ((l >= np->in_ppip) || ((l > 0) &&
1954			     !(flags & IPN_TCPUDP)))
1955				return -1;
1956			/*
1957			 * map-block - Calculate destination address.
1958			 */
1959			in.s_addr = ntohl(fin->fin_saddr);
1960			in.s_addr &= ntohl(~np->in_inmsk);
1961			inb.s_addr = in.s_addr;
1962			in.s_addr /= np->in_ippip;
1963			in.s_addr &= ntohl(~np->in_outmsk);
1964			in.s_addr += ntohl(np->in_outip);
1965			/*
1966			 * Calculate destination port.
1967			 */
1968			if ((flags & IPN_TCPUDP) &&
1969			    (np->in_ppip != 0)) {
1970				port = ntohs(sport) + l;
1971				port %= np->in_ppip;
1972				port += np->in_ppip *
1973					(inb.s_addr % np->in_ippip);
1974				port += MAPBLK_MINPORT;
1975				port = htons(port);
1976			}
1977
1978		} else if ((np->in_outip == 0) &&
1979			   (np->in_outmsk == 0xffffffff)) {
1980			/*
1981			 * 0/32 - use the interface's IP address.
1982			 */
1983			if ((l > 0) ||
1984			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1985				       &in, NULL) == -1)
1986				return -1;
1987			in.s_addr = ntohl(in.s_addr);
1988
1989		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1990			/*
1991			 * 0/0 - use the original source address/port.
1992			 */
1993			if (l > 0)
1994				return -1;
1995			in.s_addr = ntohl(fin->fin_saddr);
1996
1997		} else if ((np->in_outmsk != 0xffffffff) &&
1998			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
1999			np->in_nip++;
2000
2001		natl = NULL;
2002
2003		if ((flags & IPN_TCPUDP) &&
2004		    ((np->in_redir & NAT_MAPBLK) == 0) &&
2005		    (np->in_flags & IPN_AUTOPORTMAP)) {
2006			/*
2007			 * "ports auto" (without map-block)
2008			 */
2009			if ((l > 0) && (l % np->in_ppip == 0)) {
2010				if (l > np->in_space) {
2011					return -1;
2012				} else if ((l > np->in_ppip) &&
2013					   np->in_outmsk != 0xffffffff)
2014					np->in_nip++;
2015			}
2016			if (np->in_ppip != 0) {
2017				port = ntohs(sport);
2018				port += (l % np->in_ppip);
2019				port %= np->in_ppip;
2020				port += np->in_ppip *
2021					(ntohl(fin->fin_saddr) %
2022					 np->in_ippip);
2023				port += MAPBLK_MINPORT;
2024				port = htons(port);
2025			}
2026
2027		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2028			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2029			/*
2030			 * Standard port translation.  Select next port.
2031			 */
2032			port = htons(np->in_pnext++);
2033
2034			if (np->in_pnext > ntohs(np->in_pmax)) {
2035				np->in_pnext = ntohs(np->in_pmin);
2036				if (np->in_outmsk != 0xffffffff)
2037					np->in_nip++;
2038			}
2039		}
2040
2041		if (np->in_flags & IPN_IPRANGE) {
2042			if (np->in_nip > ntohl(np->in_outmsk))
2043				np->in_nip = ntohl(np->in_outip);
2044		} else {
2045			if ((np->in_outmsk != 0xffffffff) &&
2046			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2047			    ntohl(np->in_outip))
2048				np->in_nip = ntohl(np->in_outip) + 1;
2049		}
2050
2051		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2052			port = sport;
2053
2054		/*
2055		 * Here we do a lookup of the connection as seen from
2056		 * the outside.  If an IP# pair already exists, try
2057		 * again.  So if you have A->B becomes C->B, you can
2058		 * also have D->E become C->E but not D->B causing
2059		 * another C->B.  Also take protocol and ports into
2060		 * account when determining whether a pre-existing
2061		 * NAT setup will cause an external conflict where
2062		 * this is appropriate.
2063		 */
2064		inb.s_addr = htonl(in.s_addr);
2065		sp = fin->fin_data[0];
2066		dp = fin->fin_data[1];
2067		fin->fin_data[0] = fin->fin_data[1];
2068		fin->fin_data[1] = htons(port);
2069		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2070				    (u_int)fin->fin_p, fin->fin_dst, inb);
2071		fin->fin_data[0] = sp;
2072		fin->fin_data[1] = dp;
2073
2074		/*
2075		 * Has the search wrapped around and come back to the
2076		 * start ?
2077		 */
2078		if ((natl != NULL) &&
2079		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2080		    (np->in_nip != 0) && (st_ip == np->in_nip))
2081			return -1;
2082		l++;
2083	} while (natl != NULL);
2084
2085	if (np->in_space > 0)
2086		np->in_space--;
2087
2088	/* Setup the NAT table */
2089	nat->nat_inip = fin->fin_src;
2090	nat->nat_outip.s_addr = htonl(in.s_addr);
2091	nat->nat_oip = fin->fin_dst;
2092	if (nat->nat_hm == NULL)
2093		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2094					  nat->nat_outip, 0);
2095
2096	/*
2097	 * The ICMP checksum does not have a pseudo header containing
2098	 * the IP addresses
2099	 */
2100	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2101	ni->nai_sum2 = LONG_SUM(in.s_addr);
2102	if ((flags & IPN_TCPUDP)) {
2103		ni->nai_sum1 += ntohs(sport);
2104		ni->nai_sum2 += ntohs(port);
2105	}
2106
2107	if (flags & IPN_TCPUDP) {
2108		nat->nat_inport = sport;
2109		nat->nat_outport = port;	/* sport */
2110		nat->nat_oport = dport;
2111		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2112	} else if (flags & IPN_ICMPQUERY) {
2113		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2114		nat->nat_inport = port;
2115		nat->nat_outport = port;
2116	} else if (fin->fin_p == IPPROTO_GRE) {
2117#if 0
2118		nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2119		if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2120			nat->nat_oport = 0;/*fin->fin_data[1];*/
2121			nat->nat_inport = 0;/*fin->fin_data[0];*/
2122			nat->nat_outport = 0;/*fin->fin_data[0];*/
2123			nat->nat_call[0] = fin->fin_data[0];
2124			nat->nat_call[1] = fin->fin_data[0];
2125		}
2126#endif
2127	}
2128	ni->nai_ip.s_addr = in.s_addr;
2129	ni->nai_port = port;
2130	ni->nai_nport = dport;
2131	return 0;
2132}
2133
2134
2135/* ------------------------------------------------------------------------ */
2136/* Function:    nat_newrdr                                                  */
2137/* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2138/*                    allow rule to be moved if IPN_ROUNDR is set.          */
2139/* Parameters:  fin(I) - pointer to packet information                      */
2140/*              nat(I) - pointer to NAT entry                               */
2141/*              ni(I)  - pointer to structure with misc. information needed */
2142/*                       to create new NAT entry.                           */
2143/*                                                                          */
2144/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2145/* to the new IP address for the translation.                               */
2146/* ------------------------------------------------------------------------ */
2147static INLINE int nat_newrdr(fin, nat, ni)
2148fr_info_t *fin;
2149nat_t *nat;
2150natinfo_t *ni;
2151{
2152	u_short nport, dport, sport;
2153	struct in_addr in, inb;
2154	u_short sp, dp;
2155	hostmap_t *hm;
2156	u_32_t flags;
2157	ipnat_t *np;
2158	nat_t *natl;
2159	int move;
2160
2161	move = 1;
2162	hm = NULL;
2163	in.s_addr = 0;
2164	np = ni->nai_np;
2165	flags = ni->nai_flags;
2166	sport = ni->nai_sport;
2167	dport = ni->nai_dport;
2168
2169	/*
2170	 * If the matching rule has IPN_STICKY set, then we want to have the
2171	 * same rule kick in as before.  Why would this happen?  If you have
2172	 * a collection of rdr rules with "round-robin sticky", the current
2173	 * packet might match a different one to the previous connection but
2174	 * we want the same destination to be used.
2175	 */
2176	if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2177	    ((np->in_flags & IPN_STICKY) != 0)) {
2178		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2179				 (u_32_t)dport);
2180		if (hm != NULL) {
2181			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2182			np = hm->hm_ipnat;
2183			ni->nai_np = np;
2184			move = 0;
2185		}
2186	}
2187
2188	/*
2189	 * Otherwise, it's an inbound packet. Most likely, we don't
2190	 * want to rewrite source ports and source addresses. Instead,
2191	 * we want to rewrite to a fixed internal address and fixed
2192	 * internal port.
2193	 */
2194	if (np->in_flags & IPN_SPLIT) {
2195		in.s_addr = np->in_nip;
2196
2197		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2198			hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2199					 in, (u_32_t)dport);
2200			if (hm != NULL) {
2201				in.s_addr = hm->hm_mapip.s_addr;
2202				move = 0;
2203			}
2204		}
2205
2206		if (hm == NULL || hm->hm_ref == 1) {
2207			if (np->in_inip == htonl(in.s_addr)) {
2208				np->in_nip = ntohl(np->in_inmsk);
2209				move = 0;
2210			} else {
2211				np->in_nip = ntohl(np->in_inip);
2212			}
2213		}
2214
2215	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2216		/*
2217		 * 0/32 - use the interface's IP address.
2218		 */
2219		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2220			return -1;
2221		in.s_addr = ntohl(in.s_addr);
2222
2223	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2224		/*
2225		 * 0/0 - use the original destination address/port.
2226		 */
2227		in.s_addr = ntohl(fin->fin_daddr);
2228
2229	} else if (np->in_redir == NAT_BIMAP &&
2230		   np->in_inmsk == np->in_outmsk) {
2231		/*
2232		 * map the address block in a 1:1 fashion
2233		 */
2234		in.s_addr = np->in_inip;
2235		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2236		in.s_addr = ntohl(in.s_addr);
2237	} else {
2238		in.s_addr = ntohl(np->in_inip);
2239	}
2240
2241	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2242		nport = dport;
2243	else {
2244		/*
2245		 * Whilst not optimized for the case where
2246		 * pmin == pmax, the gain is not significant.
2247		 */
2248		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2249		    (np->in_pmin != np->in_pmax)) {
2250			nport = ntohs(dport) - ntohs(np->in_pmin) +
2251				ntohs(np->in_pnext);
2252			nport = htons(nport);
2253		} else
2254			nport = np->in_pnext;
2255	}
2256
2257	/*
2258	 * When the redirect-to address is set to 0.0.0.0, just
2259	 * assume a blank `forwarding' of the packet.  We don't
2260	 * setup any translation for this either.
2261	 */
2262	if (in.s_addr == 0) {
2263		if (nport == dport)
2264			return -1;
2265		in.s_addr = ntohl(fin->fin_daddr);
2266	}
2267
2268	/*
2269	 * Check to see if this redirect mapping already exists and if
2270	 * it does, return "failure" (allowing it to be created will just
2271	 * cause one or both of these "connections" to stop working.)
2272	 */
2273	inb.s_addr = htonl(in.s_addr);
2274	sp = fin->fin_data[0];
2275	dp = fin->fin_data[1];
2276	fin->fin_data[1] = fin->fin_data[0];
2277	fin->fin_data[0] = ntohs(nport);
2278	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2279			     (u_int)fin->fin_p, inb, fin->fin_src);
2280	fin->fin_data[0] = sp;
2281	fin->fin_data[1] = dp;
2282	if (natl != NULL)
2283		return -1;
2284
2285	nat->nat_inip.s_addr = htonl(in.s_addr);
2286	nat->nat_outip = fin->fin_dst;
2287	nat->nat_oip = fin->fin_src;
2288	if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2289		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
2290					  (u_32_t)dport);
2291
2292	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2293	ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2294
2295	ni->nai_ip.s_addr = in.s_addr;
2296	ni->nai_nport = nport;
2297	ni->nai_port = sport;
2298
2299	if (flags & IPN_TCPUDP) {
2300		nat->nat_inport = nport;
2301		nat->nat_outport = dport;
2302		nat->nat_oport = sport;
2303		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2304	} else if (flags & IPN_ICMPQUERY) {
2305		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2306		nat->nat_inport = nport;
2307		nat->nat_outport = nport;
2308	} else if (fin->fin_p == IPPROTO_GRE) {
2309#if 0
2310		nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2311		if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2312			nat->nat_call[0] = fin->fin_data[0];
2313			nat->nat_call[1] = fin->fin_data[1];
2314			nat->nat_oport = 0; /*fin->fin_data[0];*/
2315			nat->nat_inport = 0; /*fin->fin_data[1];*/
2316			nat->nat_outport = 0; /*fin->fin_data[1];*/
2317		}
2318#endif
2319	}
2320
2321	return move;
2322}
2323
2324/* ------------------------------------------------------------------------ */
2325/* Function:    nat_new                                                     */
2326/* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2327/*                       else pointer to new NAT structure                  */
2328/* Parameters:  fin(I)       - pointer to packet information                */
2329/*              np(I)        - pointer to NAT rule                          */
2330/*              natsave(I)   - pointer to where to store NAT struct pointer */
2331/*              flags(I)     - flags describing the current packet          */
2332/*              direction(I) - direction of packet (in/out)                 */
2333/* Write Lock:  ipf_nat                                                     */
2334/*                                                                          */
2335/* Attempts to create a new NAT entry.  Does not actually change the packet */
2336/* in any way.                                                              */
2337/*                                                                          */
2338/* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2339/* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2340/* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2341/* and (3) building that structure and putting it into the NAT table(s).    */
2342/*                                                                          */
2343/* NOTE: natsave should NOT be used top point back to an ipstate_t struct   */
2344/*       as it can result in memory being corrupted.                        */
2345/* ------------------------------------------------------------------------ */
2346nat_t *nat_new(fin, np, natsave, flags, direction)
2347fr_info_t *fin;
2348ipnat_t *np;
2349nat_t **natsave;
2350u_int flags;
2351int direction;
2352{
2353	u_short port = 0, sport = 0, dport = 0, nport = 0;
2354	tcphdr_t *tcp = NULL;
2355	hostmap_t *hm = NULL;
2356	struct in_addr in;
2357	nat_t *nat, *natl;
2358	u_int nflags;
2359	natinfo_t ni;
2360	u_32_t sumd;
2361	int move;
2362#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2363	qpktinfo_t *qpi = fin->fin_qpi;
2364#endif
2365
2366	if (nat_stats.ns_inuse >= ipf_nattable_max) {
2367		nat_stats.ns_memfail++;
2368		fr_nat_doflush = 1;
2369		return NULL;
2370	}
2371
2372	move = 1;
2373	nflags = np->in_flags & flags;
2374	nflags &= NAT_FROMRULE;
2375
2376	ni.nai_np = np;
2377	ni.nai_nflags = nflags;
2378	ni.nai_flags = flags;
2379	ni.nai_dport = 0;
2380	ni.nai_sport = 0;
2381
2382	/* Give me a new nat */
2383	KMALLOC(nat, nat_t *);
2384	if (nat == NULL) {
2385		nat_stats.ns_memfail++;
2386		/*
2387		 * Try to automatically tune the max # of entries in the
2388		 * table allowed to be less than what will cause kmem_alloc()
2389		 * to fail and try to eliminate panics due to out of memory
2390		 * conditions arising.
2391		 */
2392		if (ipf_nattable_max > ipf_nattable_sz) {
2393			ipf_nattable_max = nat_stats.ns_inuse - 100;
2394			printf("ipf_nattable_max reduced to %d\n",
2395				ipf_nattable_max);
2396		}
2397		return NULL;
2398	}
2399
2400	if (flags & IPN_TCPUDP) {
2401		tcp = fin->fin_dp;
2402		ni.nai_sport = htons(fin->fin_sport);
2403		ni.nai_dport = htons(fin->fin_dport);
2404	} else if (flags & IPN_ICMPQUERY) {
2405		/*
2406		 * In the ICMP query NAT code, we translate the ICMP id fields
2407		 * to make them unique. This is indepedent of the ICMP type
2408		 * (e.g. in the unlikely event that a host sends an echo and
2409		 * an tstamp request with the same id, both packets will have
2410		 * their ip address/id field changed in the same way).
2411		 */
2412		/* The icmp_id field is used by the sender to identify the
2413		 * process making the icmp request. (the receiver justs
2414		 * copies it back in its response). So, it closely matches
2415		 * the concept of source port. We overlay sport, so we can
2416		 * maximally reuse the existing code.
2417		 */
2418		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2419		ni.nai_dport = ni.nai_sport;
2420	}
2421
2422	bzero((char *)nat, sizeof(*nat));
2423	nat->nat_flags = flags;
2424	nat->nat_redir = np->in_redir;
2425
2426	if ((flags & NAT_SLAVE) == 0) {
2427		MUTEX_ENTER(&ipf_nat_new);
2428	}
2429
2430	/*
2431	 * Search the current table for a match.
2432	 */
2433	if (direction == NAT_OUTBOUND) {
2434		/*
2435		 * We can now arrange to call this for the same connection
2436		 * because ipf_nat_new doesn't protect the code path into
2437		 * this function.
2438		 */
2439		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2440				     fin->fin_src, fin->fin_dst);
2441		if (natl != NULL) {
2442			KFREE(nat);
2443			nat = natl;
2444			goto done;
2445		}
2446
2447		move = nat_newmap(fin, nat, &ni);
2448		if (move == -1)
2449			goto badnat;
2450
2451		np = ni.nai_np;
2452		in = ni.nai_ip;
2453	} else {
2454		/*
2455		 * NAT_INBOUND is used only for redirects rules
2456		 */
2457		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2458				    fin->fin_src, fin->fin_dst);
2459		if (natl != NULL) {
2460			KFREE(nat);
2461			nat = natl;
2462			goto done;
2463		}
2464
2465		move = nat_newrdr(fin, nat, &ni);
2466		if (move == -1)
2467			goto badnat;
2468
2469		np = ni.nai_np;
2470		in = ni.nai_ip;
2471	}
2472	port = ni.nai_port;
2473	nport = ni.nai_nport;
2474
2475	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2476		if (np->in_redir == NAT_REDIRECT) {
2477			nat_delrdr(np);
2478			nat_addrdr(np);
2479		} else if (np->in_redir == NAT_MAP) {
2480			nat_delnat(np);
2481			nat_addnat(np);
2482		}
2483	}
2484
2485	if (flags & IPN_TCPUDP) {
2486		sport = ni.nai_sport;
2487		dport = ni.nai_dport;
2488	} else if (flags & IPN_ICMPQUERY) {
2489		sport = ni.nai_sport;
2490		dport = 0;
2491	}
2492
2493	CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2494	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2495#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2496	if ((flags & IPN_TCP) && dohwcksum &&
2497	    (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2498		if (direction == NAT_OUTBOUND)
2499			ni.nai_sum1 = LONG_SUM(in.s_addr);
2500		else
2501			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2502		ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2503		ni.nai_sum1 += 30;
2504		ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2505		nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2506	} else
2507#endif
2508		nat->nat_sumd[1] = nat->nat_sumd[0];
2509
2510	if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2511		if (direction == NAT_OUTBOUND)
2512			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2513		else
2514			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2515
2516		ni.nai_sum2 = LONG_SUM(in.s_addr);
2517
2518		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2519		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2520	} else {
2521		nat->nat_ipsumd = nat->nat_sumd[0];
2522		if (!(flags & IPN_TCPUDPICMP)) {
2523			nat->nat_sumd[0] = 0;
2524			nat->nat_sumd[1] = 0;
2525		}
2526	}
2527
2528	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2529		fr_nat_doflush = 1;
2530		goto badnat;
2531	}
2532	if (flags & SI_WILDP)
2533		nat_stats.ns_wilds++;
2534	fin->fin_flx |= FI_NEWNAT;
2535	goto done;
2536badnat:
2537	nat_stats.ns_badnat++;
2538	if ((hm = nat->nat_hm) != NULL)
2539		fr_hostmapdel(&hm);
2540	KFREE(nat);
2541	nat = NULL;
2542done:
2543	if ((flags & NAT_SLAVE) == 0) {
2544		MUTEX_EXIT(&ipf_nat_new);
2545	}
2546	return nat;
2547}
2548
2549
2550/* ------------------------------------------------------------------------ */
2551/* Function:    nat_finalise                                                */
2552/* Returns:     int - 0 == sucess, -1 == failure                            */
2553/* Parameters:  fin(I) - pointer to packet information                      */
2554/*              nat(I) - pointer to NAT entry                               */
2555/*              ni(I)  - pointer to structure with misc. information needed */
2556/*                       to create new NAT entry.                           */
2557/* Write Lock:  ipf_nat                                                     */
2558/*                                                                          */
2559/* This is the tail end of constructing a new NAT entry and is the same     */
2560/* for both IPv4 and IPv6.                                                  */
2561/* ------------------------------------------------------------------------ */
2562/*ARGSUSED*/
2563static int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2564fr_info_t *fin;
2565nat_t *nat;
2566natinfo_t *ni;
2567tcphdr_t *tcp;
2568nat_t **natsave;
2569int direction;
2570{
2571	frentry_t *fr;
2572	ipnat_t *np;
2573
2574	np = ni->nai_np;
2575
2576	if (np->in_ifps[0] != NULL) {
2577		COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2578	}
2579	if (np->in_ifps[1] != NULL) {
2580		COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
2581	}
2582#ifdef	IPFILTER_SYNC
2583	if ((nat->nat_flags & SI_CLONE) == 0)
2584		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2585#endif
2586
2587	nat->nat_me = natsave;
2588	nat->nat_dir = direction;
2589	nat->nat_ifps[0] = np->in_ifps[0];
2590	nat->nat_ifps[1] = np->in_ifps[1];
2591	nat->nat_ptr = np;
2592	nat->nat_p = fin->fin_p;
2593	nat->nat_mssclamp = np->in_mssclamp;
2594	if (nat->nat_p == IPPROTO_TCP)
2595		nat->nat_seqnext[0] = ntohl(tcp->th_seq);
2596
2597	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2598		if (appr_new(fin, nat) == -1)
2599			return -1;
2600
2601	if (nat_insert(nat, fin->fin_rev) == 0) {
2602		if (nat_logging)
2603			nat_log(nat, (u_int)np->in_redir);
2604		np->in_use++;
2605		fr = fin->fin_fr;
2606		nat->nat_fr = fr;
2607		if (fr != NULL) {
2608			MUTEX_ENTER(&fr->fr_lock);
2609			fr->fr_ref++;
2610			MUTEX_EXIT(&fr->fr_lock);
2611		}
2612		return 0;
2613	}
2614
2615	/*
2616	 * nat_insert failed, so cleanup time...
2617	 */
2618	return -1;
2619}
2620
2621
2622/* ------------------------------------------------------------------------ */
2623/* Function:   nat_insert                                                   */
2624/* Returns:    int - 0 == sucess, -1 == failure                             */
2625/* Parameters: nat(I) - pointer to NAT structure                            */
2626/*             rev(I) - flag indicating forward/reverse direction of packet */
2627/* Write Lock: ipf_nat                                                      */
2628/*                                                                          */
2629/* Insert a NAT entry into the hash tables for searching and add it to the  */
2630/* list of active NAT entries.  Adjust global counters when complete.       */
2631/* ------------------------------------------------------------------------ */
2632int	nat_insert(nat, rev)
2633nat_t	*nat;
2634int	rev;
2635{
2636	u_int hv1, hv2;
2637	nat_t **natp;
2638
2639	/*
2640	 * Try and return an error as early as possible, so calculate the hash
2641	 * entry numbers first and then proceed.
2642	 */
2643	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2644		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2645				  0xffffffff);
2646		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2647				  ipf_nattable_sz);
2648		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2649				  0xffffffff);
2650		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2651				  ipf_nattable_sz);
2652	} else {
2653		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2654		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2655		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2656		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
2657	}
2658
2659	if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2660	    nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2661		return -1;
2662	}
2663
2664	nat->nat_hv[0] = hv1;
2665	nat->nat_hv[1] = hv2;
2666
2667	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2668
2669	nat->nat_rev = rev;
2670	nat->nat_ref = 1;
2671	nat->nat_bytes[0] = 0;
2672	nat->nat_pkts[0] = 0;
2673	nat->nat_bytes[1] = 0;
2674	nat->nat_pkts[1] = 0;
2675
2676	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2677	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2678
2679	if (nat->nat_ifnames[1][0] != '\0') {
2680		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2681		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2682	} else {
2683		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2684			       LIFNAMSIZ);
2685		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2686		nat->nat_ifps[1] = nat->nat_ifps[0];
2687	}
2688
2689	nat->nat_next = nat_instances;
2690	nat->nat_pnext = &nat_instances;
2691	if (nat_instances)
2692		nat_instances->nat_pnext = &nat->nat_next;
2693	nat_instances = nat;
2694
2695	natp = &nat_table[0][hv1];
2696	if (*natp)
2697		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2698	nat->nat_phnext[0] = natp;
2699	nat->nat_hnext[0] = *natp;
2700	*natp = nat;
2701	nat_stats.ns_bucketlen[0][hv1]++;
2702
2703	natp = &nat_table[1][hv2];
2704	if (*natp)
2705		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2706	nat->nat_phnext[1] = natp;
2707	nat->nat_hnext[1] = *natp;
2708	*natp = nat;
2709	nat_stats.ns_bucketlen[1][hv2]++;
2710
2711	fr_setnatqueue(nat, rev);
2712
2713	nat_stats.ns_added++;
2714	nat_stats.ns_inuse++;
2715	return 0;
2716}
2717
2718
2719/* ------------------------------------------------------------------------ */
2720/* Function:    nat_icmperrorlookup                                         */
2721/* Returns:     nat_t* - point to matching NAT structure                    */
2722/* Parameters:  fin(I) - pointer to packet information                      */
2723/*              dir(I) - direction of packet (in/out)                       */
2724/*                                                                          */
2725/* Check if the ICMP error message is related to an existing TCP, UDP or    */
2726/* ICMP query nat entry.  It is assumed that the packet is already of the   */
2727/* the required length.                                                     */
2728/* ------------------------------------------------------------------------ */
2729nat_t *nat_icmperrorlookup(fin, dir)
2730fr_info_t *fin;
2731int dir;
2732{
2733	int flags = 0, type, minlen;
2734	icmphdr_t *icmp, *orgicmp;
2735	tcphdr_t *tcp = NULL;
2736	u_short data[2];
2737	nat_t *nat;
2738	ip_t *oip;
2739	u_int p;
2740
2741	icmp = fin->fin_dp;
2742	type = icmp->icmp_type;
2743	/*
2744	 * Does it at least have the return (basic) IP header ?
2745	 * Only a basic IP header (no options) should be with an ICMP error
2746	 * header.  Also, if it's not an error type, then return.
2747	 */
2748	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2749		return NULL;
2750
2751	/*
2752	 * Check packet size
2753	 */
2754	oip = (ip_t *)((char *)fin->fin_dp + 8);
2755	minlen = IP_HL(oip) << 2;
2756	if ((minlen < sizeof(ip_t)) ||
2757	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2758		return NULL;
2759	/*
2760	 * Is the buffer big enough for all of it ?  It's the size of the IP
2761	 * header claimed in the encapsulated part which is of concern.  It
2762	 * may be too big to be in this buffer but not so big that it's
2763	 * outside the ICMP packet, leading to TCP deref's causing problems.
2764	 * This is possible because we don't know how big oip_hl is when we
2765	 * do the pullup early in fr_check() and thus can't gaurantee it is
2766	 * all here now.
2767	 */
2768#ifdef  _KERNEL
2769	{
2770	mb_t *m;
2771
2772	m = fin->fin_m;
2773# if defined(MENTAT)
2774	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2775		return NULL;
2776# else
2777	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2778	    (char *)fin->fin_ip + M_LEN(m))
2779		return NULL;
2780# endif
2781	}
2782#endif
2783
2784	if (fin->fin_daddr != oip->ip_src.s_addr)
2785		return NULL;
2786
2787	p = oip->ip_p;
2788	if (p == IPPROTO_TCP)
2789		flags = IPN_TCP;
2790	else if (p == IPPROTO_UDP)
2791		flags = IPN_UDP;
2792	else if (p == IPPROTO_ICMP) {
2793		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2794
2795		/* see if this is related to an ICMP query */
2796		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2797			data[0] = fin->fin_data[0];
2798			data[1] = fin->fin_data[1];
2799			fin->fin_data[0] = 0;
2800			fin->fin_data[1] = orgicmp->icmp_id;
2801
2802			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2803			/*
2804			 * NOTE : dir refers to the direction of the original
2805			 *        ip packet. By definition the icmp error
2806			 *        message flows in the opposite direction.
2807			 */
2808			if (dir == NAT_INBOUND)
2809				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2810						   oip->ip_src);
2811			else
2812				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2813						    oip->ip_src);
2814			fin->fin_data[0] = data[0];
2815			fin->fin_data[1] = data[1];
2816			return nat;
2817		}
2818	}
2819
2820	if (flags & IPN_TCPUDP) {
2821		minlen += 8;		/* + 64bits of data to get ports */
2822		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2823			return NULL;
2824
2825		data[0] = fin->fin_data[0];
2826		data[1] = fin->fin_data[1];
2827		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2828		fin->fin_data[0] = ntohs(tcp->th_dport);
2829		fin->fin_data[1] = ntohs(tcp->th_sport);
2830
2831		if (dir == NAT_INBOUND) {
2832			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2833					   oip->ip_src);
2834		} else {
2835			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2836					    oip->ip_src);
2837		}
2838		fin->fin_data[0] = data[0];
2839		fin->fin_data[1] = data[1];
2840		return nat;
2841	}
2842	if (dir == NAT_INBOUND)
2843		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2844	else
2845		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2846}
2847
2848
2849/* ------------------------------------------------------------------------ */
2850/* Function:    nat_icmperror                                               */
2851/* Returns:     nat_t* - point to matching NAT structure                    */
2852/* Parameters:  fin(I)    - pointer to packet information                   */
2853/*              nflags(I) - NAT flags for this packet                       */
2854/*              dir(I)    - direction of packet (in/out)                    */
2855/*                                                                          */
2856/* Fix up an ICMP packet which is an error message for an existing NAT      */
2857/* session.  This will correct both packet header data and checksums.       */
2858/*                                                                          */
2859/* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2860/* a NAT'd ICMP packet gets correctly recognised.                           */
2861/* ------------------------------------------------------------------------ */
2862nat_t *nat_icmperror(fin, nflags, dir)
2863fr_info_t *fin;
2864u_int *nflags;
2865int dir;
2866{
2867	u_32_t sum1, sum2, sumd, sumd2;
2868	struct in_addr a1, a2;
2869	int flags, dlen, odst;
2870	icmphdr_t *icmp;
2871	u_short *csump;
2872	tcphdr_t *tcp;
2873	nat_t *nat;
2874	ip_t *oip;
2875	void *dp;
2876
2877	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2878		return NULL;
2879	/*
2880	 * nat_icmperrorlookup() will return NULL for `defective' packets.
2881	 */
2882	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2883		return NULL;
2884
2885	tcp = NULL;
2886	csump = NULL;
2887	flags = 0;
2888	sumd2 = 0;
2889	*nflags = IPN_ICMPERR;
2890	icmp = fin->fin_dp;
2891	oip = (ip_t *)&icmp->icmp_ip;
2892	dp = (((char *)oip) + (IP_HL(oip) << 2));
2893	if (oip->ip_p == IPPROTO_TCP) {
2894		tcp = (tcphdr_t *)dp;
2895		csump = (u_short *)&tcp->th_sum;
2896		flags = IPN_TCP;
2897	} else if (oip->ip_p == IPPROTO_UDP) {
2898		udphdr_t *udp;
2899
2900		udp = (udphdr_t *)dp;
2901		tcp = (tcphdr_t *)dp;
2902		csump = (u_short *)&udp->uh_sum;
2903		flags = IPN_UDP;
2904	} else if (oip->ip_p == IPPROTO_ICMP)
2905		flags = IPN_ICMPQUERY;
2906	dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);
2907
2908	/*
2909	 * Need to adjust ICMP header to include the real IP#'s and
2910	 * port #'s.  Only apply a checksum change relative to the
2911	 * IP address change as it will be modified again in fr_checknatout
2912	 * for both address and port.  Two checksum changes are
2913	 * necessary for the two header address changes.  Be careful
2914	 * to only modify the checksum once for the port # and twice
2915	 * for the IP#.
2916	 */
2917
2918	/*
2919	 * Step 1
2920	 * Fix the IP addresses in the offending IP packet. You also need
2921	 * to adjust the IP header checksum of that offending IP packet.
2922	 *
2923	 * Normally, you would expect that the ICMP checksum of the
2924	 * ICMP error message needs to be adjusted as well for the
2925	 * IP address change in oip.
2926	 * However, this is a NOP, because the ICMP checksum is
2927	 * calculated over the complete ICMP packet, which includes the
2928	 * changed oip IP addresses and oip->ip_sum. However, these
2929	 * two changes cancel each other out (if the delta for
2930	 * the IP address is x, then the delta for ip_sum is minus x),
2931	 * so no change in the icmp_cksum is necessary.
2932	 *
2933	 * Inbound ICMP
2934	 * ------------
2935	 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2936	 * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
2937	 * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
2938	 *
2939	 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2940	 * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2941	 * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
2942	 *
2943	 * Outbound ICMP
2944	 * -------------
2945	 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2946	 * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2947	 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2948	 *
2949	 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2950	 * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
2951	 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2952	 *
2953	 */
2954	odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
2955	if (odst == 1) {
2956		a1.s_addr = ntohl(nat->nat_inip.s_addr);
2957		a2.s_addr = ntohl(oip->ip_src.s_addr);
2958		oip->ip_src.s_addr = htonl(a1.s_addr);
2959	} else {
2960		a1.s_addr = ntohl(nat->nat_outip.s_addr);
2961		a2.s_addr = ntohl(oip->ip_dst.s_addr);
2962		oip->ip_dst.s_addr = htonl(a1.s_addr);
2963	}
2964
2965	sumd = a2.s_addr - a1.s_addr;
2966	if (sumd != 0) {
2967		if (a1.s_addr > a2.s_addr)
2968			sumd--;
2969		sumd = ~sumd;
2970
2971		fix_datacksum(&oip->ip_sum, sumd);
2972	}
2973
2974	sumd2 = sumd;
2975	sum1 = 0;
2976	sum2 = 0;
2977
2978	/*
2979	 * Fix UDP pseudo header checksum to compensate for the
2980	 * IP address change.
2981	 */
2982	if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
2983		/*
2984		 * Step 2 :
2985		 * For offending TCP/UDP IP packets, translate the ports as
2986		 * well, based on the NAT specification. Of course such
2987		 * a change may be reflected in the ICMP checksum as well.
2988		 *
2989		 * Since the port fields are part of the TCP/UDP checksum
2990		 * of the offending IP packet, you need to adjust that checksum
2991		 * as well... except that the change in the port numbers should
2992		 * be offset by the checksum change.  However, the TCP/UDP
2993		 * checksum will also need to change if there has been an
2994		 * IP address change.
2995		 */
2996		if (odst == 1) {
2997			sum1 = ntohs(nat->nat_inport);
2998			sum2 = ntohs(tcp->th_sport);
2999
3000			tcp->th_sport = htons(sum1);
3001		} else {
3002			sum1 = ntohs(nat->nat_outport);
3003			sum2 = ntohs(tcp->th_dport);
3004
3005			tcp->th_dport = htons(sum1);
3006		}
3007
3008		sumd += sum1 - sum2;
3009		if (sumd != 0 || sumd2 != 0) {
3010			/*
3011			 * At this point, sumd is the delta to apply to the
3012			 * TCP/UDP header, given the changes in both the IP
3013			 * address and the ports and sumd2 is the delta to
3014			 * apply to the ICMP header, given the IP address
3015			 * change delta that may need to be applied to the
3016			 * TCP/UDP checksum instead.
3017			 *
3018			 * If we will both the IP and TCP/UDP checksums
3019			 * then the ICMP checksum changes by the address
3020			 * delta applied to the TCP/UDP checksum.  If we
3021			 * do not change the TCP/UDP checksum them we
3022			 * apply the delta in ports to the ICMP checksum.
3023			 */
3024			if (oip->ip_p == IPPROTO_UDP) {
3025				if ((dlen >= 8) && (*csump != 0)) {
3026					fix_datacksum(csump, sumd);
3027				} else {
3028					sumd2 = sum1 - sum2;
3029					if (sum2 > sum1)
3030						sumd2--;
3031				}
3032			} else if (oip->ip_p == IPPROTO_TCP) {
3033				if (dlen >= 18) {
3034					fix_datacksum(csump, sumd);
3035				} else {
3036					sumd2 = sum2 - sum1;
3037					if (sum1 > sum2)
3038						sumd2--;
3039				}
3040			}
3041
3042			if (sumd2 != 0) {
3043				ipnat_t *np;
3044
3045				np = nat->nat_ptr;
3046				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3047				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3048				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3049
3050				if ((odst == 0) && (dir == NAT_OUTBOUND) &&
3051				    (fin->fin_rev == 0) && (np != NULL) &&
3052				    (np->in_redir & NAT_REDIRECT)) {
3053					fix_outcksum(fin, &icmp->icmp_cksum,
3054						     sumd2);
3055				} else {
3056					fix_incksum(fin, &icmp->icmp_cksum,
3057						    sumd2);
3058				}
3059			}
3060		}
3061	} else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
3062		icmphdr_t *orgicmp;
3063
3064		/*
3065		 * XXX - what if this is bogus hl and we go off the end ?
3066		 * In this case, nat_icmperrorlookup() will have returned NULL.
3067		 */
3068		orgicmp = (icmphdr_t *)dp;
3069
3070		if (odst == 1) {
3071			if (orgicmp->icmp_id != nat->nat_inport) {
3072
3073				/*
3074				 * Fix ICMP checksum (of the offening ICMP
3075				 * query packet) to compensate the change
3076				 * in the ICMP id of the offending ICMP
3077				 * packet.
3078				 *
3079				 * Since you modify orgicmp->icmp_id with
3080				 * a delta (say x) and you compensate that
3081				 * in origicmp->icmp_cksum with a delta
3082				 * minus x, you don't have to adjust the
3083				 * overall icmp->icmp_cksum
3084				 */
3085				sum1 = ntohs(orgicmp->icmp_id);
3086				sum2 = ntohs(nat->nat_inport);
3087				CALC_SUMD(sum1, sum2, sumd);
3088				orgicmp->icmp_id = nat->nat_inport;
3089				fix_datacksum(&orgicmp->icmp_cksum, sumd);
3090			}
3091		} /* nat_dir == NAT_INBOUND is impossible for icmp queries */
3092	}
3093	return nat;
3094}
3095
3096
3097/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done by the caller!
3100 */
3101
3102/* ------------------------------------------------------------------------ */
3103/* Function:    nat_inlookup                                                */
3104/* Returns:     nat_t* - NULL == no match,                                  */
3105/*                       else pointer to matching NAT entry                 */
3106/* Parameters:  fin(I)    - pointer to packet information                   */
3107/*              flags(I)  - NAT flags for this packet                       */
3108/*              p(I)      - protocol for this packet                        */
3109/*              src(I)    - source IP address                               */
3110/*              mapdst(I) - destination IP address                          */
3111/*                                                                          */
3112/* Lookup a nat entry based on the mapped destination ip address/port and   */
3113/* real source address/port.  We use this lookup when receiving a packet,   */
3114/* we're looking for a table entry, based on the destination address.       */
3115/*                                                                          */
3116/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3117/*                                                                          */
3118/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3119/*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3120/*                                                                          */
3121/* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3122/*            the packet is of said protocol                                */
3123/* ------------------------------------------------------------------------ */
3124nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3125fr_info_t *fin;
3126u_int flags, p;
3127struct in_addr src , mapdst;
3128{
3129	u_short sport, dport;
3130	grehdr_t *gre;
3131	ipnat_t *ipn;
3132	u_int sflags;
3133	nat_t *nat;
3134	int nflags;
3135	u_32_t dst;
3136	void *ifp;
3137	u_int hv;
3138
3139	ifp = fin->fin_ifp;
3140	sport = 0;
3141	dport = 0;
3142	gre = NULL;
3143	dst = mapdst.s_addr;
3144	sflags = flags & NAT_TCPUDPICMP;
3145
3146	switch (p)
3147	{
3148	case IPPROTO_TCP :
3149	case IPPROTO_UDP :
3150		sport = htons(fin->fin_data[0]);
3151		dport = htons(fin->fin_data[1]);
3152		break;
3153	case IPPROTO_ICMP :
3154		if (flags & IPN_ICMPERR)
3155			sport = fin->fin_data[1];
3156		else
3157			dport = fin->fin_data[1];
3158		break;
3159	default :
3160		break;
3161	}
3162
3163
3164	if ((flags & SI_WILDP) != 0)
3165		goto find_in_wild_ports;
3166
3167	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3168	hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
3169	nat = nat_table[1][hv];
3170	for (; nat; nat = nat->nat_hnext[1]) {
3171		if (nat->nat_ifps[0] != NULL) {
3172			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3173				continue;
3174		} else if (ifp != NULL)
3175			nat->nat_ifps[0] = ifp;
3176
3177		nflags = nat->nat_flags;
3178
3179		if (nat->nat_oip.s_addr == src.s_addr &&
3180		    nat->nat_outip.s_addr == dst &&
3181		    (((p == 0) &&
3182		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3183		     || (p == nat->nat_p))) {
3184			switch (p)
3185			{
3186#if 0
3187			case IPPROTO_GRE :
3188				if (nat->nat_call[1] != fin->fin_data[0])
3189					continue;
3190				break;
3191#endif
3192			case IPPROTO_ICMP :
3193				if ((flags & IPN_ICMPERR) != 0) {
3194					if (nat->nat_outport != sport)
3195						continue;
3196				} else {
3197					if (nat->nat_outport != dport)
3198						continue;
3199				}
3200				break;
3201			case IPPROTO_TCP :
3202			case IPPROTO_UDP :
3203				if (nat->nat_oport != sport)
3204					continue;
3205				if (nat->nat_outport != dport)
3206					continue;
3207				break;
3208			default :
3209				break;
3210			}
3211
3212			ipn = nat->nat_ptr;
3213			if ((ipn != NULL) && (nat->nat_aps != NULL))
3214				if (appr_match(fin, nat) != 0)
3215					continue;
3216			return nat;
3217		}
3218	}
3219
3220	/*
3221	 * So if we didn't find it but there are wildcard members in the hash
3222	 * table, go back and look for them.  We do this search and update here
3223	 * because it is modifying the NAT table and we want to do this only
3224	 * for the first packet that matches.  The exception, of course, is
3225	 * for "dummy" (FI_IGNORE) lookups.
3226	 */
3227find_in_wild_ports:
3228	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3229		return NULL;
3230	if (nat_stats.ns_wilds == 0)
3231		return NULL;
3232
3233	RWLOCK_EXIT(&ipf_nat);
3234
3235	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3236	hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
3237
3238	WRITE_ENTER(&ipf_nat);
3239
3240	nat = nat_table[1][hv];
3241	for (; nat; nat = nat->nat_hnext[1]) {
3242		if (nat->nat_ifps[0] != NULL) {
3243			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3244				continue;
3245		} else if (ifp != NULL)
3246			nat->nat_ifps[0] = ifp;
3247
3248		if (nat->nat_p != fin->fin_p)
3249			continue;
3250		if (nat->nat_oip.s_addr != src.s_addr ||
3251		    nat->nat_outip.s_addr != dst)
3252			continue;
3253
3254		nflags = nat->nat_flags;
3255		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3256			continue;
3257
3258		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3259			       NAT_INBOUND) == 1) {
3260			if ((fin->fin_flx & FI_IGNORE) != 0)
3261				break;
3262			if ((nflags & SI_CLONE) != 0) {
3263				nat = fr_natclone(fin, nat);
3264				if (nat == NULL)
3265					break;
3266			} else {
3267				MUTEX_ENTER(&ipf_nat_new);
3268				nat_stats.ns_wilds--;
3269				MUTEX_EXIT(&ipf_nat_new);
3270			}
3271			nat->nat_oport = sport;
3272			nat->nat_outport = dport;
3273			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3274			nat_tabmove(nat);
3275			break;
3276		}
3277	}
3278
3279	MUTEX_DOWNGRADE(&ipf_nat);
3280
3281	return nat;
3282}
3283
3284
3285/* ------------------------------------------------------------------------ */
3286/* Function:    nat_tabmove                                                 */
3287/* Returns:     Nil                                                         */
3288/* Parameters:  nat(I) - pointer to NAT structure                           */
3289/* Write Lock:  ipf_nat                                                     */
3290/*                                                                          */
3291/* This function is only called for TCP/UDP NAT table entries where the     */
3292/* original was placed in the table without hashing on the ports and we now */
3293/* want to include hashing on port numbers.                                 */
3294/* ------------------------------------------------------------------------ */
3295static void nat_tabmove(nat)
3296nat_t *nat;
3297{
3298	nat_t **natp;
3299	u_int hv;
3300
3301	if (nat->nat_flags & SI_CLONE)
3302		return;
3303
3304	/*
3305	 * Remove the NAT entry from the old location
3306	 */
3307	if (nat->nat_hnext[0])
3308		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3309	*nat->nat_phnext[0] = nat->nat_hnext[0];
3310	nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3311
3312	if (nat->nat_hnext[1])
3313		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3314	*nat->nat_phnext[1] = nat->nat_hnext[1];
3315	nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3316
3317	/*
3318	 * Add into the NAT table in the new position
3319	 */
3320	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3321	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3322			 ipf_nattable_sz);
3323	nat->nat_hv[0] = hv;
3324	natp = &nat_table[0][hv];
3325	if (*natp)
3326		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3327	nat->nat_phnext[0] = natp;
3328	nat->nat_hnext[0] = *natp;
3329	*natp = nat;
3330	nat_stats.ns_bucketlen[0][hv]++;
3331
3332	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3333	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3334			 ipf_nattable_sz);
3335	nat->nat_hv[1] = hv;
3336	natp = &nat_table[1][hv];
3337	if (*natp)
3338		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3339	nat->nat_phnext[1] = natp;
3340	nat->nat_hnext[1] = *natp;
3341	*natp = nat;
3342	nat_stats.ns_bucketlen[1][hv]++;
3343}
3344
3345
3346/* ------------------------------------------------------------------------ */
3347/* Function:    nat_outlookup                                               */
3348/* Returns:     nat_t* - NULL == no match,                                  */
3349/*                       else pointer to matching NAT entry                 */
3350/* Parameters:  fin(I)   - pointer to packet information                    */
3351/*              flags(I) - NAT flags for this packet                        */
3352/*              p(I)     - protocol for this packet                         */
3353/*              src(I)   - source IP address                                */
3354/*              dst(I)   - destination IP address                           */
3355/*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3356/*                                                                          */
3357/* Lookup a nat entry based on the source 'real' ip address/port and        */
3358/* destination address/port.  We use this lookup when sending a packet out, */
3359/* we're looking for a table entry, based on the source address.            */
3360/*                                                                          */
3361/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3362/*                                                                          */
3363/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3364/*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3365/*                                                                          */
3366/* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3367/*            the packet is of said protocol                                */
3368/* ------------------------------------------------------------------------ */
3369nat_t *nat_outlookup(fin, flags, p, src, dst)
3370fr_info_t *fin;
3371u_int flags, p;
3372struct in_addr src , dst;
3373{
3374	u_short sport, dport;
3375	u_int sflags;
3376	ipnat_t *ipn;
3377	u_32_t srcip;
3378	nat_t *nat;
3379	int nflags;
3380	void *ifp;
3381	u_int hv;
3382
3383	ifp = fin->fin_ifp;
3384	srcip = src.s_addr;
3385	sflags = flags & IPN_TCPUDPICMP;
3386	sport = 0;
3387	dport = 0;
3388
3389	switch (p)
3390	{
3391	case IPPROTO_TCP :
3392	case IPPROTO_UDP :
3393		sport = htons(fin->fin_data[0]);
3394		dport = htons(fin->fin_data[1]);
3395		break;
3396	case IPPROTO_ICMP :
3397		if (flags & IPN_ICMPERR)
3398			sport = fin->fin_data[1];
3399		else
3400			dport = fin->fin_data[1];
3401		break;
3402	default :
3403		break;
3404	}
3405
3406	if ((flags & SI_WILDP) != 0)
3407		goto find_out_wild_ports;
3408
3409	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3410	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
3411	nat = nat_table[0][hv];
3412	for (; nat; nat = nat->nat_hnext[0]) {
3413		if (nat->nat_ifps[1] != NULL) {
3414			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3415				continue;
3416		} else if (ifp != NULL)
3417			nat->nat_ifps[1] = ifp;
3418
3419		nflags = nat->nat_flags;
3420
3421		if (nat->nat_inip.s_addr == srcip &&
3422		    nat->nat_oip.s_addr == dst.s_addr &&
3423		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3424		     || (p == nat->nat_p))) {
3425			switch (p)
3426			{
3427#if 0
3428			case IPPROTO_GRE :
3429				if (nat->nat_call[1] != fin->fin_data[0])
3430					continue;
3431				break;
3432#endif
3433			case IPPROTO_TCP :
3434			case IPPROTO_UDP :
3435				if (nat->nat_oport != dport)
3436					continue;
3437				if (nat->nat_inport != sport)
3438					continue;
3439				break;
3440			default :
3441				break;
3442			}
3443
3444			ipn = nat->nat_ptr;
3445			if ((ipn != NULL) && (nat->nat_aps != NULL))
3446				if (appr_match(fin, nat) != 0)
3447					continue;
3448			return nat;
3449		}
3450	}
3451
3452	/*
3453	 * So if we didn't find it but there are wildcard members in the hash
3454	 * table, go back and look for them.  We do this search and update here
3455	 * because it is modifying the NAT table and we want to do this only
3456	 * for the first packet that matches.  The exception, of course, is
3457	 * for "dummy" (FI_IGNORE) lookups.
3458	 */
3459find_out_wild_ports:
3460	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3461		return NULL;
3462	if (nat_stats.ns_wilds == 0)
3463		return NULL;
3464
3465	RWLOCK_EXIT(&ipf_nat);
3466
3467	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3468	hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
3469
3470	WRITE_ENTER(&ipf_nat);
3471
3472	nat = nat_table[0][hv];
3473	for (; nat; nat = nat->nat_hnext[0]) {
3474		if (nat->nat_ifps[1] != NULL) {
3475			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3476				continue;
3477		} else if (ifp != NULL)
3478			nat->nat_ifps[1] = ifp;
3479
3480		if (nat->nat_p != fin->fin_p)
3481			continue;
3482		if ((nat->nat_inip.s_addr != srcip) ||
3483		    (nat->nat_oip.s_addr != dst.s_addr))
3484			continue;
3485
3486		nflags = nat->nat_flags;
3487		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3488			continue;
3489
3490		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3491			       NAT_OUTBOUND) == 1) {
3492			if ((fin->fin_flx & FI_IGNORE) != 0)
3493				break;
3494			if ((nflags & SI_CLONE) != 0) {
3495				nat = fr_natclone(fin, nat);
3496				if (nat == NULL)
3497					break;
3498			} else {
3499				MUTEX_ENTER(&ipf_nat_new);
3500				nat_stats.ns_wilds--;
3501				MUTEX_EXIT(&ipf_nat_new);
3502			}
3503			nat->nat_inport = sport;
3504			nat->nat_oport = dport;
3505			if (nat->nat_outport == 0)
3506				nat->nat_outport = sport;
3507			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3508			nat_tabmove(nat);
3509			break;
3510		}
3511	}
3512
3513	MUTEX_DOWNGRADE(&ipf_nat);
3514
3515	return nat;
3516}
3517
3518
3519/* ------------------------------------------------------------------------ */
3520/* Function:    nat_lookupredir                                             */
3521/* Returns:     nat_t* - NULL == no match,                                  */
3522/*                       else pointer to matching NAT entry                 */
3523/* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3524/*                      entry for.                                          */
3525/*                                                                          */
3526/* Lookup the NAT tables to search for a matching redirect                  */
3527/* The contents of natlookup_t should imitate those found in a packet that  */
3528/* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3529/* We can do the lookup in one of two ways, imitating an inbound or         */
3530/* outbound  packet.  By default we assume outbound, unless IPN_IN is set.  */
3531/* For IN, the fields are set as follows:                                   */
3532/*     nl_real* = source information                                        */
3533/*     nl_out* = destination information (translated)                       */
3534/* For an out packet, the fields are set like this:                         */
3535/*     nl_in* = source information (untranslated)                           */
3536/*     nl_out* = destination information (translated)                       */
3537/* ------------------------------------------------------------------------ */
3538nat_t *nat_lookupredir(np)
3539natlookup_t *np;
3540{
3541	fr_info_t fi;
3542	nat_t *nat;
3543
3544	bzero((char *)&fi, sizeof(fi));
3545	if (np->nl_flags & IPN_IN) {
3546		fi.fin_data[0] = ntohs(np->nl_realport);
3547		fi.fin_data[1] = ntohs(np->nl_outport);
3548	} else {
3549		fi.fin_data[0] = ntohs(np->nl_inport);
3550		fi.fin_data[1] = ntohs(np->nl_outport);
3551	}
3552	if (np->nl_flags & IPN_TCP)
3553		fi.fin_p = IPPROTO_TCP;
3554	else if (np->nl_flags & IPN_UDP)
3555		fi.fin_p = IPPROTO_UDP;
3556	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3557		fi.fin_p = IPPROTO_ICMP;
3558
3559	/*
3560	 * We can do two sorts of lookups:
3561	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3562	 * - default: we have the `in' and `out' address, look for `real'.
3563	 */
3564	if (np->nl_flags & IPN_IN) {
3565		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3566					np->nl_realip, np->nl_outip))) {
3567			np->nl_inip = nat->nat_inip;
3568			np->nl_inport = nat->nat_inport;
3569		}
3570	} else {
3571		/*
3572		 * If nl_inip is non null, this is a lookup based on the real
3573		 * ip address. Else, we use the fake.
3574		 */
3575		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3576					 np->nl_inip, np->nl_outip))) {
3577
3578			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3579				fr_info_t fin;
3580				bzero((char *)&fin, sizeof(fin));
3581				fin.fin_p = nat->nat_p;
3582				fin.fin_data[0] = ntohs(nat->nat_outport);
3583				fin.fin_data[1] = ntohs(nat->nat_oport);
3584				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3585						 nat->nat_outip,
3586						 nat->nat_oip) != NULL) {
3587					np->nl_flags &= ~IPN_FINDFORWARD;
3588				}
3589			}
3590
3591			np->nl_realip = nat->nat_outip;
3592			np->nl_realport = nat->nat_outport;
3593		}
3594 	}
3595
3596	return nat;
3597}
3598
3599
3600/* ------------------------------------------------------------------------ */
3601/* Function:    nat_match                                                   */
3602/* Returns:     int - 0 == no match, 1 == match                             */
3603/* Parameters:  fin(I)   - pointer to packet information                    */
3604/*              np(I)    - pointer to NAT rule                              */
3605/*                                                                          */
3606/* Pull the matching of a packet against a NAT rule out of that complex     */
3607/* loop inside fr_checknatin() and lay it out properly in its own function. */
3608/* ------------------------------------------------------------------------ */
3609static int nat_match(fin, np)
3610fr_info_t *fin;
3611ipnat_t *np;
3612{
3613	frtuc_t *ft;
3614
3615	if (fin->fin_v != 4)
3616		return 0;
3617
3618	if (np->in_p && fin->fin_p != np->in_p)
3619		return 0;
3620
3621	if (fin->fin_out) {
3622		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3623			return 0;
3624		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3625		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3626			return 0;
3627		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3628		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3629			return 0;
3630	} else {
3631		if (!(np->in_redir & NAT_REDIRECT))
3632			return 0;
3633		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3634		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3635			return 0;
3636		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3637		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3638			return 0;
3639	}
3640
3641	ft = &np->in_tuc;
3642	if (!(fin->fin_flx & FI_TCPUDP) ||
3643	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3644		if (ft->ftu_scmp || ft->ftu_dcmp)
3645			return 0;
3646		return 1;
3647	}
3648
3649	return fr_tcpudpchk(fin, ft);
3650}
3651
3652
3653/* ------------------------------------------------------------------------ */
3654/* Function:    nat_update                                                  */
3655/* Returns:     Nil                                                         */
3656/* Parameters:  nat(I)    - pointer to NAT structure                        */
3657/*              np(I)     - pointer to NAT rule                             */
3658/*                                                                          */
3659/* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3660/* called with fin_rev updated - i.e. after calling nat_proto().            */
3661/* ------------------------------------------------------------------------ */
3662void nat_update(fin, nat, np)
3663fr_info_t *fin;
3664nat_t *nat;
3665ipnat_t *np;
3666{
3667	ipftq_t *ifq, *ifq2;
3668	ipftqent_t *tqe;
3669
3670	MUTEX_ENTER(&nat->nat_lock);
3671	tqe = &nat->nat_tqe;
3672	ifq = tqe->tqe_ifq;
3673
3674	/*
3675	 * We allow over-riding of NAT timeouts from NAT rules, even for
3676	 * TCP, however, if it is TCP and there is no rule timeout set,
3677	 * then do not update the timeout here.
3678	 */
3679	if (np != NULL)
3680		ifq2 = np->in_tqehead[fin->fin_rev];
3681	else
3682		ifq2 = NULL;
3683
3684	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3685		u_32_t end, ack;
3686		u_char tcpflags;
3687		tcphdr_t *tcp;
3688		int dsize;
3689
3690		tcp = fin->fin_dp;
3691		tcpflags = tcp->th_flags;
3692		dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
3693			((tcpflags & TH_SYN) ? 1 : 0) +
3694			((tcpflags & TH_FIN) ? 1 : 0);
3695
3696		ack = ntohl(tcp->th_ack);
3697		end = ntohl(tcp->th_seq) + dsize;
3698
3699		if (SEQ_GT(ack, nat->nat_seqnext[1 - fin->fin_rev]))
3700			nat->nat_seqnext[1 - fin->fin_rev] = ack;
3701
3702		if (nat->nat_seqnext[fin->fin_rev] == 0)
3703			nat->nat_seqnext[fin->fin_rev] = end;
3704
3705		(void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3706	} else {
3707		if (ifq2 == NULL) {
3708			if (nat->nat_p == IPPROTO_UDP)
3709				ifq2 = &nat_udptq;
3710			else if (nat->nat_p == IPPROTO_ICMP)
3711				ifq2 = &nat_icmptq;
3712			else
3713				ifq2 = &nat_iptq;
3714		}
3715
3716		fr_movequeue(tqe, ifq, ifq2);
3717	}
3718	MUTEX_EXIT(&nat->nat_lock);
3719}
3720
3721
3722/* ------------------------------------------------------------------------ */
3723/* Function:    fr_checknatout                                              */
3724/* Returns:     int - -1 == packet failed NAT checks so block it,           */
3725/*                     0 == no packet translation occurred,                 */
3726/*                     1 == packet was successfully translated.             */
3727/* Parameters:  fin(I)   - pointer to packet information                    */
3728/*              passp(I) - pointer to filtering result flags                */
3729/*                                                                          */
3730/* Check to see if an outcoming packet should be changed.  ICMP packets are */
3731/* first checked to see if they match an existing entry (if an error),      */
3732/* otherwise a search of the current NAT table is made.  If neither results */
3733/* in a match then a search for a matching NAT rule is made.  Create a new  */
3734/* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3735/* packet header(s) as required.                                            */
3736/* ------------------------------------------------------------------------ */
3737int fr_checknatout(fin, passp)
3738fr_info_t *fin;
3739u_32_t *passp;
3740{
3741	struct ifnet *ifp, *sifp;
3742	icmphdr_t *icmp = NULL;
3743	tcphdr_t *tcp = NULL;
3744	int rval, natfailed;
3745	ipnat_t *np = NULL;
3746	u_int nflags = 0;
3747	u_32_t ipa, iph;
3748	int natadd = 1;
3749	frentry_t *fr;
3750	nat_t *nat;
3751
3752	if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
3753		return 0;
3754
3755	natfailed = 0;
3756	fr = fin->fin_fr;
3757	sifp = fin->fin_ifp;
3758	if (fr != NULL) {
3759		ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3760		if ((ifp != NULL) && (ifp != (void *)-1))
3761			fin->fin_ifp = ifp;
3762	}
3763	ifp = fin->fin_ifp;
3764
3765	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3766		switch (fin->fin_p)
3767		{
3768		case IPPROTO_TCP :
3769			nflags = IPN_TCP;
3770			break;
3771		case IPPROTO_UDP :
3772			nflags = IPN_UDP;
3773			break;
3774		case IPPROTO_ICMP :
3775			icmp = fin->fin_dp;
3776
3777			/*
3778			 * This is an incoming packet, so the destination is
3779			 * the icmp_id and the source port equals 0
3780			 */
3781			if (nat_icmpquerytype4(icmp->icmp_type))
3782				nflags = IPN_ICMPQUERY;
3783			break;
3784		default :
3785			break;
3786		}
3787
3788		if ((nflags & IPN_TCPUDP))
3789			tcp = fin->fin_dp;
3790	}
3791
3792	ipa = fin->fin_saddr;
3793
3794	READ_ENTER(&ipf_nat);
3795
3796	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3797	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3798		/*EMPTY*/;
3799	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3800		natadd = 0;
3801	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3802				      fin->fin_src, fin->fin_dst))) {
3803		nflags = nat->nat_flags;
3804	} else {
3805		u_32_t hv, msk, nmsk;
3806
3807		/*
3808		 * If there is no current entry in the nat table for this IP#,
3809		 * create one for it (if there is a matching rule).
3810		 */
3811		RWLOCK_EXIT(&ipf_nat);
3812		msk = 0xffffffff;
3813		nmsk = nat_masks;
3814		WRITE_ENTER(&ipf_nat);
3815maskloop:
3816		iph = ipa & htonl(msk);
3817		hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3818		for (np = nat_rules[hv]; np; np = np->in_mnext)
3819		{
3820			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3821				continue;
3822			if (np->in_v != fin->fin_v)
3823				continue;
3824			if (np->in_p && (np->in_p != fin->fin_p))
3825				continue;
3826			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3827				continue;
3828			if (np->in_flags & IPN_FILTER) {
3829				if (!nat_match(fin, np))
3830					continue;
3831			} else if ((ipa & np->in_inmsk) != np->in_inip)
3832				continue;
3833
3834			if ((fr != NULL) &&
3835			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3836				continue;
3837
3838			if (*np->in_plabel != '\0') {
3839				if (((np->in_flags & IPN_FILTER) == 0) &&
3840				    (np->in_dport != tcp->th_dport))
3841					continue;
3842				if (appr_ok(fin, tcp, np) == 0)
3843					continue;
3844			}
3845
3846			if ((nat = nat_new(fin, np, NULL, nflags,
3847					   NAT_OUTBOUND))) {
3848				np->in_hits++;
3849				break;
3850			} else
3851				natfailed = -1;
3852		}
3853		if ((np == NULL) && (nmsk != 0)) {
3854			while (nmsk) {
3855				msk <<= 1;
3856				if (nmsk & 0x80000000)
3857					break;
3858				nmsk <<= 1;
3859			}
3860			if (nmsk != 0) {
3861				nmsk <<= 1;
3862				goto maskloop;
3863			}
3864		}
3865		MUTEX_DOWNGRADE(&ipf_nat);
3866	}
3867
3868	if (nat != NULL) {
3869		rval = fr_natout(fin, nat, natadd, nflags);
3870		if (rval == 1) {
3871			MUTEX_ENTER(&nat->nat_lock);
3872			nat->nat_ref++;
3873			MUTEX_EXIT(&nat->nat_lock);
3874			nat->nat_touched = fr_ticks;
3875			fin->fin_nat = nat;
3876		}
3877	} else
3878		rval = natfailed;
3879	RWLOCK_EXIT(&ipf_nat);
3880
3881	if (rval == -1) {
3882		if (passp != NULL)
3883			*passp = FR_BLOCK;
3884		fin->fin_flx |= FI_BADNAT;
3885	}
3886	fin->fin_ifp = sifp;
3887	return rval;
3888}
3889
3890/* ------------------------------------------------------------------------ */
3891/* Function:    fr_natout                                                   */
3892/* Returns:     int - -1 == packet failed NAT checks so block it,           */
3893/*                     1 == packet was successfully translated.             */
3894/* Parameters:  fin(I)    - pointer to packet information                   */
3895/*              nat(I)    - pointer to NAT structure                        */
3896/*              natadd(I) - flag indicating if it is safe to add frag cache */
3897/*              nflags(I) - NAT flags set for this packet                   */
3898/*                                                                          */
3899/* Translate a packet coming "out" on an interface.                         */
3900/* ------------------------------------------------------------------------ */
3901int fr_natout(fin, nat, natadd, nflags)
3902fr_info_t *fin;
3903nat_t *nat;
3904int natadd;
3905u_32_t nflags;
3906{
3907	icmphdr_t *icmp;
3908	u_short *csump;
3909	tcphdr_t *tcp;
3910	ipnat_t *np;
3911	int i;
3912
3913	tcp = NULL;
3914	icmp = NULL;
3915	csump = NULL;
3916	np = nat->nat_ptr;
3917
3918	if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3919		(void) fr_nat_newfrag(fin, 0, nat);
3920
3921	MUTEX_ENTER(&nat->nat_lock);
3922	nat->nat_bytes[1] += fin->fin_plen;
3923	nat->nat_pkts[1]++;
3924	MUTEX_EXIT(&nat->nat_lock);
3925
3926	/*
3927	 * Fix up checksums, not by recalculating them, but
3928	 * simply computing adjustments.
3929	 * This is only done for STREAMS based IP implementations where the
3930	 * checksum has already been calculated by IP.  In all other cases,
3931	 * IPFilter is called before the checksum needs calculating so there
3932	 * is no call to modify whatever is in the header now.
3933	 */
3934	if (fin->fin_v == 4) {
3935		if (nflags == IPN_ICMPERR) {
3936			u_32_t s1, s2, sumd;
3937
3938			s1 = LONG_SUM(ntohl(fin->fin_saddr));
3939			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3940			CALC_SUMD(s1, s2, sumd);
3941			fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3942		}
3943#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3944    defined(linux) || defined(BRIDGE_IPF)
3945		else {
3946			/*
3947			 * Strictly speaking, this isn't necessary on BSD
3948			 * kernels because they do checksum calculation after
3949			 * this code has run BUT if ipfilter is being used
3950			 * to do NAT as a bridge, that code doesn't exist.
3951			 */
3952			if (nat->nat_dir == NAT_OUTBOUND)
3953				fix_outcksum(fin, &fin->fin_ip->ip_sum,
3954					     nat->nat_ipsumd);
3955			else
3956				fix_incksum(fin, &fin->fin_ip->ip_sum,
3957					    nat->nat_ipsumd);
3958		}
3959#endif
3960	}
3961
3962	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3963		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3964			tcp = fin->fin_dp;
3965
3966			tcp->th_sport = nat->nat_outport;
3967			fin->fin_data[0] = ntohs(nat->nat_outport);
3968		}
3969
3970		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3971			icmp = fin->fin_dp;
3972			icmp->icmp_id = nat->nat_outport;
3973		}
3974
3975		csump = nat_proto(fin, nat, nflags);
3976	}
3977
3978	fin->fin_ip->ip_src = nat->nat_outip;
3979
3980	nat_update(fin, nat, np);
3981
3982	/*
3983	 * The above comments do not hold for layer 4 (or higher) checksums...
3984	 */
3985	if (csump != NULL) {
3986		if (nat->nat_dir == NAT_OUTBOUND)
3987			fix_outcksum(fin, csump, nat->nat_sumd[1]);
3988		else
3989			fix_incksum(fin, csump, nat->nat_sumd[1]);
3990	}
3991#ifdef	IPFILTER_SYNC
3992	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
3993#endif
3994	/* ------------------------------------------------------------- */
3995	/* A few quick notes:						 */
3996	/*	Following are test conditions prior to calling the 	 */
3997	/*	appr_check routine.					 */
3998	/*								 */
3999	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4000	/*	with a redirect rule, we attempt to match the packet's	 */
4001	/*	source port against in_dport, otherwise	we'd compare the */
4002	/*	packet's destination.			 		 */
4003	/* ------------------------------------------------------------- */
4004	if ((np != NULL) && (np->in_apr != NULL)) {
4005		i = appr_check(fin, nat);
4006		if (i == 0)
4007			i = 1;
4008	} else
4009		i = 1;
4010	ATOMIC_INCL(nat_stats.ns_mapped[1]);
4011	fin->fin_flx |= FI_NATED;
4012	return i;
4013}
4014
4015
4016/* ------------------------------------------------------------------------ */
4017/* Function:    fr_checknatin                                               */
4018/* Returns:     int - -1 == packet failed NAT checks so block it,           */
4019/*                     0 == no packet translation occurred,                 */
4020/*                     1 == packet was successfully translated.             */
4021/* Parameters:  fin(I)   - pointer to packet information                    */
4022/*              passp(I) - pointer to filtering result flags                */
4023/*                                                                          */
4024/* Check to see if an incoming packet should be changed.  ICMP packets are  */
4025/* first checked to see if they match an existing entry (if an error),      */
4026/* otherwise a search of the current NAT table is made.  If neither results */
4027/* in a match then a search for a matching NAT rule is made.  Create a new  */
4028/* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
4029/* packet header(s) as required.                                            */
4030/* ------------------------------------------------------------------------ */
4031int fr_checknatin(fin, passp)
4032fr_info_t *fin;
4033u_32_t *passp;
4034{
4035	u_int nflags, natadd;
4036	int rval, natfailed;
4037	struct ifnet *ifp;
4038	struct in_addr in;
4039	icmphdr_t *icmp;
4040	tcphdr_t *tcp;
4041	u_short dport;
4042	ipnat_t *np;
4043	nat_t *nat;
4044	u_32_t iph;
4045
4046	if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
4047		return 0;
4048
4049	tcp = NULL;
4050	icmp = NULL;
4051	dport = 0;
4052	natadd = 1;
4053	nflags = 0;
4054	natfailed = 0;
4055	ifp = fin->fin_ifp;
4056
4057	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4058		switch (fin->fin_p)
4059		{
4060		case IPPROTO_TCP :
4061			nflags = IPN_TCP;
4062			break;
4063		case IPPROTO_UDP :
4064			nflags = IPN_UDP;
4065			break;
4066		case IPPROTO_ICMP :
4067			icmp = fin->fin_dp;
4068
4069			/*
4070			 * This is an incoming packet, so the destination is
4071			 * the icmp_id and the source port equals 0
4072			 */
4073			if (nat_icmpquerytype4(icmp->icmp_type)) {
4074				nflags = IPN_ICMPQUERY;
4075				dport = icmp->icmp_id;
4076			} break;
4077		default :
4078			break;
4079		}
4080
4081		if ((nflags & IPN_TCPUDP)) {
4082			tcp = fin->fin_dp;
4083			dport = tcp->th_dport;
4084		}
4085	}
4086
4087	in = fin->fin_dst;
4088
4089	READ_ENTER(&ipf_nat);
4090
4091	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4092	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4093		/*EMPTY*/;
4094	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4095		natadd = 0;
4096	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4097				     fin->fin_src, in))) {
4098		nflags = nat->nat_flags;
4099	} else {
4100		u_32_t hv, msk, rmsk;
4101
4102		RWLOCK_EXIT(&ipf_nat);
4103		rmsk = rdr_masks;
4104		msk = 0xffffffff;
4105		WRITE_ENTER(&ipf_nat);
4106		/*
4107		 * If there is no current entry in the nat table for this IP#,
4108		 * create one for it (if there is a matching rule).
4109		 */
4110maskloop:
4111		iph = in.s_addr & htonl(msk);
4112		hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
4113		for (np = rdr_rules[hv]; np; np = np->in_rnext) {
4114			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4115				continue;
4116			if (np->in_v != fin->fin_v)
4117				continue;
4118			if (np->in_p && (np->in_p != fin->fin_p))
4119				continue;
4120			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4121				continue;
4122			if (np->in_flags & IPN_FILTER) {
4123				if (!nat_match(fin, np))
4124					continue;
4125			} else {
4126				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4127					continue;
4128				if (np->in_pmin &&
4129				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4130				     (ntohs(dport) < ntohs(np->in_pmin))))
4131					continue;
4132			}
4133
4134			if (*np->in_plabel != '\0') {
4135				if (!appr_ok(fin, tcp, np)) {
4136					continue;
4137				}
4138			}
4139
4140			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4141			if (nat != NULL) {
4142				np->in_hits++;
4143				break;
4144			} else
4145				natfailed = -1;
4146		}
4147
4148		if ((np == NULL) && (rmsk != 0)) {
4149			while (rmsk) {
4150				msk <<= 1;
4151				if (rmsk & 0x80000000)
4152					break;
4153				rmsk <<= 1;
4154			}
4155			if (rmsk != 0) {
4156				rmsk <<= 1;
4157				goto maskloop;
4158			}
4159		}
4160		MUTEX_DOWNGRADE(&ipf_nat);
4161	}
4162	if (nat != NULL) {
4163		rval = fr_natin(fin, nat, natadd, nflags);
4164		if (rval == 1) {
4165			MUTEX_ENTER(&nat->nat_lock);
4166			nat->nat_ref++;
4167			MUTEX_EXIT(&nat->nat_lock);
4168			nat->nat_touched = fr_ticks;
4169			fin->fin_nat = nat;
4170		}
4171	} else
4172		rval = natfailed;
4173	RWLOCK_EXIT(&ipf_nat);
4174
4175	if (rval == -1) {
4176		if (passp != NULL)
4177			*passp = FR_BLOCK;
4178		fin->fin_flx |= FI_BADNAT;
4179	}
4180	return rval;
4181}
4182
4183
4184/* ------------------------------------------------------------------------ */
4185/* Function:    fr_natin                                                    */
4186/* Returns:     int - -1 == packet failed NAT checks so block it,           */
4187/*                     1 == packet was successfully translated.             */
4188/* Parameters:  fin(I)    - pointer to packet information                   */
4189/*              nat(I)    - pointer to NAT structure                        */
4190/*              natadd(I) - flag indicating if it is safe to add frag cache */
4191/*              nflags(I) - NAT flags set for this packet                   */
4192/* Locks Held:  ipf_nat (READ)                                              */
4193/*                                                                          */
4194/* Translate a packet coming "in" on an interface.                          */
4195/* ------------------------------------------------------------------------ */
4196int fr_natin(fin, nat, natadd, nflags)
4197fr_info_t *fin;
4198nat_t *nat;
4199int natadd;
4200u_32_t nflags;
4201{
4202	icmphdr_t *icmp;
4203	u_short *csump;
4204	tcphdr_t *tcp;
4205	ipnat_t *np;
4206	int i;
4207
4208	tcp = NULL;
4209	csump = NULL;
4210	np = nat->nat_ptr;
4211	fin->fin_fr = nat->nat_fr;
4212
4213	if (np != NULL) {
4214		if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4215			(void) fr_nat_newfrag(fin, 0, nat);
4216
4217	/* ------------------------------------------------------------- */
4218	/* A few quick notes:						 */
4219	/*	Following are test conditions prior to calling the 	 */
4220	/*	appr_check routine.					 */
4221	/*								 */
4222	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4223	/*	with a map rule, we attempt to match the packet's	 */
4224	/*	source port against in_dport, otherwise	we'd compare the */
4225	/*	packet's destination.			 		 */
4226	/* ------------------------------------------------------------- */
4227		if (np->in_apr != NULL) {
4228			i = appr_check(fin, nat);
4229			if (i == -1) {
4230				return -1;
4231			}
4232		}
4233	}
4234
4235#ifdef	IPFILTER_SYNC
4236	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4237#endif
4238
4239	MUTEX_ENTER(&nat->nat_lock);
4240	nat->nat_bytes[0] += fin->fin_plen;
4241	nat->nat_pkts[0]++;
4242	MUTEX_EXIT(&nat->nat_lock);
4243
4244	fin->fin_ip->ip_dst = nat->nat_inip;
4245	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4246	if (nflags & IPN_TCPUDP)
4247		tcp = fin->fin_dp;
4248
4249	/*
4250	 * Fix up checksums, not by recalculating them, but
4251	 * simply computing adjustments.
4252	 * Why only do this for some platforms on inbound packets ?
4253	 * Because for those that it is done, IP processing is yet to happen
4254	 * and so the IPv4 header checksum has not yet been evaluated.
4255	 * Perhaps it should always be done for the benefit of things like
4256	 * fast forwarding (so that it doesn't need to be recomputed) but with
4257	 * header checksum offloading, perhaps it is a moot point.
4258	 */
4259#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4260     defined(__osf__) || defined(linux)
4261	if (nat->nat_dir == NAT_OUTBOUND)
4262		fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4263	else
4264		fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4265#endif
4266
4267	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4268		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4269			tcp->th_dport = nat->nat_inport;
4270			fin->fin_data[1] = ntohs(nat->nat_inport);
4271		}
4272
4273
4274		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4275			icmp = fin->fin_dp;
4276
4277			icmp->icmp_id = nat->nat_inport;
4278		}
4279
4280		csump = nat_proto(fin, nat, nflags);
4281	}
4282
4283	nat_update(fin, nat, np);
4284
4285	/*
4286	 * The above comments do not hold for layer 4 (or higher) checksums...
4287	 */
4288	if (csump != NULL) {
4289		if (nat->nat_dir == NAT_OUTBOUND)
4290			fix_incksum(fin, csump, nat->nat_sumd[0]);
4291		else
4292			fix_outcksum(fin, csump, nat->nat_sumd[0]);
4293	}
4294	ATOMIC_INCL(nat_stats.ns_mapped[0]);
4295	fin->fin_flx |= FI_NATED;
4296	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4297		fin->fin_nattag = &np->in_tag;
4298	return 1;
4299}
4300
4301
4302/* ------------------------------------------------------------------------ */
4303/* Function:    nat_proto                                                   */
4304/* Returns:     u_short* - pointer to transport header checksum to update,  */
4305/*                         NULL if the transport protocol is not recognised */
4306/*                         as needing a checksum update.                    */
4307/* Parameters:  fin(I)    - pointer to packet information                   */
4308/*              nat(I)    - pointer to NAT structure                        */
4309/*              nflags(I) - NAT flags set for this packet                   */
4310/*                                                                          */
4311/* Return the pointer to the checksum field for each protocol so understood.*/
4312/* If support for making other changes to a protocol header is required,    */
4313/* that is not strictly 'address' translation, such as clamping the MSS in  */
4314/* TCP down to a specific value, then do it from here.                      */
4315/* ------------------------------------------------------------------------ */
4316u_short *nat_proto(fin, nat, nflags)
4317fr_info_t *fin;
4318nat_t *nat;
4319u_int nflags;
4320{
4321	icmphdr_t *icmp;
4322	u_short *csump;
4323	tcphdr_t *tcp;
4324	udphdr_t *udp;
4325
4326	csump = NULL;
4327	if (fin->fin_out == 0) {
4328		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4329	} else {
4330		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4331	}
4332
4333	switch (fin->fin_p)
4334	{
4335	case IPPROTO_TCP :
4336		tcp = fin->fin_dp;
4337
4338		csump = &tcp->th_sum;
4339
4340		/*
4341		 * Do a MSS CLAMPING on a SYN packet,
4342		 * only deal IPv4 for now.
4343		 */
4344		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4345			nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
4346
4347		break;
4348
4349	case IPPROTO_UDP :
4350		udp = fin->fin_dp;
4351
4352		if (udp->uh_sum)
4353			csump = &udp->uh_sum;
4354		break;
4355
4356	case IPPROTO_ICMP :
4357		icmp = fin->fin_dp;
4358
4359		if ((nflags & IPN_ICMPQUERY) != 0) {
4360			if (icmp->icmp_cksum != 0)
4361				csump = &icmp->icmp_cksum;
4362		}
4363		break;
4364	}
4365	return csump;
4366}
4367
4368
4369/* ------------------------------------------------------------------------ */
4370/* Function:    fr_natunload                                                */
4371/* Returns:     Nil                                                         */
4372/* Parameters:  Nil                                                         */
4373/*                                                                          */
4374/* Free all memory used by NAT structures allocated at runtime.             */
4375/* ------------------------------------------------------------------------ */
4376void fr_natunload()
4377{
4378	ipftq_t *ifq, *ifqnext;
4379
4380	(void) nat_clearlist();
4381	(void) nat_flushtable();
4382
4383	/*
4384	 * Proxy timeout queues are not cleaned here because although they
4385	 * exist on the NAT list, appr_unload is called after fr_natunload
4386	 * and the proxies actually are responsible for them being created.
4387	 * Should the proxy timeouts have their own list?  There's no real
4388	 * justification as this is the only complication.
4389	 */
4390	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4391		ifqnext = ifq->ifq_next;
4392		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4393		    (fr_deletetimeoutqueue(ifq) == 0))
4394			fr_freetimeoutqueue(ifq);
4395	}
4396
4397	if (nat_table[0] != NULL) {
4398		KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4399		nat_table[0] = NULL;
4400	}
4401	if (nat_table[1] != NULL) {
4402		KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4403		nat_table[1] = NULL;
4404	}
4405	if (nat_rules != NULL) {
4406		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4407		nat_rules = NULL;
4408	}
4409	if (rdr_rules != NULL) {
4410		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4411		rdr_rules = NULL;
4412	}
4413	if (ipf_hm_maptable != NULL) {
4414		KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4415		ipf_hm_maptable = NULL;
4416	}
4417	if (nat_stats.ns_bucketlen[0] != NULL) {
4418		KFREES(nat_stats.ns_bucketlen[0],
4419		       sizeof(u_long *) * ipf_nattable_sz);
4420		nat_stats.ns_bucketlen[0] = NULL;
4421	}
4422	if (nat_stats.ns_bucketlen[1] != NULL) {
4423		KFREES(nat_stats.ns_bucketlen[1],
4424		       sizeof(u_long *) * ipf_nattable_sz);
4425		nat_stats.ns_bucketlen[1] = NULL;
4426	}
4427
4428	if (fr_nat_maxbucket_reset == 1)
4429		fr_nat_maxbucket = 0;
4430
4431	if (fr_nat_init == 1) {
4432		fr_nat_init = 0;
4433		fr_sttab_destroy(nat_tqb);
4434
4435		RW_DESTROY(&ipf_natfrag);
4436		RW_DESTROY(&ipf_nat);
4437
4438		MUTEX_DESTROY(&ipf_nat_new);
4439		MUTEX_DESTROY(&ipf_natio);
4440
4441		MUTEX_DESTROY(&nat_udptq.ifq_lock);
4442		MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4443		MUTEX_DESTROY(&nat_iptq.ifq_lock);
4444	}
4445}
4446
4447
4448/* ------------------------------------------------------------------------ */
4449/* Function:    fr_natexpire                                                */
4450/* Returns:     Nil                                                         */
4451/* Parameters:  Nil                                                         */
4452/*                                                                          */
4453/* Check all of the timeout queues for entries at the top which need to be  */
4454/* expired.                                                                 */
4455/* ------------------------------------------------------------------------ */
4456void fr_natexpire()
4457{
4458	ipftq_t *ifq, *ifqnext;
4459	ipftqent_t *tqe, *tqn;
4460	int i;
4461	SPL_INT(s);
4462
4463	SPL_NET(s);
4464	WRITE_ENTER(&ipf_nat);
4465	for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4466		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4467			if (tqe->tqe_die > fr_ticks)
4468				break;
4469			tqn = tqe->tqe_next;
4470			nat_delete(tqe->tqe_parent, NL_EXPIRE);
4471		}
4472	}
4473
4474	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4475		ifqnext = ifq->ifq_next;
4476
4477		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4478			if (tqe->tqe_die > fr_ticks)
4479				break;
4480			tqn = tqe->tqe_next;
4481			nat_delete(tqe->tqe_parent, NL_EXPIRE);
4482		}
4483	}
4484
4485	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4486		ifqnext = ifq->ifq_next;
4487
4488		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4489		    (ifq->ifq_ref == 0)) {
4490			fr_freetimeoutqueue(ifq);
4491		}
4492	}
4493
4494	if (fr_nat_doflush != 0) {
4495		nat_extraflush(2);
4496		fr_nat_doflush = 0;
4497	}
4498
4499	RWLOCK_EXIT(&ipf_nat);
4500	SPL_X(s);
4501}
4502
4503
4504/* ------------------------------------------------------------------------ */
4505/* Function:    fr_natsync                                                  */
4506/* Returns:     Nil                                                         */
4507/* Parameters:  ifp(I) - pointer to network interface                       */
4508/*                                                                          */
4509/* Walk through all of the currently active NAT sessions, looking for those */
4510/* which need to have their translated address updated.                     */
4511/* ------------------------------------------------------------------------ */
4512void fr_natsync(ifp)
4513void *ifp;
4514{
4515	u_32_t sum1, sum2, sumd;
4516	struct in_addr in;
4517	ipnat_t *n;
4518	nat_t *nat;
4519	void *ifp2;
4520	SPL_INT(s);
4521
4522	if (fr_running <= 0)
4523		return;
4524
4525	/*
4526	 * Change IP addresses for NAT sessions for any protocol except TCP
4527	 * since it will break the TCP connection anyway.  The only rules
4528	 * which will get changed are those which are "map ... -> 0/32",
4529	 * where the rule specifies the address is taken from the interface.
4530	 */
4531	SPL_NET(s);
4532	WRITE_ENTER(&ipf_nat);
4533
4534	if (fr_running <= 0) {
4535		RWLOCK_EXIT(&ipf_nat);
4536		return;
4537	}
4538
4539	for (nat = nat_instances; nat; nat = nat->nat_next) {
4540		if ((nat->nat_flags & IPN_TCP) != 0)
4541			continue;
4542		n = nat->nat_ptr;
4543		if ((n == NULL) ||
4544		    (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4545			continue;
4546		if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4547		     (ifp == nat->nat_ifps[1]))) {
4548			nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4549			if (nat->nat_ifnames[1][0] != '\0') {
4550				nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4551							  4);
4552			} else
4553				nat->nat_ifps[1] = nat->nat_ifps[0];
4554			ifp2 = nat->nat_ifps[0];
4555			if (ifp2 == NULL)
4556				continue;
4557
4558			/*
4559			 * Change the map-to address to be the same as the
4560			 * new one.
4561			 */
4562			sum1 = nat->nat_outip.s_addr;
4563			if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4564				nat->nat_outip = in;
4565			sum2 = nat->nat_outip.s_addr;
4566
4567			if (sum1 == sum2)
4568				continue;
4569			/*
4570			 * Readjust the checksum adjustment to take into
4571			 * account the new IP#.
4572			 */
4573			CALC_SUMD(sum1, sum2, sumd);
4574			/* XXX - dont change for TCP when solaris does
4575			 * hardware checksumming.
4576			 */
4577			sumd += nat->nat_sumd[0];
4578			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4579			nat->nat_sumd[1] = nat->nat_sumd[0];
4580		}
4581	}
4582
4583	for (n = nat_list; (n != NULL); n = n->in_next) {
4584		if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4585			n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4586		if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4587			n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4588	}
4589	RWLOCK_EXIT(&ipf_nat);
4590	SPL_X(s);
4591}
4592
4593
/* ------------------------------------------------------------------------ */
/* Function:    nat_icmpquerytype4                                          */
/* Returns:     int - 1 == query or reply type, 0 == any other type         */
/* Parameters:  icmptype(I) - ICMP type number                              */
/*                                                                          */
/* Report whether an ICMP type belongs to one of the query/reply pairs      */
/* (echo, timestamp, information request and address mask).                 */
/* ------------------------------------------------------------------------ */
static int nat_icmpquerytype4(icmptype)
int icmptype;
{
	/*
	 * For the ICMP query NAT code, it is essential that both the query
	 * and the reply match on the NAT rule.  Because the NAT structure
	 * does not keep track of the icmptype, and a single NAT structure
	 * is used for all icmp types with the same src, dest and id, the
	 * replies are simply treated as queries as well.  Although it seems
	 * silly to call a reply a query, this is exactly how it is defined
	 * in the IPv4 specification.
	 *
	 * Router advertisement/solicitation is currently unsupported: it
	 * would require rewriting the ICMP data section.
	 */
	return ((icmptype == ICMP_ECHO) ||
		(icmptype == ICMP_ECHOREPLY) ||
		(icmptype == ICMP_TSTAMP) ||
		(icmptype == ICMP_TSTAMPREPLY) ||
		(icmptype == ICMP_IREQ) ||
		(icmptype == ICMP_IREQREPLY) ||
		(icmptype == ICMP_MASKREQ) ||
		(icmptype == ICMP_MASKREPLY)) ? 1 : 0;
}
4634
4635
4636/* ------------------------------------------------------------------------ */
4637/* Function:    nat_log                                                     */
4638/* Returns:     Nil                                                         */
4639/* Parameters:  nat(I)  - pointer to NAT structure                          */
4640/*              type(I) - type of log entry to create                       */
4641/*                                                                          */
4642/* Creates a NAT log entry.                                                 */
4643/* ------------------------------------------------------------------------ */
4644void nat_log(nat, type)
4645struct nat *nat;
4646u_int type;
4647{
4648#ifdef	IPFILTER_LOG
4649# ifndef LARGE_NAT
4650	struct ipnat *np;
4651	int rulen;
4652# endif
4653	struct natlog natl;
4654	void *items[1];
4655	size_t sizes[1];
4656	int types[1];
4657
4658	natl.nl_inip = nat->nat_inip;
4659	natl.nl_outip = nat->nat_outip;
4660	natl.nl_origip = nat->nat_oip;
4661	natl.nl_bytes[0] = nat->nat_bytes[0];
4662	natl.nl_bytes[1] = nat->nat_bytes[1];
4663	natl.nl_pkts[0] = nat->nat_pkts[0];
4664	natl.nl_pkts[1] = nat->nat_pkts[1];
4665	natl.nl_origport = nat->nat_oport;
4666	natl.nl_inport = nat->nat_inport;
4667	natl.nl_outport = nat->nat_outport;
4668	natl.nl_p = nat->nat_p;
4669	natl.nl_type = type;
4670	natl.nl_rule = -1;
4671# ifndef LARGE_NAT
4672	if (nat->nat_ptr != NULL) {
4673		for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4674			if (np == nat->nat_ptr) {
4675				natl.nl_rule = rulen;
4676				break;
4677			}
4678	}
4679# endif
4680	items[0] = &natl;
4681	sizes[0] = sizeof(natl);
4682	types[0] = 0;
4683
4684	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4685#endif
4686}
4687
4688
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*                                                                          */
/* OpenBSD compatibility hook: an interface detach is simply passed on to   */
/* frsync() so that interface references within IPFilter get updated.       */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp)
void *ifp;
{
	frsync(ifp);
}
#endif
4705
4706
4707/* ------------------------------------------------------------------------ */
4708/* Function:    fr_ipnatderef                                               */
4709/* Returns:     Nil                                                         */
4710/* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4711/* Write Locks: ipf_nat                                                     */
4712/*                                                                          */
4713/* ------------------------------------------------------------------------ */
4714void fr_ipnatderef(inp)
4715ipnat_t **inp;
4716{
4717	ipnat_t *in;
4718
4719	in = *inp;
4720	*inp = NULL;
4721	in->in_space++;
4722	in->in_use--;
4723	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4724		if (in->in_apr)
4725			appr_free(in->in_apr);
4726		MUTEX_DESTROY(&in->in_lock);
4727		KFREE(in);
4728		nat_stats.ns_rules--;
4729#if SOLARIS && !defined(_INET_IP_STACK_H)
4730		if (nat_stats.ns_rules == 0)
4731			pfil_delayed_copy = 1;
4732#endif
4733	}
4734}
4735
4736
4737/* ------------------------------------------------------------------------ */
4738/* Function:    fr_natderef                                                 */
4739/* Returns:     Nil                                                         */
4740/* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4741/*                                                                          */
4742/* Decrement the reference counter for this NAT table entry and free it if  */
4743/* there are no more things using it.                                       */
4744/*                                                                          */
4745/* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4746/* structure *because* it only gets called on paths _after_ nat_ref has been*/
4747/* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4748/* because nat_delete() will do that and send nat_ref to -1.                */
4749/*                                                                          */
4750/* Holding the lock on nat_lock is required to serialise nat_delete() being */
4751/* called from a NAT flush ioctl with a deref happening because of a packet.*/
4752/* ------------------------------------------------------------------------ */
4753void fr_natderef(natp)
4754nat_t **natp;
4755{
4756	nat_t *nat;
4757
4758	nat = *natp;
4759	*natp = NULL;
4760
4761	MUTEX_ENTER(&nat->nat_lock);
4762	if (nat->nat_ref > 1) {
4763		nat->nat_ref--;
4764		MUTEX_EXIT(&nat->nat_lock);
4765		return;
4766	}
4767	MUTEX_EXIT(&nat->nat_lock);
4768
4769	WRITE_ENTER(&ipf_nat);
4770	nat_delete(nat, NL_EXPIRE);
4771	RWLOCK_EXIT(&ipf_nat);
4772}
4773
4774
4775/* ------------------------------------------------------------------------ */
4776/* Function:    fr_natclone                                                 */
4777/* Returns:     ipstate_t* - NULL == cloning failed,                        */
4778/*                           else pointer to new state structure            */
4779/* Parameters:  fin(I) - pointer to packet information                      */
4780/*              is(I)  - pointer to master state structure                  */
4781/* Write Lock:  ipf_nat                                                     */
4782/*                                                                          */
4783/* Create a "duplcate" state table entry from the master.                   */
4784/* ------------------------------------------------------------------------ */
4785static nat_t *fr_natclone(fin, nat)
4786fr_info_t *fin;
4787nat_t *nat;
4788{
4789	frentry_t *fr;
4790	nat_t *clone;
4791	ipnat_t *np;
4792
4793	KMALLOC(clone, nat_t *);
4794	if (clone == NULL)
4795		return NULL;
4796	bcopy((char *)nat, (char *)clone, sizeof(*clone));
4797
4798	MUTEX_NUKE(&clone->nat_lock);
4799
4800	clone->nat_aps = NULL;
4801	/*
4802	 * Initialize all these so that nat_delete() doesn't cause a crash.
4803	 */
4804	clone->nat_tqe.tqe_pnext = NULL;
4805	clone->nat_tqe.tqe_next = NULL;
4806	clone->nat_tqe.tqe_ifq = NULL;
4807	clone->nat_tqe.tqe_parent = clone;
4808
4809	clone->nat_flags &= ~SI_CLONE;
4810	clone->nat_flags |= SI_CLONED;
4811
4812	if (clone->nat_hm)
4813		clone->nat_hm->hm_ref++;
4814
4815	if (nat_insert(clone, fin->fin_rev) == -1) {
4816		KFREE(clone);
4817		return NULL;
4818	}
4819	np = clone->nat_ptr;
4820	if (np != NULL) {
4821		if (nat_logging)
4822			nat_log(clone, (u_int)np->in_redir);
4823		np->in_use++;
4824	}
4825	fr = clone->nat_fr;
4826	if (fr != NULL) {
4827		MUTEX_ENTER(&fr->fr_lock);
4828		fr->fr_ref++;
4829		MUTEX_EXIT(&fr->fr_lock);
4830	}
4831
4832	/*
4833	 * Because the clone is created outside the normal loop of things and
4834	 * TCP has special needs in terms of state, initialise the timeout
4835	 * state of the new NAT from here.
4836	 */
4837	if (clone->nat_p == IPPROTO_TCP) {
4838		(void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4839				  clone->nat_flags);
4840	}
4841#ifdef	IPFILTER_SYNC
4842	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4843#endif
4844	if (nat_logging)
4845		nat_log(clone, NL_CLONE);
4846	return clone;
4847}
4848
4849
4850/* ------------------------------------------------------------------------ */
4851/* Function:   nat_wildok                                                   */
4852/* Returns:    int - 1 == packet's ports match wildcards                    */
4853/*                   0 == packet's ports don't match wildcards              */
4854/* Parameters: nat(I)   - NAT entry                                         */
4855/*             sport(I) - source port                                       */
4856/*             dport(I) - destination port                                  */
4857/*             flags(I) - wildcard flags                                    */
4858/*             dir(I)   - packet direction                                  */
4859/*                                                                          */
4860/* Use NAT entry and packet direction to determine which combination of     */
4861/* wildcard flags should be used.                                           */
4862/* ------------------------------------------------------------------------ */
4863static int nat_wildok(nat, sport, dport, flags, dir)
4864nat_t *nat;
4865int sport;
4866int dport;
4867int flags;
4868int dir;
4869{
4870	/*
4871	 * When called by       dir is set to
4872	 * nat_inlookup         NAT_INBOUND (0)
4873	 * nat_outlookup        NAT_OUTBOUND (1)
4874	 *
4875	 * We simply combine the packet's direction in dir with the original
4876	 * "intended" direction of that NAT entry in nat->nat_dir to decide
4877	 * which combination of wildcard flags to allow.
4878	 */
4879
4880	switch ((dir << 1) | nat->nat_dir)
4881	{
4882	case 3: /* outbound packet / outbound entry */
4883		if (((nat->nat_inport == sport) ||
4884		    (flags & SI_W_SPORT)) &&
4885		    ((nat->nat_oport == dport) ||
4886		    (flags & SI_W_DPORT)))
4887			return 1;
4888		break;
4889	case 2: /* outbound packet / inbound entry */
4890		if (((nat->nat_outport == sport) ||
4891		    (flags & SI_W_DPORT)) &&
4892		    ((nat->nat_oport == dport) ||
4893		    (flags & SI_W_SPORT)))
4894			return 1;
4895		break;
4896	case 1: /* inbound packet / outbound entry */
4897		if (((nat->nat_oport == sport) ||
4898		    (flags & SI_W_DPORT)) &&
4899		    ((nat->nat_outport == dport) ||
4900		    (flags & SI_W_SPORT)))
4901			return 1;
4902		break;
4903	case 0: /* inbound packet / inbound entry */
4904		if (((nat->nat_oport == sport) ||
4905		    (flags & SI_W_SPORT)) &&
4906		    ((nat->nat_outport == dport) ||
4907		    (flags & SI_W_DPORT)))
4908			return 1;
4909		break;
4910	default:
4911		break;
4912	}
4913
4914	return(0);
4915}
4916
4917
4918/* ------------------------------------------------------------------------ */
4919/* Function:    nat_mssclamp                                                */
4920/* Returns:     Nil                                                         */
4921/* Parameters:  tcp(I)    - pointer to TCP header                           */
4922/*              maxmss(I) - value to clamp the TCP MSS to                   */
4923/*              fin(I)    - pointer to packet information                   */
4924/*              csump(I)  - pointer to TCP checksum                         */
4925/*                                                                          */
4926/* Check for MSS option and clamp it if necessary.  If found and changed,   */
4927/* then the TCP header checksum will be updated to reflect the change in    */
4928/* the MSS.                                                                 */
4929/* ------------------------------------------------------------------------ */
4930static void nat_mssclamp(tcp, maxmss, fin, csump)
4931tcphdr_t *tcp;
4932u_32_t maxmss;
4933fr_info_t *fin;
4934u_short *csump;
4935{
4936	u_char *cp, *ep, opt;
4937	int hlen, advance;
4938	u_32_t mss, sumd;
4939
4940	hlen = TCP_OFF(tcp) << 2;
4941	if (hlen > sizeof(*tcp)) {
4942		cp = (u_char *)tcp + sizeof(*tcp);
4943		ep = (u_char *)tcp + hlen;
4944
4945		while (cp < ep) {
4946			opt = cp[0];
4947			if (opt == TCPOPT_EOL)
4948				break;
4949			else if (opt == TCPOPT_NOP) {
4950				cp++;
4951				continue;
4952			}
4953
4954			if (cp + 1 >= ep)
4955				break;
4956			advance = cp[1];
4957			if ((cp + advance > ep) || (advance <= 0))
4958				break;
4959			switch (opt)
4960			{
4961			case TCPOPT_MAXSEG:
4962				if (advance != 4)
4963					break;
4964				mss = cp[2] * 256 + cp[3];
4965				if (mss > maxmss) {
4966					cp[2] = maxmss / 256;
4967					cp[3] = maxmss & 0xff;
4968					CALC_SUMD(mss, maxmss, sumd);
4969					fix_outcksum(fin, csump, sumd);
4970				}
4971				break;
4972			default:
4973				/* ignore unknown options */
4974				break;
4975			}
4976
4977			cp += advance;
4978		}
4979	}
4980}
4981
4982
4983/* ------------------------------------------------------------------------ */
4984/* Function:    fr_setnatqueue                                              */
4985/* Returns:     Nil                                                         */
4986/* Parameters:  nat(I)- pointer to NAT structure                            */
4987/*              rev(I) - forward(0) or reverse(1) direction                 */
4988/* Locks:       ipf_nat (read or write)                                     */
4989/*                                                                          */
4990/* Put the NAT entry on its default queue entry, using rev as a helped in   */
4991/* determining which queue it should be placed on.                          */
4992/* ------------------------------------------------------------------------ */
4993void fr_setnatqueue(nat, rev)
4994nat_t *nat;
4995int rev;
4996{
4997	ipftq_t *oifq, *nifq;
4998
4999	if (nat->nat_ptr != NULL)
5000		nifq = nat->nat_ptr->in_tqehead[rev];
5001	else
5002		nifq = NULL;
5003
5004	if (nifq == NULL) {
5005		switch (nat->nat_p)
5006		{
5007		case IPPROTO_UDP :
5008			nifq = &nat_udptq;
5009			break;
5010		case IPPROTO_ICMP :
5011			nifq = &nat_icmptq;
5012			break;
5013		case IPPROTO_TCP :
5014			nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
5015			break;
5016		default :
5017			nifq = &nat_iptq;
5018			break;
5019		}
5020	}
5021
5022	oifq = nat->nat_tqe.tqe_ifq;
5023	/*
5024	 * If it's currently on a timeout queue, move it from one queue to
5025	 * another, else put it on the end of the newly determined queue.
5026	 */
5027	if (oifq != NULL)
5028		fr_movequeue(&nat->nat_tqe, oifq, nifq);
5029	else
5030		fr_queueappend(&nat->nat_tqe, nifq, nat);
5031	return;
5032}
5033
5034
5035/* ------------------------------------------------------------------------ */
5036/* Function:    nat_getnext                                                 */
5037/* Returns:     int - 0 == ok, else error                                   */
5038/* Parameters:  t(I)   - pointer to ipftoken structure                      */
5039/*              itp(I) - pointer to ipfgeniter_t structure                  */
5040/*                                                                          */
5041/* Fetch the next nat/ipnat structure pointer from the linked list and      */
5042/* copy it out to the storage space pointed to by itp_data.  The next item  */
5043/* in the list to look at is put back in the ipftoken struture.             */
5044/* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
5045/* ipf_freetoken will call a deref function for us and we dont want to call */
5046/* that twice (second time would be in the second switch statement below.   */
5047/* ------------------------------------------------------------------------ */
5048static int nat_getnext(t, itp)
5049ipftoken_t *t;
5050ipfgeniter_t *itp;
5051{
5052	hostmap_t *hm, *nexthm = NULL, zerohm;
5053	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5054	nat_t *nat, *nextnat = NULL, zeronat;
5055	int error = 0, count;
5056	char *dst;
5057
5058	count = itp->igi_nitems;
5059	if (count < 1)
5060		return ENOSPC;
5061
5062	READ_ENTER(&ipf_nat);
5063
5064	switch (itp->igi_type)
5065	{
5066	case IPFGENITER_HOSTMAP :
5067		hm = t->ipt_data;
5068		if (hm == NULL) {
5069			nexthm = ipf_hm_maplist;
5070		} else {
5071			nexthm = hm->hm_next;
5072		}
5073		break;
5074
5075	case IPFGENITER_IPNAT :
5076		ipn = t->ipt_data;
5077		if (ipn == NULL) {
5078			nextipnat = nat_list;
5079		} else {
5080			nextipnat = ipn->in_next;
5081		}
5082		break;
5083
5084	case IPFGENITER_NAT :
5085		nat = t->ipt_data;
5086		if (nat == NULL) {
5087			nextnat = nat_instances;
5088		} else {
5089			nextnat = nat->nat_next;
5090		}
5091		break;
5092	default :
5093		RWLOCK_EXIT(&ipf_nat);
5094		return EINVAL;
5095	}
5096
5097	dst = itp->igi_data;
5098	for (;;) {
5099		switch (itp->igi_type)
5100		{
5101		case IPFGENITER_HOSTMAP :
5102			if (nexthm != NULL) {
5103				if (count == 1) {
5104					ATOMIC_INC32(nexthm->hm_ref);
5105					t->ipt_data = nexthm;
5106				}
5107			} else {
5108				bzero(&zerohm, sizeof(zerohm));
5109				nexthm = &zerohm;
5110				count = 1;
5111				t->ipt_data = NULL;
5112			}
5113			break;
5114
5115		case IPFGENITER_IPNAT :
5116			if (nextipnat != NULL) {
5117				if (count == 1) {
5118					MUTEX_ENTER(&nextipnat->in_lock);
5119					nextipnat->in_use++;
5120					MUTEX_EXIT(&nextipnat->in_lock);
5121					t->ipt_data = nextipnat;
5122				}
5123			} else {
5124				bzero(&zeroipn, sizeof(zeroipn));
5125				nextipnat = &zeroipn;
5126				count = 1;
5127				t->ipt_data = NULL;
5128			}
5129			break;
5130
5131		case IPFGENITER_NAT :
5132			if (nextnat != NULL) {
5133				if (count == 1) {
5134					MUTEX_ENTER(&nextnat->nat_lock);
5135					nextnat->nat_ref++;
5136					MUTEX_EXIT(&nextnat->nat_lock);
5137					t->ipt_data = nextnat;
5138				}
5139			} else {
5140				bzero(&zeronat, sizeof(zeronat));
5141				nextnat = &zeronat;
5142				count = 1;
5143				t->ipt_data = NULL;
5144			}
5145			break;
5146		default :
5147			break;
5148		}
5149		RWLOCK_EXIT(&ipf_nat);
5150
5151		/*
5152		 * Copying out to user space needs to be done without the lock.
5153		 */
5154		switch (itp->igi_type)
5155		{
5156		case IPFGENITER_HOSTMAP :
5157			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5158			if (error != 0)
5159				error = EFAULT;
5160			else
5161				dst += sizeof(*nexthm);
5162			break;
5163
5164		case IPFGENITER_IPNAT :
5165			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5166			if (error != 0)
5167				error = EFAULT;
5168			else
5169				dst += sizeof(*nextipnat);
5170			break;
5171
5172		case IPFGENITER_NAT :
5173			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5174			if (error != 0)
5175				error = EFAULT;
5176			else
5177				dst += sizeof(*nextnat);
5178			break;
5179		}
5180
5181		if ((count == 1) || (error != 0))
5182			break;
5183
5184		count--;
5185
5186		READ_ENTER(&ipf_nat);
5187
5188		/*
5189		 * We need to have the lock again here to make sure that
5190		 * using _next is consistent.
5191		 */
5192		switch (itp->igi_type)
5193		{
5194		case IPFGENITER_HOSTMAP :
5195			nexthm = nexthm->hm_next;
5196			break;
5197		case IPFGENITER_IPNAT :
5198			nextipnat = nextipnat->in_next;
5199			break;
5200		case IPFGENITER_NAT :
5201			nextnat = nextnat->nat_next;
5202			break;
5203		}
5204	}
5205
5206
5207	switch (itp->igi_type)
5208	{
5209	case IPFGENITER_HOSTMAP :
5210		if (hm != NULL) {
5211			WRITE_ENTER(&ipf_nat);
5212			fr_hostmapdel(&hm);
5213			RWLOCK_EXIT(&ipf_nat);
5214		}
5215		break;
5216	case IPFGENITER_IPNAT :
5217		if (ipn != NULL) {
5218			fr_ipnatderef(&ipn);
5219		}
5220		break;
5221	case IPFGENITER_NAT :
5222		if (nat != NULL) {
5223			fr_natderef(&nat);
5224		}
5225		break;
5226	default :
5227		break;
5228	}
5229
5230	return error;
5231}
5232
5233
5234/* ------------------------------------------------------------------------ */
5235/* Function:    nat_iterator                                                */
5236/* Returns:     int - 0 == ok, else error                                   */
5237/* Parameters:  token(I) - pointer to ipftoken structure                    */
5238/*              itp(I) - pointer to ipfgeniter_t structure                  */
5239/*                                                                          */
5240/* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5241/* generic structure to iterate through a list.  There are three different  */
5242/* linked lists of NAT related information to go through: NAT rules, active */
5243/* NAT mappings and the NAT fragment cache.                                 */
5244/* ------------------------------------------------------------------------ */
5245static int nat_iterator(token, itp)
5246ipftoken_t *token;
5247ipfgeniter_t *itp;
5248{
5249	int error;
5250
5251	if (itp->igi_data == NULL)
5252		return EFAULT;
5253
5254	token->ipt_subtype = itp->igi_type;
5255
5256	switch (itp->igi_type)
5257	{
5258	case IPFGENITER_HOSTMAP :
5259	case IPFGENITER_IPNAT :
5260	case IPFGENITER_NAT :
5261		error = nat_getnext(token, itp);
5262		break;
5263
5264	case IPFGENITER_NATFRAG :
5265#ifdef USE_MUTEXES
5266		error = fr_nextfrag(token, itp, &ipfr_natlist,
5267				    &ipfr_nattail, &ipf_natfrag);
5268#else
5269		error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5270#endif
5271		break;
5272	default :
5273		error = EINVAL;
5274		break;
5275	}
5276
5277	return error;
5278}
5279
5280
5281/* ------------------------------------------------------------------------ */
5282/* Function:    nat_extraflush                                              */
5283/* Returns:     int - 0 == success, -1 == failure                           */
5284/* Parameters:  which(I) - how to flush the active NAT table                */
5285/* Write Locks: ipf_nat                                                     */
5286/*                                                                          */
5287/* Flush nat tables.  Three actions currently defined:                      */
5288/* which == 0 : flush all nat table entries                                 */
5289/* which == 1 : flush TCP connections which have started to close but are   */
5290/*	      stuck for some reason.                                        */
5291/* which == 2 : flush TCP connections which have been idle for a long time, */
5292/*	      starting at > 4 days idle and working back in successive half-*/
5293/*	      days to at most 12 hours old.  If this fails to free enough   */
5294/*            slots then work backwards in half hour slots to 30 minutes.   */
5295/*            If that too fails, then work backwards in 30 second intervals */
5296/*            for the last 30 minutes to at worst 30 seconds idle.          */
5297/* ------------------------------------------------------------------------ */
5298static int nat_extraflush(which)
5299int which;
5300{
5301	ipftq_t *ifq, *ifqnext;
5302	nat_t *nat, **natp;
5303	ipftqent_t *tqn;
5304	int removed;
5305	SPL_INT(s);
5306
5307	removed = 0;
5308
5309	SPL_NET(s);
5310
5311	switch (which)
5312	{
5313	case 0 :
5314		/*
5315		 * Style 0 flush removes everything...
5316		 */
5317		for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5318			nat_delete(nat, NL_FLUSH);
5319			removed++;
5320		}
5321		break;
5322
5323	case 1 :
5324		/*
5325		 * Since we're only interested in things that are closing,
5326		 * we can start with the appropriate timeout queue.
5327		 */
5328		for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5329		     ifq = ifq->ifq_next) {
5330
5331			for (tqn = ifq->ifq_head; tqn != NULL; ) {
5332				nat = tqn->tqe_parent;
5333				tqn = tqn->tqe_next;
5334				if (nat->nat_p != IPPROTO_TCP)
5335					break;
5336				nat_delete(nat, NL_EXPIRE);
5337				removed++;
5338			}
5339		}
5340
5341		/*
5342		 * Also need to look through the user defined queues.
5343		 */
5344		for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5345			ifqnext = ifq->ifq_next;
5346			for (tqn = ifq->ifq_head; tqn != NULL; ) {
5347				nat = tqn->tqe_parent;
5348				tqn = tqn->tqe_next;
5349				if (nat->nat_p != IPPROTO_TCP)
5350					continue;
5351
5352				if ((nat->nat_tcpstate[0] >
5353				     IPF_TCPS_ESTABLISHED) &&
5354				    (nat->nat_tcpstate[1] >
5355				     IPF_TCPS_ESTABLISHED)) {
5356					nat_delete(nat, NL_EXPIRE);
5357					removed++;
5358				}
5359			}
5360		}
5361		break;
5362
5363		/*
5364		 * Args 5-11 correspond to flushing those particular states
5365		 * for TCP connections.
5366		 */
5367	case IPF_TCPS_CLOSE_WAIT :
5368	case IPF_TCPS_FIN_WAIT_1 :
5369	case IPF_TCPS_CLOSING :
5370	case IPF_TCPS_LAST_ACK :
5371	case IPF_TCPS_FIN_WAIT_2 :
5372	case IPF_TCPS_TIME_WAIT :
5373	case IPF_TCPS_CLOSED :
5374		tqn = nat_tqb[which].ifq_head;
5375		while (tqn != NULL) {
5376			nat = tqn->tqe_parent;
5377			tqn = tqn->tqe_next;
5378			nat_delete(nat, NL_FLUSH);
5379			removed++;
5380		}
5381		break;
5382
5383	default :
5384		if (which < 30)
5385			break;
5386
5387		/*
5388		 * Take a large arbitrary number to mean the number of seconds
5389		 * for which which consider to be the maximum value we'll allow
5390		 * the expiration to be.
5391		 */
5392		which = IPF_TTLVAL(which);
5393		for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5394			if (fr_ticks - nat->nat_touched > which) {
5395				nat_delete(nat, NL_FLUSH);
5396				removed++;
5397			} else
5398				natp = &nat->nat_next;
5399		}
5400		break;
5401	}
5402
5403	if (which != 2) {
5404		SPL_X(s);
5405		return removed;
5406	}
5407
5408	/*
5409	 * Asked to remove inactive entries because the table is full.
5410	 */
5411	if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
5412		nat_last_force_flush = fr_ticks;
5413		removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5414	}
5415
5416	SPL_X(s);
5417	return removed;
5418}
5419
5420
5421/* ------------------------------------------------------------------------ */
5422/* Function:    nat_flush_entry                                             */
5423/* Returns:     0 - always succeeds                                         */
5424/* Parameters:  entry(I) - pointer to NAT entry                             */
5425/* Write Locks: ipf_nat                                                     */
5426/*                                                                          */
5427/* This function is a stepping stone between ipf_queueflush() and           */
5428/* nat_dlete().  It is used so we can provide a uniform interface via the   */
5429/* ipf_queueflush() function.  Since the nat_delete() function returns void */
5430/* we translate that to mean it always succeeds in deleting something.      */
5431/* ------------------------------------------------------------------------ */
5432static int nat_flush_entry(entry)
5433void *entry;
5434{
5435	nat_delete(entry, NL_FLUSH);
5436	return 0;
5437}
5438
5439
5440/* ------------------------------------------------------------------------ */
5441/* Function:    nat_gettable                                                */
5442/* Returns:     int     - 0 = success, else error                           */
5443/* Parameters:  data(I) - pointer to ioctl data                             */
5444/*                                                                          */
5445/* This function handles ioctl requests for tables of nat information.      */
5446/* At present the only table it deals with is the hash bucket statistics.   */
5447/* ------------------------------------------------------------------------ */
5448static int nat_gettable(data)
5449char *data;
5450{
5451	ipftable_t table;
5452	int error;
5453
5454	error = fr_inobj(data, &table, IPFOBJ_GTABLE);
5455	if (error != 0)
5456		return error;
5457
5458	switch (table.ita_type)
5459	{
5460	case IPFTABLE_BUCKETS_NATIN :
5461		error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table,
5462				ipf_nattable_sz * sizeof(u_long));
5463		break;
5464
5465	case IPFTABLE_BUCKETS_NATOUT :
5466		error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table,
5467				ipf_nattable_sz * sizeof(u_long));
5468		break;
5469
5470	default :
5471		return EINVAL;
5472	}
5473
5474	if (error != 0) {
5475		error = EFAULT;
5476	}
5477	return error;
5478}
5479