ip_nat.c revision 172776
1/*	$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 172776 2007-10-18 21:52:14Z darrenr $	*/
2
3/*
4 * Copyright (C) 1995-2003 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 */
8#if defined(KERNEL) || defined(_KERNEL)
9# undef KERNEL
10# undef _KERNEL
11# define        KERNEL	1
12# define        _KERNEL	1
13#endif
14#include <sys/errno.h>
15#include <sys/types.h>
16#include <sys/param.h>
17#include <sys/time.h>
18#include <sys/file.h>
19#if defined(_KERNEL) && defined(__NetBSD_Version__) && \
20    (__NetBSD_Version__ >= 399002000)
21# include <sys/kauth.h>
22#endif
23#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24    defined(_KERNEL)
25#if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
26#  include "opt_ipfilter_log.h"
27# else
28#  include "opt_ipfilter.h"
29# endif
30#endif
31#if !defined(_KERNEL)
32# include <stdio.h>
33# include <string.h>
34# include <stdlib.h>
35# define _KERNEL
36# ifdef __OpenBSD__
37struct file;
38# endif
39# include <sys/uio.h>
40# undef _KERNEL
41#endif
42#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
43# include <sys/filio.h>
44# include <sys/fcntl.h>
45#else
46# include <sys/ioctl.h>
47#endif
48#if !defined(AIX)
49# include <sys/fcntl.h>
50#endif
51#if !defined(linux)
52# include <sys/protosw.h>
53#endif
54#include <sys/socket.h>
55#if defined(_KERNEL)
56# include <sys/systm.h>
57# if !defined(__SVR4) && !defined(__svr4__)
58#  include <sys/mbuf.h>
59# endif
60#endif
61#if defined(__SVR4) || defined(__svr4__)
62# include <sys/filio.h>
63# include <sys/byteorder.h>
64# ifdef _KERNEL
65#  include <sys/dditypes.h>
66# endif
67# include <sys/stream.h>
68# include <sys/kmem.h>
69#endif
70#if __FreeBSD_version >= 300000
71# include <sys/queue.h>
72#endif
73#include <net/if.h>
74#if __FreeBSD_version >= 300000
75# include <net/if_var.h>
76# if defined(_KERNEL) && !defined(IPFILTER_LKM)
77#  include "opt_ipfilter.h"
78# endif
79#endif
80#ifdef sun
81# include <net/af.h>
82#endif
83#include <net/route.h>
84#include <netinet/in.h>
85#include <netinet/in_systm.h>
86#include <netinet/ip.h>
87
88#ifdef RFC1825
89# include <vpn/md5.h>
90# include <vpn/ipsec.h>
91extern struct ifnet vpnif;
92#endif
93
94#if !defined(linux)
95# include <netinet/ip_var.h>
96#endif
97#include <netinet/tcp.h>
98#include <netinet/udp.h>
99#include <netinet/ip_icmp.h>
100#include "netinet/ip_compat.h"
101#include <netinet/tcpip.h>
102#include "netinet/ip_fil.h"
103#include "netinet/ip_nat.h"
104#include "netinet/ip_frag.h"
105#include "netinet/ip_state.h"
106#include "netinet/ip_proxy.h"
107#ifdef	IPFILTER_SYNC
108#include "netinet/ip_sync.h"
109#endif
110#if (__FreeBSD_version >= 300000)
111# include <sys/malloc.h>
112#endif
113/* END OF INCLUDES */
114
115#undef	SOCKADDR_IN
116#define	SOCKADDR_IN	struct sockaddr_in
117
118#if !defined(lint)
119static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
120static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 172776 2007-10-18 21:52:14Z darrenr $";
121/* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */
122#endif
123
124
/* ======================================================================== */
/* How the NAT is organised and works.                                      */
/*                                                                          */
/* Inside (interface y) NAT       Outside (interface x)                     */
/* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
/* ------------>         |   ------------>                                  */
/* src=10.1.1.1          |   src=192.1.1.1                                  */
/*                       |                                                  */
/*                       |   in, processed by fr_checknatin() for x         */
/* <------------         |   <------------                                  */
/* dst=10.1.1.1          |   dst=192.1.1.1                                  */
/* -------------------- -+- -------------------------------------           */
/* fr_checknatout() - changes ip_src and if required, sport                 */
/*             - creates a new mapping, if required.                        */
/* fr_checknatin()  - changes ip_dst and if required, dport                 */
/*                                                                          */
/* In the NAT table, internal source is recorded as "in" and externally     */
/* seen as "out".                                                           */
/* ======================================================================== */
145
146
147nat_t	**nat_table[2] = { NULL, NULL },
148	*nat_instances = NULL;
149ipnat_t	*nat_list = NULL;
150u_int	ipf_nattable_max = NAT_TABLE_MAX;
151u_int	ipf_nattable_sz = NAT_TABLE_SZ;
152u_int	ipf_natrules_sz = NAT_SIZE;
153u_int	ipf_rdrrules_sz = RDR_SIZE;
154u_int	ipf_hostmap_sz = HOSTMAP_SIZE;
155u_int	fr_nat_maxbucket = 0,
156	fr_nat_maxbucket_reset = 1;
157u_32_t	nat_masks = 0;
158u_32_t	rdr_masks = 0;
159u_long	nat_last_force_flush = 0;
160ipnat_t	**nat_rules = NULL;
161ipnat_t	**rdr_rules = NULL;
162hostmap_t	**ipf_hm_maptable  = NULL;
163hostmap_t	*ipf_hm_maplist  = NULL;
164ipftq_t	nat_tqb[IPF_TCP_NSTATES];
165ipftq_t	nat_udptq;
166ipftq_t	nat_icmptq;
167ipftq_t	nat_iptq;
168ipftq_t	*nat_utqe = NULL;
169int	fr_nat_doflush = 0;
170#ifdef  IPFILTER_LOG
171int	nat_logging = 1;
172#else
173int	nat_logging = 0;
174#endif
175
176u_long	fr_defnatage = DEF_NAT_AGE,
177	fr_defnatipage = 120,		/* 60 seconds */
178	fr_defnaticmpage = 6;		/* 3 seconds */
179natstat_t nat_stats;
180int	fr_nat_lock = 0;
181int	fr_nat_init = 0;
182#if SOLARIS && !defined(_INET_IP_STACK_H)
183extern	int		pfil_delayed_copy;
184#endif
185
186static	int	nat_flush_entry __P((void *));
187static	int	nat_flushtable __P((void));
188static	int	nat_clearlist __P((void));
189static	void	nat_addnat __P((struct ipnat *));
190static	void	nat_addrdr __P((struct ipnat *));
191static	void	nat_delrdr __P((struct ipnat *));
192static	void	nat_delnat __P((struct ipnat *));
193static	int	fr_natgetent __P((caddr_t));
194static	int	fr_natgetsz __P((caddr_t));
195static	int	fr_natputent __P((caddr_t, int));
196static	int	nat_extraflush __P((int));
197static	int	nat_gettable __P((char *));
198static	void	nat_tabmove __P((nat_t *));
199static	int	nat_match __P((fr_info_t *, ipnat_t *));
200static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
201static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
202static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
203				    struct in_addr, struct in_addr, u_32_t));
204static	int	nat_icmpquerytype4 __P((int));
205static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
206static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
207static	int	nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
208				      tcphdr_t *, nat_t **, int));
209static	int	nat_resolverule __P((ipnat_t *));
210static	nat_t	*fr_natclone __P((fr_info_t *, nat_t *));
211static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
212static	int	nat_wildok __P((nat_t *, int, int, int, int));
213static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *));
214static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *));
215
216
217/* ------------------------------------------------------------------------ */
218/* Function:    fr_natinit                                                  */
219/* Returns:     int - 0 == success, -1 == failure                           */
220/* Parameters:  Nil                                                         */
221/*                                                                          */
222/* Initialise all of the NAT locks, tables and other structures.            */
223/* ------------------------------------------------------------------------ */
224int fr_natinit()
225{
226	int i;
227
228	KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
229	if (nat_table[0] != NULL)
230		bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
231	else
232		return -1;
233
234	KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
235	if (nat_table[1] != NULL)
236		bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
237	else
238		return -2;
239
240	KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
241	if (nat_rules != NULL)
242		bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
243	else
244		return -3;
245
246	KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
247	if (rdr_rules != NULL)
248		bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
249	else
250		return -4;
251
252	KMALLOCS(ipf_hm_maptable, hostmap_t **, \
253		 sizeof(hostmap_t *) * ipf_hostmap_sz);
254	if (ipf_hm_maptable != NULL)
255		bzero((char *)ipf_hm_maptable,
256		      sizeof(hostmap_t *) * ipf_hostmap_sz);
257	else
258		return -5;
259	ipf_hm_maplist = NULL;
260
261	KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
262		 ipf_nattable_sz * sizeof(u_long));
263	if (nat_stats.ns_bucketlen[0] == NULL)
264		return -6;
265	bzero((char *)nat_stats.ns_bucketlen[0],
266	      ipf_nattable_sz * sizeof(u_long));
267
268	KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
269		 ipf_nattable_sz * sizeof(u_long));
270	if (nat_stats.ns_bucketlen[1] == NULL)
271		return -7;
272
273	bzero((char *)nat_stats.ns_bucketlen[1],
274	      ipf_nattable_sz * sizeof(u_long));
275
276	if (fr_nat_maxbucket == 0) {
277		for (i = ipf_nattable_sz; i > 0; i >>= 1)
278			fr_nat_maxbucket++;
279		fr_nat_maxbucket *= 2;
280	}
281
282	fr_sttab_init(nat_tqb);
283	/*
284	 * Increase this because we may have "keep state" following this too
285	 * and packet storms can occur if this is removed too quickly.
286	 */
287	nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
288	nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
289	nat_udptq.ifq_ttl = fr_defnatage;
290	nat_udptq.ifq_ref = 1;
291	nat_udptq.ifq_head = NULL;
292	nat_udptq.ifq_tail = &nat_udptq.ifq_head;
293	MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
294	nat_udptq.ifq_next = &nat_icmptq;
295	nat_icmptq.ifq_ttl = fr_defnaticmpage;
296	nat_icmptq.ifq_ref = 1;
297	nat_icmptq.ifq_head = NULL;
298	nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
299	MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
300	nat_icmptq.ifq_next = &nat_iptq;
301	nat_iptq.ifq_ttl = fr_defnatipage;
302	nat_iptq.ifq_ref = 1;
303	nat_iptq.ifq_head = NULL;
304	nat_iptq.ifq_tail = &nat_iptq.ifq_head;
305	MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
306	nat_iptq.ifq_next = NULL;
307
308	for (i = 0; i < IPF_TCP_NSTATES; i++) {
309		if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
310			nat_tqb[i].ifq_ttl = fr_defnaticmpage;
311#ifdef LARGE_NAT
312		else if (nat_tqb[i].ifq_ttl > fr_defnatage)
313			nat_tqb[i].ifq_ttl = fr_defnatage;
314#endif
315	}
316
317	/*
318	 * Increase this because we may have "keep state" following
319	 * this too and packet storms can occur if this is removed
320	 * too quickly.
321	 */
322	nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
323
324	RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
325	RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
326	MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
327	MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
328
329	fr_nat_init = 1;
330
331	return 0;
332}
333
334
335/* ------------------------------------------------------------------------ */
336/* Function:    nat_addrdr                                                  */
337/* Returns:     Nil                                                         */
338/* Parameters:  n(I) - pointer to NAT rule to add                           */
339/*                                                                          */
340/* Adds a redirect rule to the hash table of redirect rules and the list of */
341/* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
342/* use by redirect rules.                                                   */
343/* ------------------------------------------------------------------------ */
344static void nat_addrdr(n)
345ipnat_t *n;
346{
347	ipnat_t **np;
348	u_32_t j;
349	u_int hv;
350	int k;
351
352	k = count4bits(n->in_outmsk);
353	if ((k >= 0) && (k != 32))
354		rdr_masks |= 1 << k;
355	j = (n->in_outip & n->in_outmsk);
356	hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
357	np = rdr_rules + hv;
358	while (*np != NULL)
359		np = &(*np)->in_rnext;
360	n->in_rnext = NULL;
361	n->in_prnext = np;
362	n->in_hv = hv;
363	*np = n;
364}
365
366
367/* ------------------------------------------------------------------------ */
368/* Function:    nat_addnat                                                  */
369/* Returns:     Nil                                                         */
370/* Parameters:  n(I) - pointer to NAT rule to add                           */
371/*                                                                          */
372/* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
373/* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
374/* redirect rules.                                                          */
375/* ------------------------------------------------------------------------ */
376static void nat_addnat(n)
377ipnat_t *n;
378{
379	ipnat_t **np;
380	u_32_t j;
381	u_int hv;
382	int k;
383
384	k = count4bits(n->in_inmsk);
385	if ((k >= 0) && (k != 32))
386		nat_masks |= 1 << k;
387	j = (n->in_inip & n->in_inmsk);
388	hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
389	np = nat_rules + hv;
390	while (*np != NULL)
391		np = &(*np)->in_mnext;
392	n->in_mnext = NULL;
393	n->in_pmnext = np;
394	n->in_hv = hv;
395	*np = n;
396}
397
398
399/* ------------------------------------------------------------------------ */
400/* Function:    nat_delrdr                                                  */
401/* Returns:     Nil                                                         */
402/* Parameters:  n(I) - pointer to NAT rule to delete                        */
403/*                                                                          */
404/* Removes a redirect rule from the hash table of redirect rules.           */
405/* ------------------------------------------------------------------------ */
406static void nat_delrdr(n)
407ipnat_t *n;
408{
409	if (n->in_rnext)
410		n->in_rnext->in_prnext = n->in_prnext;
411	*n->in_prnext = n->in_rnext;
412}
413
414
415/* ------------------------------------------------------------------------ */
416/* Function:    nat_delnat                                                  */
417/* Returns:     Nil                                                         */
418/* Parameters:  n(I) - pointer to NAT rule to delete                        */
419/*                                                                          */
420/* Removes a NAT map rule from the hash table of NAT map rules.             */
421/* ------------------------------------------------------------------------ */
422static void nat_delnat(n)
423ipnat_t *n;
424{
425	if (n->in_mnext != NULL)
426		n->in_mnext->in_pmnext = n->in_pmnext;
427	*n->in_pmnext = n->in_mnext;
428}
429
430
431/* ------------------------------------------------------------------------ */
432/* Function:    nat_hostmap                                                 */
433/* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
434/*                                else a pointer to the hostmapping to use  */
435/* Parameters:  np(I)   - pointer to NAT rule                               */
436/*              real(I) - real IP address                                   */
437/*              map(I)  - mapped IP address                                 */
438/*              port(I) - destination port number                           */
439/* Write Locks: ipf_nat                                                     */
440/*                                                                          */
441/* Check if an ip address has already been allocated for a given mapping    */
442/* that is not doing port based translation.  If is not yet allocated, then */
443/* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
444/* ------------------------------------------------------------------------ */
445static struct hostmap *nat_hostmap(np, src, dst, map, port)
446ipnat_t *np;
447struct in_addr src;
448struct in_addr dst;
449struct in_addr map;
450u_32_t port;
451{
452	hostmap_t *hm;
453	u_int hv;
454
455	hv = (src.s_addr ^ dst.s_addr);
456	hv += src.s_addr;
457	hv += dst.s_addr;
458	hv %= HOSTMAP_SIZE;
459	for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_next)
460		if ((hm->hm_srcip.s_addr == src.s_addr) &&
461		    (hm->hm_dstip.s_addr == dst.s_addr) &&
462		    ((np == NULL) || (np == hm->hm_ipnat)) &&
463		    ((port == 0) || (port == hm->hm_port))) {
464			hm->hm_ref++;
465			return hm;
466		}
467
468	if (np == NULL)
469		return NULL;
470
471	KMALLOC(hm, hostmap_t *);
472	if (hm) {
473		hm->hm_next = ipf_hm_maplist;
474		hm->hm_pnext = &ipf_hm_maplist;
475		if (ipf_hm_maplist != NULL)
476			ipf_hm_maplist->hm_pnext = &hm->hm_next;
477		ipf_hm_maplist = hm;
478		hm->hm_hnext = ipf_hm_maptable[hv];
479		hm->hm_phnext = ipf_hm_maptable + hv;
480		if (ipf_hm_maptable[hv] != NULL)
481			ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
482		ipf_hm_maptable[hv] = hm;
483		hm->hm_ipnat = np;
484		hm->hm_srcip = src;
485		hm->hm_dstip = dst;
486		hm->hm_mapip = map;
487		hm->hm_ref = 1;
488		hm->hm_port = port;
489	}
490	return hm;
491}
492
493
494/* ------------------------------------------------------------------------ */
495/* Function:    fr_hostmapdel                                               */
496/* Returns:     Nil                                                         */
497/* Parameters:  hmp(I) - pointer to hostmap structure pointer               */
498/* Write Locks: ipf_nat                                                     */
499/*                                                                          */
500/* Decrement the references to this hostmap structure by one.  If this      */
501/* reaches zero then remove it and free it.                                 */
502/* ------------------------------------------------------------------------ */
503void fr_hostmapdel(hmp)
504struct hostmap **hmp;
505{
506	struct hostmap *hm;
507
508	hm = *hmp;
509	*hmp = NULL;
510
511	hm->hm_ref--;
512	if (hm->hm_ref == 0) {
513		if (hm->hm_hnext)
514			hm->hm_hnext->hm_phnext = hm->hm_phnext;
515		*hm->hm_phnext = hm->hm_hnext;
516		if (hm->hm_next)
517			hm->hm_next->hm_pnext = hm->hm_pnext;
518		*hm->hm_pnext = hm->hm_next;
519		KFREE(hm);
520	}
521}
522
523
524/* ------------------------------------------------------------------------ */
525/* Function:    fix_outcksum                                                */
526/* Returns:     Nil                                                         */
527/* Parameters:  fin(I) - pointer to packet information                      */
528/*              sp(I)  - location of 16bit checksum to update               */
529/*              n((I)  - amount to adjust checksum by                       */
530/*                                                                          */
531/* Adjusts the 16bit checksum by "n" for packets going out.                 */
532/* ------------------------------------------------------------------------ */
533void fix_outcksum(fin, sp, n)
534fr_info_t *fin;
535u_short *sp;
536u_32_t n;
537{
538	u_short sumshort;
539	u_32_t sum1;
540
541	if (n == 0)
542		return;
543
544	if (n & NAT_HW_CKSUM) {
545		n &= 0xffff;
546		n += fin->fin_dlen;
547		n = (n & 0xffff) + (n >> 16);
548		*sp = n & 0xffff;
549		return;
550	}
551	sum1 = (~ntohs(*sp)) & 0xffff;
552	sum1 += (n);
553	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
554	/* Again */
555	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
556	sumshort = ~(u_short)sum1;
557	*(sp) = htons(sumshort);
558}
559
560
561/* ------------------------------------------------------------------------ */
562/* Function:    fix_incksum                                                 */
563/* Returns:     Nil                                                         */
564/* Parameters:  fin(I) - pointer to packet information                      */
565/*              sp(I)  - location of 16bit checksum to update               */
566/*              n((I)  - amount to adjust checksum by                       */
567/*                                                                          */
568/* Adjusts the 16bit checksum by "n" for packets going in.                  */
569/* ------------------------------------------------------------------------ */
570void fix_incksum(fin, sp, n)
571fr_info_t *fin;
572u_short *sp;
573u_32_t n;
574{
575	u_short sumshort;
576	u_32_t sum1;
577
578	if (n == 0)
579		return;
580
581	if (n & NAT_HW_CKSUM) {
582		n &= 0xffff;
583		n += fin->fin_dlen;
584		n = (n & 0xffff) + (n >> 16);
585		*sp = n & 0xffff;
586		return;
587	}
588	sum1 = (~ntohs(*sp)) & 0xffff;
589	sum1 += ~(n) & 0xffff;
590	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
591	/* Again */
592	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
593	sumshort = ~(u_short)sum1;
594	*(sp) = htons(sumshort);
595}
596
597
/* ------------------------------------------------------------------------ */
/* Function:    fix_datacksum                                               */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts a 16bit checksum that lives in the data section of an IP         */
/* packet, and is meant to be used *only* for that purpose.                 */
/*                                                                          */
/* The one case that needs it is NAT of an ICMP error message, whose body   */
/* carries the IP header of the original packet that caused the error.      */
/*                                                                          */
/* fix_incksum() and fix_outcksum() cannot be used there: to the kernel     */
/* the body of an ICMP error is plain data, so it has had none of the       */
/* special processing (hardware checksum, ntohs) applied to it.             */
/* ------------------------------------------------------------------------ */
void fix_datacksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	u_short folded;
	int pass;

	if (n != 0) {
		/* Work on the one's complement of the stored sum. */
		acc = (~ntohs(*sp)) & 0xffff;
		acc += n;
		/* Fold the carries back in; twice, as the first may carry. */
		for (pass = 0; pass < 2; pass++)
			acc = (acc & 0xffff) + (acc >> 16);
		folded = ~(u_short)acc;
		*sp = htons(folded);
	}
}
634
635
636/* ------------------------------------------------------------------------ */
637/* Function:    fr_nat_ioctl                                                */
638/* Returns:     int - 0 == success, != 0 == failure                         */
639/* Parameters:  data(I) - pointer to ioctl data                             */
640/*              cmd(I)  - ioctl command integer                             */
641/*              mode(I) - file mode bits used with open                     */
642/*                                                                          */
643/* Processes an ioctl call made to operate on the IP Filter NAT device.     */
644/* ------------------------------------------------------------------------ */
645int fr_nat_ioctl(data, cmd, mode, uid, ctx)
646ioctlcmd_t cmd;
647caddr_t data;
648int mode, uid;
649void *ctx;
650{
651	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
652	int error = 0, ret, arg, getlock;
653	ipnat_t natd;
654	SPL_INT(s);
655
656#if (BSD >= 199306) && defined(_KERNEL)
657# if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
658	if ((mode & FWRITE) &&
659	     kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
660				     KAUTH_REQ_NETWORK_FIREWALL_FW,
661				     NULL, NULL, NULL)) {
662		return EPERM;
663	}
664# else
665	if ((securelevel >= 3) && (mode & FWRITE)) {
666		return EPERM;
667	}
668# endif
669#endif
670
671#if defined(__osf__) && defined(_KERNEL)
672	getlock = 0;
673#else
674	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
675#endif
676
677	nat = NULL;     /* XXX gcc -Wuninitialized */
678	if (cmd == (ioctlcmd_t)SIOCADNAT) {
679		KMALLOC(nt, ipnat_t *);
680	} else {
681		nt = NULL;
682	}
683
684	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
685		if (mode & NAT_SYSSPACE) {
686			bcopy(data, (char *)&natd, sizeof(natd));
687			error = 0;
688		} else {
689			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
690		}
691	}
692
693	if (error != 0)
694		goto done;
695
696	/*
697	 * For add/delete, look to see if the NAT entry is already present
698	 */
699	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
700		nat = &natd;
701		if (nat->in_v == 0)	/* For backward compat. */
702			nat->in_v = 4;
703		nat->in_flags &= IPN_USERFLAGS;
704		if ((nat->in_redir & NAT_MAPBLK) == 0) {
705			if ((nat->in_flags & IPN_SPLIT) == 0)
706				nat->in_inip &= nat->in_inmsk;
707			if ((nat->in_flags & IPN_IPRANGE) == 0)
708				nat->in_outip &= nat->in_outmsk;
709		}
710		MUTEX_ENTER(&ipf_natio);
711		for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
712			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
713					IPN_CMPSIZ) == 0) {
714				if (nat->in_redir == NAT_REDIRECT &&
715				    nat->in_pnext != n->in_pnext)
716					continue;
717				break;
718			}
719	}
720
721	switch (cmd)
722	{
723#ifdef  IPFILTER_LOG
724	case SIOCIPFFB :
725	{
726		int tmp;
727
728		if (!(mode & FWRITE))
729			error = EPERM;
730		else {
731			tmp = ipflog_clear(IPL_LOGNAT);
732			error = BCOPYOUT((char *)&tmp, (char *)data,
733					 sizeof(tmp));
734			if (error != 0)
735				error = EFAULT;
736		}
737		break;
738	}
739
740	case SIOCSETLG :
741		if (!(mode & FWRITE))
742			error = EPERM;
743		else {
744			error = BCOPYIN((char *)data, (char *)&nat_logging,
745					sizeof(nat_logging));
746			if (error != 0)
747				error = EFAULT;
748		}
749		break;
750
751	case SIOCGETLG :
752		error = BCOPYOUT((char *)&nat_logging, (char *)data,
753				 sizeof(nat_logging));
754		if (error != 0)
755			error = EFAULT;
756		break;
757
758	case FIONREAD :
759		arg = iplused[IPL_LOGNAT];
760		error = BCOPYOUT(&arg, data, sizeof(arg));
761		if (error != 0)
762			error = EFAULT;
763		break;
764#endif
765	case SIOCADNAT :
766		if (!(mode & FWRITE)) {
767			error = EPERM;
768		} else if (n != NULL) {
769			error = EEXIST;
770		} else if (nt == NULL) {
771			error = ENOMEM;
772		}
773		if (error != 0) {
774			MUTEX_EXIT(&ipf_natio);
775			break;
776		}
777		bcopy((char *)nat, (char *)nt, sizeof(*n));
778		error = nat_siocaddnat(nt, np, getlock);
779		MUTEX_EXIT(&ipf_natio);
780		if (error == 0)
781			nt = NULL;
782		break;
783
784	case SIOCRMNAT :
785		if (!(mode & FWRITE)) {
786			error = EPERM;
787			n = NULL;
788		} else if (n == NULL) {
789			error = ESRCH;
790		}
791
792		if (error != 0) {
793			MUTEX_EXIT(&ipf_natio);
794			break;
795		}
796		nat_siocdelnat(n, np, getlock);
797
798		MUTEX_EXIT(&ipf_natio);
799		n = NULL;
800		break;
801
802	case SIOCGNATS :
803		nat_stats.ns_table[0] = nat_table[0];
804		nat_stats.ns_table[1] = nat_table[1];
805		nat_stats.ns_list = nat_list;
806		nat_stats.ns_maptable = ipf_hm_maptable;
807		nat_stats.ns_maplist = ipf_hm_maplist;
808		nat_stats.ns_nattab_sz = ipf_nattable_sz;
809		nat_stats.ns_nattab_max = ipf_nattable_max;
810		nat_stats.ns_rultab_sz = ipf_natrules_sz;
811		nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
812		nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
813		nat_stats.ns_instances = nat_instances;
814		nat_stats.ns_apslist = ap_sess_list;
815		nat_stats.ns_ticks = fr_ticks;
816		error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
817		break;
818
819	case SIOCGNATL :
820	    {
821		natlookup_t nl;
822
823		if (getlock) {
824			READ_ENTER(&ipf_nat);
825		}
826		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
827		if (error == 0) {
828			if (nat_lookupredir(&nl) != NULL) {
829				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
830			} else {
831				error = ESRCH;
832			}
833		}
834		if (getlock) {
835			RWLOCK_EXIT(&ipf_nat);
836		}
837		break;
838	    }
839
840	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
841		if (!(mode & FWRITE)) {
842			error = EPERM;
843			break;
844		}
845		if (getlock) {
846			WRITE_ENTER(&ipf_nat);
847		}
848
849		error = BCOPYIN(data, &arg, sizeof(arg));
850		if (error != 0)
851			error = EFAULT;
852		else {
853			if (arg == 0)
854				ret = nat_flushtable();
855			else if (arg == 1)
856				ret = nat_clearlist();
857			else
858				ret = nat_extraflush(arg);
859		}
860
861		if (getlock) {
862			RWLOCK_EXIT(&ipf_nat);
863		}
864		if (error == 0) {
865			error = BCOPYOUT(&ret, data, sizeof(ret));
866		}
867		break;
868
869	case SIOCPROXY :
870		error = appr_ioctl(data, cmd, mode, ctx);
871		break;
872
873	case SIOCSTLCK :
874		if (!(mode & FWRITE)) {
875			error = EPERM;
876		} else {
877			error = fr_lock(data, &fr_nat_lock);
878		}
879		break;
880
881	case SIOCSTPUT :
882		if ((mode & FWRITE) != 0) {
883			error = fr_natputent(data, getlock);
884		} else {
885			error = EACCES;
886		}
887		break;
888
889	case SIOCSTGSZ :
890		if (fr_nat_lock) {
891			if (getlock) {
892				READ_ENTER(&ipf_nat);
893			}
894			error = fr_natgetsz(data);
895			if (getlock) {
896				RWLOCK_EXIT(&ipf_nat);
897			}
898		} else
899			error = EACCES;
900		break;
901
902	case SIOCSTGET :
903		if (fr_nat_lock) {
904			if (getlock) {
905				READ_ENTER(&ipf_nat);
906			}
907			error = fr_natgetent(data);
908			if (getlock) {
909				RWLOCK_EXIT(&ipf_nat);
910			}
911		} else
912			error = EACCES;
913		break;
914
915	case SIOCGENITER :
916	    {
917		ipfgeniter_t iter;
918		ipftoken_t *token;
919
920		SPL_SCHED(s);
921		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
922		if (error == 0) {
923			token = ipf_findtoken(iter.igi_type, uid, ctx);
924			if (token != NULL) {
925				error  = nat_iterator(token, &iter);
926			}
927			RWLOCK_EXIT(&ipf_tokens);
928		}
929		SPL_X(s);
930		break;
931	    }
932
933	case SIOCIPFDELTOK :
934		error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
935		if (error == 0) {
936			SPL_SCHED(s);
937			error = ipf_deltoken(arg, uid, ctx);
938			SPL_X(s);
939		} else {
940			error = EFAULT;
941		}
942		break;
943
944	case SIOCGTQTAB :
945		error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
946		break;
947
948	case SIOCGTABL :
949		error = nat_gettable(data);
950		break;
951
952	default :
953		error = EINVAL;
954		break;
955	}
956done:
957	if (nt != NULL)
958		KFREE(nt);
959	return error;
960}
961
962
963/* ------------------------------------------------------------------------ */
964/* Function:    nat_siocaddnat                                              */
965/* Returns:     int - 0 == success, != 0 == failure                         */
966/* Parameters:  n(I)       - pointer to new NAT rule                        */
967/*              np(I)      - pointer to where to insert new NAT rule        */
968/*              getlock(I) - flag indicating if lock on ipf_nat is held     */
969/* Mutex Locks: ipf_natio                                                   */
970/*                                                                          */
971/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
972/* from information passed to the kernel, then add it  to the appropriate   */
973/* NAT rule table(s).                                                       */
974/* ------------------------------------------------------------------------ */
975static int nat_siocaddnat(n, np, getlock)
976ipnat_t *n, **np;
977int getlock;
978{
979	int error = 0, i, j;
980
981	if (nat_resolverule(n) != 0)
982		return ENOENT;
983
984	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
985		return EINVAL;
986
987	n->in_use = 0;
988	if (n->in_redir & NAT_MAPBLK)
989		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
990	else if (n->in_flags & IPN_AUTOPORTMAP)
991		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
992	else if (n->in_flags & IPN_IPRANGE)
993		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
994	else if (n->in_flags & IPN_SPLIT)
995		n->in_space = 2;
996	else if (n->in_outmsk != 0)
997		n->in_space = ~ntohl(n->in_outmsk);
998	else
999		n->in_space = 1;
1000
1001	/*
1002	 * Calculate the number of valid IP addresses in the output
1003	 * mapping range.  In all cases, the range is inclusive of
1004	 * the start and ending IP addresses.
1005	 * If to a CIDR address, lose 2: broadcast + network address
1006	 *                               (so subtract 1)
1007	 * If to a range, add one.
1008	 * If to a single IP address, set to 1.
1009	 */
1010	if (n->in_space) {
1011		if ((n->in_flags & IPN_IPRANGE) != 0)
1012			n->in_space += 1;
1013		else
1014			n->in_space -= 1;
1015	} else
1016		n->in_space = 1;
1017
1018	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1019	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1020		n->in_nip = ntohl(n->in_outip) + 1;
1021	else if ((n->in_flags & IPN_SPLIT) &&
1022		 (n->in_redir & NAT_REDIRECT))
1023		n->in_nip = ntohl(n->in_inip);
1024	else
1025		n->in_nip = ntohl(n->in_outip);
1026	if (n->in_redir & NAT_MAP) {
1027		n->in_pnext = ntohs(n->in_pmin);
1028		/*
1029		 * Multiply by the number of ports made available.
1030		 */
1031		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1032			n->in_space *= (ntohs(n->in_pmax) -
1033					ntohs(n->in_pmin) + 1);
1034			/*
1035			 * Because two different sources can map to
1036			 * different destinations but use the same
1037			 * local IP#/port #.
1038			 * If the result is smaller than in_space, then
1039			 * we may have wrapped around 32bits.
1040			 */
1041			i = n->in_inmsk;
1042			if ((i != 0) && (i != 0xffffffff)) {
1043				j = n->in_space * (~ntohl(i) + 1);
1044				if (j >= n->in_space)
1045					n->in_space = j;
1046				else
1047					n->in_space = 0xffffffff;
1048			}
1049		}
1050		/*
1051		 * If no protocol is specified, multiple by 256 to allow for
1052		 * at least one IP:IP mapping per protocol.
1053		 */
1054		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1055				j = n->in_space * 256;
1056				if (j >= n->in_space)
1057					n->in_space = j;
1058				else
1059					n->in_space = 0xffffffff;
1060		}
1061	}
1062
1063	/* Otherwise, these fields are preset */
1064
1065	if (getlock) {
1066		WRITE_ENTER(&ipf_nat);
1067	}
1068	n->in_next = NULL;
1069	*np = n;
1070
1071	if (n->in_age[0] != 0)
1072		n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1073
1074	if (n->in_age[1] != 0)
1075		n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
1076
1077	if (n->in_redir & NAT_REDIRECT) {
1078		n->in_flags &= ~IPN_NOTDST;
1079		nat_addrdr(n);
1080	}
1081	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1082		n->in_flags &= ~IPN_NOTSRC;
1083		nat_addnat(n);
1084	}
1085	MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1086
1087	n = NULL;
1088	nat_stats.ns_rules++;
1089#if SOLARIS && !defined(_INET_IP_STACK_H)
1090	pfil_delayed_copy = 0;
1091#endif
1092	if (getlock) {
1093		RWLOCK_EXIT(&ipf_nat);			/* WRITE */
1094	}
1095
1096	return error;
1097}
1098
1099
1100/* ------------------------------------------------------------------------ */
1101/* Function:    nat_resolvrule                                              */
1102/* Returns:     Nil                                                         */
1103/* Parameters:  n(I)  - pointer to NAT rule                                 */
1104/*                                                                          */
1105/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1106/* from information passed to the kernel, then add it  to the appropriate   */
1107/* NAT rule table(s).                                                       */
1108/* ------------------------------------------------------------------------ */
1109static int nat_resolverule(n)
1110ipnat_t *n;
1111{
1112	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1113	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1114
1115	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1116	if (n->in_ifnames[1][0] == '\0') {
1117		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1118		n->in_ifps[1] = n->in_ifps[0];
1119	} else {
1120		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
1121	}
1122
1123	if (n->in_plabel[0] != '\0') {
1124		n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1125		if (n->in_apr == NULL)
1126			return -1;
1127	}
1128	return 0;
1129}
1130
1131
1132/* ------------------------------------------------------------------------ */
1133/* Function:    nat_siocdelnat                                              */
1134/* Returns:     int - 0 == success, != 0 == failure                         */
1135/* Parameters:  n(I)       - pointer to new NAT rule                        */
1136/*              np(I)      - pointer to where to insert new NAT rule        */
1137/*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1138/* Mutex Locks: ipf_natio                                                   */
1139/*                                                                          */
1140/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1141/* from information passed to the kernel, then add it  to the appropriate   */
1142/* NAT rule table(s).                                                       */
1143/* ------------------------------------------------------------------------ */
1144static void nat_siocdelnat(n, np, getlock)
1145ipnat_t *n, **np;
1146int getlock;
1147{
1148	if (getlock) {
1149		WRITE_ENTER(&ipf_nat);
1150	}
1151	if (n->in_redir & NAT_REDIRECT)
1152		nat_delrdr(n);
1153	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1154		nat_delnat(n);
1155	if (nat_list == NULL) {
1156		nat_masks = 0;
1157		rdr_masks = 0;
1158	}
1159
1160	if (n->in_tqehead[0] != NULL) {
1161		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1162			fr_freetimeoutqueue(n->in_tqehead[1]);
1163		}
1164	}
1165
1166	if (n->in_tqehead[1] != NULL) {
1167		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1168			fr_freetimeoutqueue(n->in_tqehead[1]);
1169		}
1170	}
1171
1172	*np = n->in_next;
1173
1174	if (n->in_use == 0) {
1175		if (n->in_apr)
1176			appr_free(n->in_apr);
1177		MUTEX_DESTROY(&n->in_lock);
1178		KFREE(n);
1179		nat_stats.ns_rules--;
1180#if SOLARIS && !defined(_INET_IP_STACK_H)
1181		if (nat_stats.ns_rules == 0)
1182			pfil_delayed_copy = 1;
1183#endif
1184	} else {
1185		n->in_flags |= IPN_DELETE;
1186		n->in_next = NULL;
1187	}
1188	if (getlock) {
1189		RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
1190	}
1191}
1192
1193
1194/* ------------------------------------------------------------------------ */
1195/* Function:    fr_natgetsz                                                 */
1196/* Returns:     int - 0 == success, != 0 is the error value.                */
1197/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1198/*                        get the size of.                                  */
1199/*                                                                          */
1200/* Handle SIOCSTGSZ.                                                        */
1201/* Return the size of the nat list entry to be copied back to user space.   */
1202/* The size of the entry is stored in the ng_sz field and the enture natget */
1203/* structure is copied back to the user.                                    */
1204/* ------------------------------------------------------------------------ */
1205static int fr_natgetsz(data)
1206caddr_t data;
1207{
1208	ap_session_t *aps;
1209	nat_t *nat, *n;
1210	natget_t ng;
1211
1212	if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1213		return EFAULT;
1214
1215	nat = ng.ng_ptr;
1216	if (!nat) {
1217		nat = nat_instances;
1218		ng.ng_sz = 0;
1219		/*
1220		 * Empty list so the size returned is 0.  Simple.
1221		 */
1222		if (nat == NULL) {
1223			if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1224				return EFAULT;
1225			return 0;
1226		}
1227	} else {
1228		/*
1229		 * Make sure the pointer we're copying from exists in the
1230		 * current list of entries.  Security precaution to prevent
1231		 * copying of random kernel data.
1232		 */
1233		for (n = nat_instances; n; n = n->nat_next)
1234			if (n == nat)
1235				break;
1236		if (!n)
1237			return ESRCH;
1238	}
1239
1240	/*
1241	 * Incluse any space required for proxy data structures.
1242	 */
1243	ng.ng_sz = sizeof(nat_save_t);
1244	aps = nat->nat_aps;
1245	if (aps != NULL) {
1246		ng.ng_sz += sizeof(ap_session_t) - 4;
1247		if (aps->aps_data != 0)
1248			ng.ng_sz += aps->aps_psiz;
1249	}
1250
1251	if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1252		return EFAULT;
1253	return 0;
1254}
1255
1256
1257/* ------------------------------------------------------------------------ */
1258/* Function:    fr_natgetent                                                */
1259/* Returns:     int - 0 == success, != 0 is the error value.                */
1260/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1261/*                        to NAT structure to copy out.                     */
1262/*                                                                          */
1263/* Handle SIOCSTGET.                                                        */
1264/* Copies out NAT entry to user space.  Any additional data held for a      */
1265/* proxy is also copied, as to is the NAT rule which was responsible for it */
1266/* ------------------------------------------------------------------------ */
1267static int fr_natgetent(data)
1268caddr_t data;
1269{
1270	int error, outsize;
1271	ap_session_t *aps;
1272	nat_save_t *ipn, ipns;
1273	nat_t *n, *nat;
1274
1275	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1276	if (error != 0)
1277		return error;
1278
1279	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1280		return EINVAL;
1281
1282	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1283	if (ipn == NULL)
1284		return ENOMEM;
1285
1286	ipn->ipn_dsize = ipns.ipn_dsize;
1287	nat = ipns.ipn_next;
1288	if (nat == NULL) {
1289		nat = nat_instances;
1290		if (nat == NULL) {
1291			if (nat_instances == NULL)
1292				error = ENOENT;
1293			goto finished;
1294		}
1295	} else {
1296		/*
1297		 * Make sure the pointer we're copying from exists in the
1298		 * current list of entries.  Security precaution to prevent
1299		 * copying of random kernel data.
1300		 */
1301		for (n = nat_instances; n; n = n->nat_next)
1302			if (n == nat)
1303				break;
1304		if (n == NULL) {
1305			error = ESRCH;
1306			goto finished;
1307		}
1308	}
1309	ipn->ipn_next = nat->nat_next;
1310
1311	/*
1312	 * Copy the NAT structure.
1313	 */
1314	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1315
1316	/*
1317	 * If we have a pointer to the NAT rule it belongs to, save that too.
1318	 */
1319	if (nat->nat_ptr != NULL)
1320		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1321		      sizeof(ipn->ipn_ipnat));
1322
1323	/*
1324	 * If we also know the NAT entry has an associated filter rule,
1325	 * save that too.
1326	 */
1327	if (nat->nat_fr != NULL)
1328		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1329		      sizeof(ipn->ipn_fr));
1330
1331	/*
1332	 * Last but not least, if there is an application proxy session set
1333	 * up for this NAT entry, then copy that out too, including any
1334	 * private data saved along side it by the proxy.
1335	 */
1336	aps = nat->nat_aps;
1337	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1338	if (aps != NULL) {
1339		char *s;
1340
1341		if (outsize < sizeof(*aps)) {
1342			error = ENOBUFS;
1343			goto finished;
1344		}
1345
1346		s = ipn->ipn_data;
1347		bcopy((char *)aps, s, sizeof(*aps));
1348		s += sizeof(*aps);
1349		outsize -= sizeof(*aps);
1350		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1351			bcopy(aps->aps_data, s, aps->aps_psiz);
1352		else
1353			error = ENOBUFS;
1354	}
1355	if (error == 0) {
1356		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1357	}
1358
1359finished:
1360	if (ipn != NULL) {
1361		KFREES(ipn, ipns.ipn_dsize);
1362	}
1363	return error;
1364}
1365
1366
1367/* ------------------------------------------------------------------------ */
1368/* Function:    fr_natputent                                                */
1369/* Returns:     int - 0 == success, != 0 is the error value.                */
1370/* Parameters:  data(I) -     pointer to natget structure with NAT          */
1371/*                            structure information to load into the kernel */
1372/*              getlock(I) - flag indicating whether or not a write lock    */
1373/*                           on ipf_nat is already held.                    */
1374/*                                                                          */
1375/* Handle SIOCSTPUT.                                                        */
1376/* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1377/* firewall rule data structures, if pointers to them indicate so.          */
1378/* ------------------------------------------------------------------------ */
1379static int fr_natputent(data, getlock)
1380caddr_t data;
1381int getlock;
1382{
1383	nat_save_t ipn, *ipnn;
1384	ap_session_t *aps;
1385	nat_t *n, *nat;
1386	frentry_t *fr;
1387	fr_info_t fin;
1388	ipnat_t *in;
1389	int error;
1390
1391	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1392	if (error != 0)
1393		return error;
1394
1395	/*
1396	 * Initialise early because of code at junkput label.
1397	 */
1398	in = NULL;
1399	aps = NULL;
1400	nat = NULL;
1401	ipnn = NULL;
1402	fr = NULL;
1403
1404	/*
1405	 * New entry, copy in the rest of the NAT entry if it's size is more
1406	 * than just the nat_t structure.
1407	 */
1408	if (ipn.ipn_dsize > sizeof(ipn)) {
1409		if (ipn.ipn_dsize > 81920) {
1410			error = ENOMEM;
1411			goto junkput;
1412		}
1413
1414		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1415		if (ipnn == NULL)
1416			return ENOMEM;
1417
1418		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1419		if (error != 0) {
1420			error = EFAULT;
1421			goto junkput;
1422		}
1423	} else
1424		ipnn = &ipn;
1425
1426	KMALLOC(nat, nat_t *);
1427	if (nat == NULL) {
1428		error = ENOMEM;
1429		goto junkput;
1430	}
1431
1432	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1433	/*
1434	 * Initialize all these so that nat_delete() doesn't cause a crash.
1435	 */
1436	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1437	nat->nat_tqe.tqe_pnext = NULL;
1438	nat->nat_tqe.tqe_next = NULL;
1439	nat->nat_tqe.tqe_ifq = NULL;
1440	nat->nat_tqe.tqe_parent = nat;
1441
1442	/*
1443	 * Restore the rule associated with this nat session
1444	 */
1445	in = ipnn->ipn_nat.nat_ptr;
1446	if (in != NULL) {
1447		KMALLOC(in, ipnat_t *);
1448		nat->nat_ptr = in;
1449		if (in == NULL) {
1450			error = ENOMEM;
1451			goto junkput;
1452		}
1453		bzero((char *)in, offsetof(struct ipnat, in_next6));
1454		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1455		in->in_use = 1;
1456		in->in_flags |= IPN_DELETE;
1457
1458		ATOMIC_INC(nat_stats.ns_rules);
1459
1460		if (nat_resolverule(in) != 0) {
1461			error = ESRCH;
1462			goto junkput;
1463		}
1464	}
1465
1466	/*
1467	 * Check that the NAT entry doesn't already exist in the kernel.
1468	 *
1469	 * For NAT_OUTBOUND, we're lookup for a duplicate MAP entry.  To do
1470	 * this, we check to see if the inbound combination of addresses and
1471	 * ports is already known.  Similar logic is applied for NAT_INBOUND.
1472	 *
1473	 */
1474	bzero((char *)&fin, sizeof(fin));
1475	fin.fin_p = nat->nat_p;
1476	if (nat->nat_dir == NAT_OUTBOUND) {
1477		fin.fin_ifp = nat->nat_ifps[0];
1478		fin.fin_data[0] = ntohs(nat->nat_oport);
1479		fin.fin_data[1] = ntohs(nat->nat_outport);
1480		if (getlock) {
1481			READ_ENTER(&ipf_nat);
1482		}
1483		n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1484				 nat->nat_oip, nat->nat_inip);
1485		if (getlock) {
1486			RWLOCK_EXIT(&ipf_nat);
1487		}
1488		if (n != NULL) {
1489			error = EEXIST;
1490			goto junkput;
1491		}
1492	} else if (nat->nat_dir == NAT_INBOUND) {
1493		fin.fin_ifp = nat->nat_ifps[0];
1494		fin.fin_data[0] = ntohs(nat->nat_outport);
1495		fin.fin_data[1] = ntohs(nat->nat_oport);
1496		if (getlock) {
1497			READ_ENTER(&ipf_nat);
1498		}
1499		n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1500				  nat->nat_outip, nat->nat_oip);
1501		if (getlock) {
1502			RWLOCK_EXIT(&ipf_nat);
1503		}
1504		if (n != NULL) {
1505			error = EEXIST;
1506			goto junkput;
1507		}
1508	} else {
1509		error = EINVAL;
1510		goto junkput;
1511	}
1512
1513	/*
1514	 * Restore ap_session_t structure.  Include the private data allocated
1515	 * if it was there.
1516	 */
1517	aps = nat->nat_aps;
1518	if (aps != NULL) {
1519		KMALLOC(aps, ap_session_t *);
1520		nat->nat_aps = aps;
1521		if (aps == NULL) {
1522			error = ENOMEM;
1523			goto junkput;
1524		}
1525		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1526		if (in != NULL)
1527			aps->aps_apr = in->in_apr;
1528		else
1529			aps->aps_apr = NULL;
1530		if (aps->aps_psiz != 0) {
1531			if (aps->aps_psiz > 81920) {
1532				error = ENOMEM;
1533				goto junkput;
1534			}
1535			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1536			if (aps->aps_data == NULL) {
1537				error = ENOMEM;
1538				goto junkput;
1539			}
1540			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1541			      aps->aps_psiz);
1542		} else {
1543			aps->aps_psiz = 0;
1544			aps->aps_data = NULL;
1545		}
1546	}
1547
1548	/*
1549	 * If there was a filtering rule associated with this entry then
1550	 * build up a new one.
1551	 */
1552	fr = nat->nat_fr;
1553	if (fr != NULL) {
1554		if ((nat->nat_flags & SI_NEWFR) != 0) {
1555			KMALLOC(fr, frentry_t *);
1556			nat->nat_fr = fr;
1557			if (fr == NULL) {
1558				error = ENOMEM;
1559				goto junkput;
1560			}
1561			ipnn->ipn_nat.nat_fr = fr;
1562			fr->fr_ref = 1;
1563			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1564			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1565
1566			fr->fr_ref = 1;
1567			fr->fr_dsize = 0;
1568			fr->fr_data = NULL;
1569			fr->fr_type = FR_T_NONE;
1570
1571			MUTEX_NUKE(&fr->fr_lock);
1572			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1573		} else {
1574			if (getlock) {
1575				READ_ENTER(&ipf_nat);
1576			}
1577			for (n = nat_instances; n; n = n->nat_next)
1578				if (n->nat_fr == fr)
1579					break;
1580
1581			if (n != NULL) {
1582				MUTEX_ENTER(&fr->fr_lock);
1583				fr->fr_ref++;
1584				MUTEX_EXIT(&fr->fr_lock);
1585			}
1586			if (getlock) {
1587				RWLOCK_EXIT(&ipf_nat);
1588			}
1589
1590			if (!n) {
1591				error = ESRCH;
1592				goto junkput;
1593			}
1594		}
1595	}
1596
1597	if (ipnn != &ipn) {
1598		KFREES(ipnn, ipn.ipn_dsize);
1599		ipnn = NULL;
1600	}
1601
1602	if (getlock) {
1603		WRITE_ENTER(&ipf_nat);
1604	}
1605	error = nat_insert(nat, nat->nat_rev);
1606	if ((error == 0) && (aps != NULL)) {
1607		aps->aps_next = ap_sess_list;
1608		ap_sess_list = aps;
1609	}
1610	if (getlock) {
1611		RWLOCK_EXIT(&ipf_nat);
1612	}
1613
1614	if (error == 0)
1615		return 0;
1616
1617	error = ENOMEM;
1618
1619junkput:
1620	if (fr != NULL)
1621		(void) fr_derefrule(&fr);
1622
1623	if ((ipnn != NULL) && (ipnn != &ipn)) {
1624		KFREES(ipnn, ipn.ipn_dsize);
1625	}
1626	if (nat != NULL) {
1627		if (aps != NULL) {
1628			if (aps->aps_data != NULL) {
1629				KFREES(aps->aps_data, aps->aps_psiz);
1630			}
1631			KFREE(aps);
1632		}
1633		if (in != NULL) {
1634			if (in->in_apr)
1635				appr_free(in->in_apr);
1636			KFREE(in);
1637		}
1638		KFREE(nat);
1639	}
1640	return error;
1641}
1642
1643
1644/* ------------------------------------------------------------------------ */
1645/* Function:    nat_delete                                                  */
1646/* Returns:     Nil                                                         */
1647/* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1648/*              logtype(I) - type of LOG record to create before deleting   */
1649/* Write Lock:  ipf_nat                                                     */
1650/*                                                                          */
1651/* Delete a nat entry from the various lists and table.  If NAT logging is  */
1652/* enabled then generate a NAT log record for this event.                   */
1653/* ------------------------------------------------------------------------ */
1654void nat_delete(nat, logtype)
1655struct nat *nat;
1656int logtype;
1657{
1658	struct ipnat *ipn;
1659	int removed = 0;
1660
1661	if (logtype != 0 && nat_logging != 0)
1662		nat_log(nat, logtype);
1663
1664	/*
1665	 * Take it as a general indication that all the pointers are set if
1666	 * nat_pnext is set.
1667	 */
1668	if (nat->nat_pnext != NULL) {
1669		removed = 1;
1670
1671		nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1672		nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1673
1674		*nat->nat_pnext = nat->nat_next;
1675		if (nat->nat_next != NULL) {
1676			nat->nat_next->nat_pnext = nat->nat_pnext;
1677			nat->nat_next = NULL;
1678		}
1679		nat->nat_pnext = NULL;
1680
1681		*nat->nat_phnext[0] = nat->nat_hnext[0];
1682		if (nat->nat_hnext[0] != NULL) {
1683			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1684			nat->nat_hnext[0] = NULL;
1685		}
1686		nat->nat_phnext[0] = NULL;
1687
1688		*nat->nat_phnext[1] = nat->nat_hnext[1];
1689		if (nat->nat_hnext[1] != NULL) {
1690			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1691			nat->nat_hnext[1] = NULL;
1692		}
1693		nat->nat_phnext[1] = NULL;
1694
1695		if ((nat->nat_flags & SI_WILDP) != 0)
1696			nat_stats.ns_wilds--;
1697	}
1698
1699	if (nat->nat_me != NULL) {
1700		*nat->nat_me = NULL;
1701		nat->nat_me = NULL;
1702	}
1703
1704	if (nat->nat_tqe.tqe_ifq != NULL)
1705		fr_deletequeueentry(&nat->nat_tqe);
1706
1707	if (logtype == NL_EXPIRE)
1708		nat_stats.ns_expire++;
1709
1710	MUTEX_ENTER(&nat->nat_lock);
1711	/*
1712	 * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1713	 * This happens when a nat'd packet is blocked and we want to throw
1714	 * away the NAT session.
1715	 */
1716	if (logtype == NL_DESTROY) {
1717		if (nat->nat_ref > 2) {
1718			nat->nat_ref -= 2;
1719			MUTEX_EXIT(&nat->nat_lock);
1720			if (removed)
1721				nat_stats.ns_orphans++;
1722			return;
1723		}
1724	} else if (nat->nat_ref > 1) {
1725		nat->nat_ref--;
1726		MUTEX_EXIT(&nat->nat_lock);
1727		if (removed)
1728			nat_stats.ns_orphans++;
1729		return;
1730	}
1731	MUTEX_EXIT(&nat->nat_lock);
1732
1733	/*
1734	 * At this point, nat_ref is 1, doing "--" would make it 0..
1735	 */
1736	nat->nat_ref = 0;
1737	if (!removed)
1738		nat_stats.ns_orphans--;
1739
1740#ifdef	IPFILTER_SYNC
1741	if (nat->nat_sync)
1742		ipfsync_del(nat->nat_sync);
1743#endif
1744
1745	if (nat->nat_fr != NULL)
1746		(void) fr_derefrule(&nat->nat_fr);
1747
1748	if (nat->nat_hm != NULL)
1749		fr_hostmapdel(&nat->nat_hm);
1750
1751	/*
1752	 * If there is an active reference from the nat entry to its parent
1753	 * rule, decrement the rule's reference count and free it too if no
1754	 * longer being used.
1755	 */
1756	ipn = nat->nat_ptr;
1757	if (ipn != NULL) {
1758		fr_ipnatderef(&ipn);
1759	}
1760
1761	MUTEX_DESTROY(&nat->nat_lock);
1762
1763	aps_free(nat->nat_aps);
1764	nat_stats.ns_inuse--;
1765
1766	/*
1767	 * If there's a fragment table entry too for this nat entry, then
1768	 * dereference that as well.  This is after nat_lock is released
1769	 * because of Tru64.
1770	 */
1771	fr_forgetnat((void *)nat);
1772
1773	KFREE(nat);
1774}
1775
1776
1777/* ------------------------------------------------------------------------ */
1778/* Function:    nat_flushtable                                              */
1779/* Returns:     int - number of NAT rules deleted                           */
1780/* Parameters:  Nil                                                         */
1781/*                                                                          */
1782/* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1783/* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1784/* ------------------------------------------------------------------------ */
1785/*
1786 * nat_flushtable - clear the NAT table of all mapping entries.
1787 */
1788static int nat_flushtable()
1789{
1790	nat_t *nat;
1791	int j = 0;
1792
1793	/*
1794	 * ALL NAT mappings deleted, so lets just make the deletions
1795	 * quicker.
1796	 */
1797	if (nat_table[0] != NULL)
1798		bzero((char *)nat_table[0],
1799		      sizeof(nat_table[0]) * ipf_nattable_sz);
1800	if (nat_table[1] != NULL)
1801		bzero((char *)nat_table[1],
1802		      sizeof(nat_table[1]) * ipf_nattable_sz);
1803
1804	while ((nat = nat_instances) != NULL) {
1805		nat_delete(nat, NL_FLUSH);
1806		j++;
1807	}
1808
1809	nat_stats.ns_inuse = 0;
1810	return j;
1811}
1812
1813
1814/* ------------------------------------------------------------------------ */
1815/* Function:    nat_clearlist                                               */
1816/* Returns:     int - number of NAT/RDR rules deleted                       */
1817/* Parameters:  Nil                                                         */
1818/*                                                                          */
1819/* Delete all rules in the current list of rules.  There is nothing elegant */
1820/* about this cleanup: simply free all entries on the list of rules and     */
1821/* clear out the tables used for hashed NAT rule lookups.                   */
1822/* ------------------------------------------------------------------------ */
1823static int nat_clearlist()
1824{
1825	ipnat_t *n, **np = &nat_list;
1826	int i = 0;
1827
1828	if (nat_rules != NULL)
1829		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1830	if (rdr_rules != NULL)
1831		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1832
1833	while ((n = *np) != NULL) {
1834		*np = n->in_next;
1835		if (n->in_use == 0) {
1836			if (n->in_apr != NULL)
1837				appr_free(n->in_apr);
1838			MUTEX_DESTROY(&n->in_lock);
1839			KFREE(n);
1840			nat_stats.ns_rules--;
1841		} else {
1842			n->in_flags |= IPN_DELETE;
1843			n->in_next = NULL;
1844		}
1845		i++;
1846	}
1847#if SOLARIS && !defined(_INET_IP_STACK_H)
1848	pfil_delayed_copy = 1;
1849#endif
1850	nat_masks = 0;
1851	rdr_masks = 0;
1852	return i;
1853}
1854
1855
1856/* ------------------------------------------------------------------------ */
1857/* Function:    nat_newmap                                                  */
1858/* Returns:     int - -1 == error, 0 == success                             */
1859/* Parameters:  fin(I) - pointer to packet information                      */
1860/*              nat(I) - pointer to NAT entry                               */
1861/*              ni(I)  - pointer to structure with misc. information needed */
1862/*                       to create new NAT entry.                           */
1863/*                                                                          */
1864/* Given an empty NAT structure, populate it with new information about a   */
1865/* new NAT session, as defined by the matching NAT rule.                    */
1866/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1867/* to the new IP address for the translation.                               */
1868/* ------------------------------------------------------------------------ */
1869static INLINE int nat_newmap(fin, nat, ni)
1870fr_info_t *fin;
1871nat_t *nat;
1872natinfo_t *ni;
1873{
1874	u_short st_port, dport, sport, port, sp, dp;
1875	struct in_addr in, inb;
1876	hostmap_t *hm;
1877	u_32_t flags;
1878	u_32_t st_ip;
1879	ipnat_t *np;
1880	nat_t *natl;
1881	int l;
1882
1883	/*
1884	 * If it's an outbound packet which doesn't match any existing
1885	 * record, then create a new port
1886	 */
1887	l = 0;
1888	hm = NULL;
1889	np = ni->nai_np;
1890	st_ip = np->in_nip;
1891	st_port = np->in_pnext;
1892	flags = ni->nai_flags;
1893	sport = ni->nai_sport;
1894	dport = ni->nai_dport;
1895
1896	/*
1897	 * Do a loop until we either run out of entries to try or we find
1898	 * a NAT mapping that isn't currently being used.  This is done
1899	 * because the change to the source is not (usually) being fixed.
1900	 */
1901	do {
1902		port = 0;
1903		in.s_addr = htonl(np->in_nip);
1904		if (l == 0) {
1905			/*
1906			 * Check to see if there is an existing NAT
1907			 * setup for this IP address pair.
1908			 */
1909			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1910					 in, 0);
1911			if (hm != NULL)
1912				in.s_addr = hm->hm_mapip.s_addr;
1913		} else if ((l == 1) && (hm != NULL)) {
1914			fr_hostmapdel(&hm);
1915		}
1916		in.s_addr = ntohl(in.s_addr);
1917
1918		nat->nat_hm = hm;
1919
1920		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1921			if (l > 0)
1922				return -1;
1923		}
1924
1925		if (np->in_redir == NAT_BIMAP &&
1926		    np->in_inmsk == np->in_outmsk) {
1927			/*
1928			 * map the address block in a 1:1 fashion
1929			 */
1930			in.s_addr = np->in_outip;
1931			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1932			in.s_addr = ntohl(in.s_addr);
1933
1934		} else if (np->in_redir & NAT_MAPBLK) {
1935			if ((l >= np->in_ppip) || ((l > 0) &&
1936			     !(flags & IPN_TCPUDP)))
1937				return -1;
1938			/*
1939			 * map-block - Calculate destination address.
1940			 */
1941			in.s_addr = ntohl(fin->fin_saddr);
1942			in.s_addr &= ntohl(~np->in_inmsk);
1943			inb.s_addr = in.s_addr;
1944			in.s_addr /= np->in_ippip;
1945			in.s_addr &= ntohl(~np->in_outmsk);
1946			in.s_addr += ntohl(np->in_outip);
1947			/*
1948			 * Calculate destination port.
1949			 */
1950			if ((flags & IPN_TCPUDP) &&
1951			    (np->in_ppip != 0)) {
1952				port = ntohs(sport) + l;
1953				port %= np->in_ppip;
1954				port += np->in_ppip *
1955					(inb.s_addr % np->in_ippip);
1956				port += MAPBLK_MINPORT;
1957				port = htons(port);
1958			}
1959
1960		} else if ((np->in_outip == 0) &&
1961			   (np->in_outmsk == 0xffffffff)) {
1962			/*
1963			 * 0/32 - use the interface's IP address.
1964			 */
1965			if ((l > 0) ||
1966			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1967				       &in, NULL) == -1)
1968				return -1;
1969			in.s_addr = ntohl(in.s_addr);
1970
1971		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1972			/*
1973			 * 0/0 - use the original source address/port.
1974			 */
1975			if (l > 0)
1976				return -1;
1977			in.s_addr = ntohl(fin->fin_saddr);
1978
1979		} else if ((np->in_outmsk != 0xffffffff) &&
1980			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
1981			np->in_nip++;
1982
1983		natl = NULL;
1984
1985		if ((flags & IPN_TCPUDP) &&
1986		    ((np->in_redir & NAT_MAPBLK) == 0) &&
1987		    (np->in_flags & IPN_AUTOPORTMAP)) {
1988			/*
1989			 * "ports auto" (without map-block)
1990			 */
1991			if ((l > 0) && (l % np->in_ppip == 0)) {
1992				if (l > np->in_space) {
1993					return -1;
1994				} else if ((l > np->in_ppip) &&
1995					   np->in_outmsk != 0xffffffff)
1996					np->in_nip++;
1997			}
1998			if (np->in_ppip != 0) {
1999				port = ntohs(sport);
2000				port += (l % np->in_ppip);
2001				port %= np->in_ppip;
2002				port += np->in_ppip *
2003					(ntohl(fin->fin_saddr) %
2004					 np->in_ippip);
2005				port += MAPBLK_MINPORT;
2006				port = htons(port);
2007			}
2008
2009		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2010			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2011			/*
2012			 * Standard port translation.  Select next port.
2013			 */
2014			port = htons(np->in_pnext++);
2015
2016			if (np->in_pnext > ntohs(np->in_pmax)) {
2017				np->in_pnext = ntohs(np->in_pmin);
2018				if (np->in_outmsk != 0xffffffff)
2019					np->in_nip++;
2020			}
2021		}
2022
2023		if (np->in_flags & IPN_IPRANGE) {
2024			if (np->in_nip > ntohl(np->in_outmsk))
2025				np->in_nip = ntohl(np->in_outip);
2026		} else {
2027			if ((np->in_outmsk != 0xffffffff) &&
2028			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2029			    ntohl(np->in_outip))
2030				np->in_nip = ntohl(np->in_outip) + 1;
2031		}
2032
2033		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2034			port = sport;
2035
2036		/*
2037		 * Here we do a lookup of the connection as seen from
2038		 * the outside.  If an IP# pair already exists, try
2039		 * again.  So if you have A->B becomes C->B, you can
2040		 * also have D->E become C->E but not D->B causing
2041		 * another C->B.  Also take protocol and ports into
2042		 * account when determining whether a pre-existing
2043		 * NAT setup will cause an external conflict where
2044		 * this is appropriate.
2045		 */
2046		inb.s_addr = htonl(in.s_addr);
2047		sp = fin->fin_data[0];
2048		dp = fin->fin_data[1];
2049		fin->fin_data[0] = fin->fin_data[1];
2050		fin->fin_data[1] = htons(port);
2051		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2052				    (u_int)fin->fin_p, fin->fin_dst, inb);
2053		fin->fin_data[0] = sp;
2054		fin->fin_data[1] = dp;
2055
2056		/*
2057		 * Has the search wrapped around and come back to the
2058		 * start ?
2059		 */
2060		if ((natl != NULL) &&
2061		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2062		    (np->in_nip != 0) && (st_ip == np->in_nip))
2063			return -1;
2064		l++;
2065	} while (natl != NULL);
2066
2067	if (np->in_space > 0)
2068		np->in_space--;
2069
2070	/* Setup the NAT table */
2071	nat->nat_inip = fin->fin_src;
2072	nat->nat_outip.s_addr = htonl(in.s_addr);
2073	nat->nat_oip = fin->fin_dst;
2074	if (nat->nat_hm == NULL)
2075		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2076					  nat->nat_outip, 0);
2077
2078	/*
2079	 * The ICMP checksum does not have a pseudo header containing
2080	 * the IP addresses
2081	 */
2082	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2083	ni->nai_sum2 = LONG_SUM(in.s_addr);
2084	if ((flags & IPN_TCPUDP)) {
2085		ni->nai_sum1 += ntohs(sport);
2086		ni->nai_sum2 += ntohs(port);
2087	}
2088
2089	if (flags & IPN_TCPUDP) {
2090		nat->nat_inport = sport;
2091		nat->nat_outport = port;	/* sport */
2092		nat->nat_oport = dport;
2093		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2094	} else if (flags & IPN_ICMPQUERY) {
2095		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2096		nat->nat_inport = port;
2097		nat->nat_outport = port;
2098	} else if (fin->fin_p == IPPROTO_GRE) {
2099#if 0
2100		nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2101		if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2102			nat->nat_oport = 0;/*fin->fin_data[1];*/
2103			nat->nat_inport = 0;/*fin->fin_data[0];*/
2104			nat->nat_outport = 0;/*fin->fin_data[0];*/
2105			nat->nat_call[0] = fin->fin_data[0];
2106			nat->nat_call[1] = fin->fin_data[0];
2107		}
2108#endif
2109	}
2110	ni->nai_ip.s_addr = in.s_addr;
2111	ni->nai_port = port;
2112	ni->nai_nport = dport;
2113	return 0;
2114}
2115
2116
2117/* ------------------------------------------------------------------------ */
2118/* Function:    nat_newrdr                                                  */
2119/* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2120/*                    allow rule to be moved if IPN_ROUNDR is set.          */
2121/* Parameters:  fin(I) - pointer to packet information                      */
2122/*              nat(I) - pointer to NAT entry                               */
2123/*              ni(I)  - pointer to structure with misc. information needed */
2124/*                       to create new NAT entry.                           */
2125/*                                                                          */
2126/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2127/* to the new IP address for the translation.                               */
2128/* ------------------------------------------------------------------------ */
2129static INLINE int nat_newrdr(fin, nat, ni)
2130fr_info_t *fin;
2131nat_t *nat;
2132natinfo_t *ni;
2133{
2134	u_short nport, dport, sport;
2135	struct in_addr in, inb;
2136	u_short sp, dp;
2137	hostmap_t *hm;
2138	u_32_t flags;
2139	ipnat_t *np;
2140	nat_t *natl;
2141	int move;
2142
2143	move = 1;
2144	hm = NULL;
2145	in.s_addr = 0;
2146	np = ni->nai_np;
2147	flags = ni->nai_flags;
2148	sport = ni->nai_sport;
2149	dport = ni->nai_dport;
2150
2151	/*
2152	 * If the matching rule has IPN_STICKY set, then we want to have the
2153	 * same rule kick in as before.  Why would this happen?  If you have
2154	 * a collection of rdr rules with "round-robin sticky", the current
2155	 * packet might match a different one to the previous connection but
2156	 * we want the same destination to be used.
2157	 */
2158	if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2159	    ((np->in_flags & IPN_STICKY) != 0)) {
2160		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2161				 (u_32_t)dport);
2162		if (hm != NULL) {
2163			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2164			np = hm->hm_ipnat;
2165			ni->nai_np = np;
2166			move = 0;
2167		}
2168	}
2169
2170	/*
2171	 * Otherwise, it's an inbound packet. Most likely, we don't
2172	 * want to rewrite source ports and source addresses. Instead,
2173	 * we want to rewrite to a fixed internal address and fixed
2174	 * internal port.
2175	 */
2176	if (np->in_flags & IPN_SPLIT) {
2177		in.s_addr = np->in_nip;
2178
2179		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2180			hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2181					 in, (u_32_t)dport);
2182			if (hm != NULL) {
2183				in.s_addr = hm->hm_mapip.s_addr;
2184				move = 0;
2185			}
2186		}
2187
2188		if (hm == NULL || hm->hm_ref == 1) {
2189			if (np->in_inip == htonl(in.s_addr)) {
2190				np->in_nip = ntohl(np->in_inmsk);
2191				move = 0;
2192			} else {
2193				np->in_nip = ntohl(np->in_inip);
2194			}
2195		}
2196
2197	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2198		/*
2199		 * 0/32 - use the interface's IP address.
2200		 */
2201		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2202			return -1;
2203		in.s_addr = ntohl(in.s_addr);
2204
2205	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2206		/*
2207		 * 0/0 - use the original destination address/port.
2208		 */
2209		in.s_addr = ntohl(fin->fin_daddr);
2210
2211	} else if (np->in_redir == NAT_BIMAP &&
2212		   np->in_inmsk == np->in_outmsk) {
2213		/*
2214		 * map the address block in a 1:1 fashion
2215		 */
2216		in.s_addr = np->in_inip;
2217		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2218		in.s_addr = ntohl(in.s_addr);
2219	} else {
2220		in.s_addr = ntohl(np->in_inip);
2221	}
2222
2223	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2224		nport = dport;
2225	else {
2226		/*
2227		 * Whilst not optimized for the case where
2228		 * pmin == pmax, the gain is not significant.
2229		 */
2230		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2231		    (np->in_pmin != np->in_pmax)) {
2232			nport = ntohs(dport) - ntohs(np->in_pmin) +
2233				ntohs(np->in_pnext);
2234			nport = htons(nport);
2235		} else
2236			nport = np->in_pnext;
2237	}
2238
2239	/*
2240	 * When the redirect-to address is set to 0.0.0.0, just
2241	 * assume a blank `forwarding' of the packet.  We don't
2242	 * setup any translation for this either.
2243	 */
2244	if (in.s_addr == 0) {
2245		if (nport == dport)
2246			return -1;
2247		in.s_addr = ntohl(fin->fin_daddr);
2248	}
2249
2250	/*
2251	 * Check to see if this redirect mapping already exists and if
2252	 * it does, return "failure" (allowing it to be created will just
2253	 * cause one or both of these "connections" to stop working.)
2254	 */
2255	inb.s_addr = htonl(in.s_addr);
2256	sp = fin->fin_data[0];
2257	dp = fin->fin_data[1];
2258	fin->fin_data[1] = fin->fin_data[0];
2259	fin->fin_data[0] = ntohs(nport);
2260	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2261			     (u_int)fin->fin_p, inb, fin->fin_src);
2262	fin->fin_data[0] = sp;
2263	fin->fin_data[1] = dp;
2264	if (natl != NULL)
2265		return -1;
2266
2267	nat->nat_inip.s_addr = htonl(in.s_addr);
2268	nat->nat_outip = fin->fin_dst;
2269	nat->nat_oip = fin->fin_src;
2270	if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2271		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
2272					  (u_32_t)dport);
2273
2274	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2275	ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2276
2277	ni->nai_ip.s_addr = in.s_addr;
2278	ni->nai_nport = nport;
2279	ni->nai_port = sport;
2280
2281	if (flags & IPN_TCPUDP) {
2282		nat->nat_inport = nport;
2283		nat->nat_outport = dport;
2284		nat->nat_oport = sport;
2285		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2286	} else if (flags & IPN_ICMPQUERY) {
2287		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2288		nat->nat_inport = nport;
2289		nat->nat_outport = nport;
2290	} else if (fin->fin_p == IPPROTO_GRE) {
2291#if 0
2292		nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2293		if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2294			nat->nat_call[0] = fin->fin_data[0];
2295			nat->nat_call[1] = fin->fin_data[1];
2296			nat->nat_oport = 0; /*fin->fin_data[0];*/
2297			nat->nat_inport = 0; /*fin->fin_data[1];*/
2298			nat->nat_outport = 0; /*fin->fin_data[1];*/
2299		}
2300#endif
2301	}
2302
2303	return move;
2304}
2305
2306/* ------------------------------------------------------------------------ */
2307/* Function:    nat_new                                                     */
2308/* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2309/*                       else pointer to new NAT structure                  */
2310/* Parameters:  fin(I)       - pointer to packet information                */
2311/*              np(I)        - pointer to NAT rule                          */
2312/*              natsave(I)   - pointer to where to store NAT struct pointer */
2313/*              flags(I)     - flags describing the current packet          */
2314/*              direction(I) - direction of packet (in/out)                 */
2315/* Write Lock:  ipf_nat                                                     */
2316/*                                                                          */
2317/* Attempts to create a new NAT entry.  Does not actually change the packet */
2318/* in any way.                                                              */
2319/*                                                                          */
2320/* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2321/* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2322/* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2323/* and (3) building that structure and putting it into the NAT table(s).    */
2324/*                                                                          */
2325/* NOTE: natsave should NOT be used top point back to an ipstate_t struct   */
2326/*       as it can result in memory being corrupted.                        */
2327/* ------------------------------------------------------------------------ */
2328nat_t *nat_new(fin, np, natsave, flags, direction)
2329fr_info_t *fin;
2330ipnat_t *np;
2331nat_t **natsave;
2332u_int flags;
2333int direction;
2334{
2335	u_short port = 0, sport = 0, dport = 0, nport = 0;
2336	tcphdr_t *tcp = NULL;
2337	hostmap_t *hm = NULL;
2338	struct in_addr in;
2339	nat_t *nat, *natl;
2340	u_int nflags;
2341	natinfo_t ni;
2342	u_32_t sumd;
2343	int move;
2344#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2345	qpktinfo_t *qpi = fin->fin_qpi;
2346#endif
2347
2348	if (nat_stats.ns_inuse >= ipf_nattable_max) {
2349		nat_stats.ns_memfail++;
2350		fr_nat_doflush = 1;
2351		return NULL;
2352	}
2353
2354	move = 1;
2355	nflags = np->in_flags & flags;
2356	nflags &= NAT_FROMRULE;
2357
2358	ni.nai_np = np;
2359	ni.nai_nflags = nflags;
2360	ni.nai_flags = flags;
2361	ni.nai_dport = 0;
2362	ni.nai_sport = 0;
2363
2364	/* Give me a new nat */
2365	KMALLOC(nat, nat_t *);
2366	if (nat == NULL) {
2367		nat_stats.ns_memfail++;
2368		/*
2369		 * Try to automatically tune the max # of entries in the
2370		 * table allowed to be less than what will cause kmem_alloc()
2371		 * to fail and try to eliminate panics due to out of memory
2372		 * conditions arising.
2373		 */
2374		if (ipf_nattable_max > ipf_nattable_sz) {
2375			ipf_nattable_max = nat_stats.ns_inuse - 100;
2376			printf("ipf_nattable_max reduced to %d\n",
2377				ipf_nattable_max);
2378		}
2379		return NULL;
2380	}
2381
2382	if (flags & IPN_TCPUDP) {
2383		tcp = fin->fin_dp;
2384		ni.nai_sport = htons(fin->fin_sport);
2385		ni.nai_dport = htons(fin->fin_dport);
2386	} else if (flags & IPN_ICMPQUERY) {
2387		/*
2388		 * In the ICMP query NAT code, we translate the ICMP id fields
2389		 * to make them unique. This is indepedent of the ICMP type
2390		 * (e.g. in the unlikely event that a host sends an echo and
2391		 * an tstamp request with the same id, both packets will have
2392		 * their ip address/id field changed in the same way).
2393		 */
2394		/* The icmp_id field is used by the sender to identify the
2395		 * process making the icmp request. (the receiver justs
2396		 * copies it back in its response). So, it closely matches
2397		 * the concept of source port. We overlay sport, so we can
2398		 * maximally reuse the existing code.
2399		 */
2400		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2401		ni.nai_dport = ni.nai_sport;
2402	}
2403
2404	bzero((char *)nat, sizeof(*nat));
2405	nat->nat_flags = flags;
2406	nat->nat_redir = np->in_redir;
2407
2408	if ((flags & NAT_SLAVE) == 0) {
2409		MUTEX_ENTER(&ipf_nat_new);
2410	}
2411
2412	/*
2413	 * Search the current table for a match.
2414	 */
2415	if (direction == NAT_OUTBOUND) {
2416		/*
2417		 * We can now arrange to call this for the same connection
2418		 * because ipf_nat_new doesn't protect the code path into
2419		 * this function.
2420		 */
2421		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2422				     fin->fin_src, fin->fin_dst);
2423		if (natl != NULL) {
2424			KFREE(nat);
2425			nat = natl;
2426			goto done;
2427		}
2428
2429		move = nat_newmap(fin, nat, &ni);
2430		if (move == -1)
2431			goto badnat;
2432
2433		np = ni.nai_np;
2434		in = ni.nai_ip;
2435	} else {
2436		/*
2437		 * NAT_INBOUND is used only for redirects rules
2438		 */
2439		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2440				    fin->fin_src, fin->fin_dst);
2441		if (natl != NULL) {
2442			KFREE(nat);
2443			nat = natl;
2444			goto done;
2445		}
2446
2447		move = nat_newrdr(fin, nat, &ni);
2448		if (move == -1)
2449			goto badnat;
2450
2451		np = ni.nai_np;
2452		in = ni.nai_ip;
2453	}
2454	port = ni.nai_port;
2455	nport = ni.nai_nport;
2456
2457	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2458		if (np->in_redir == NAT_REDIRECT) {
2459			nat_delrdr(np);
2460			nat_addrdr(np);
2461		} else if (np->in_redir == NAT_MAP) {
2462			nat_delnat(np);
2463			nat_addnat(np);
2464		}
2465	}
2466
2467	if (flags & IPN_TCPUDP) {
2468		sport = ni.nai_sport;
2469		dport = ni.nai_dport;
2470	} else if (flags & IPN_ICMPQUERY) {
2471		sport = ni.nai_sport;
2472		dport = 0;
2473	}
2474
2475	CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2476	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2477#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2478	if ((flags & IPN_TCP) && dohwcksum &&
2479	    (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2480		if (direction == NAT_OUTBOUND)
2481			ni.nai_sum1 = LONG_SUM(in.s_addr);
2482		else
2483			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2484		ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2485		ni.nai_sum1 += 30;
2486		ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2487		nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2488	} else
2489#endif
2490		nat->nat_sumd[1] = nat->nat_sumd[0];
2491
2492	if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2493		if (direction == NAT_OUTBOUND)
2494			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2495		else
2496			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2497
2498		ni.nai_sum2 = LONG_SUM(in.s_addr);
2499
2500		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2501		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2502	} else {
2503		nat->nat_ipsumd = nat->nat_sumd[0];
2504		if (!(flags & IPN_TCPUDPICMP)) {
2505			nat->nat_sumd[0] = 0;
2506			nat->nat_sumd[1] = 0;
2507		}
2508	}
2509
2510	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2511		fr_nat_doflush = 1;
2512		goto badnat;
2513	}
2514	if (flags & SI_WILDP)
2515		nat_stats.ns_wilds++;
2516	fin->fin_flx |= FI_NEWNAT;
2517	goto done;
2518badnat:
2519	nat_stats.ns_badnat++;
2520	if ((hm = nat->nat_hm) != NULL)
2521		fr_hostmapdel(&hm);
2522	KFREE(nat);
2523	nat = NULL;
2524done:
2525	if ((flags & NAT_SLAVE) == 0) {
2526		MUTEX_EXIT(&ipf_nat_new);
2527	}
2528	return nat;
2529}
2530
2531
2532/* ------------------------------------------------------------------------ */
2533/* Function:    nat_finalise                                                */
2534/* Returns:     int - 0 == sucess, -1 == failure                            */
2535/* Parameters:  fin(I) - pointer to packet information                      */
2536/*              nat(I) - pointer to NAT entry                               */
2537/*              ni(I)  - pointer to structure with misc. information needed */
2538/*                       to create new NAT entry.                           */
2539/* Write Lock:  ipf_nat                                                     */
2540/*                                                                          */
2541/* This is the tail end of constructing a new NAT entry and is the same     */
2542/* for both IPv4 and IPv6.                                                  */
2543/* ------------------------------------------------------------------------ */
2544/*ARGSUSED*/
2545static int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2546fr_info_t *fin;
2547nat_t *nat;
2548natinfo_t *ni;
2549tcphdr_t *tcp;
2550nat_t **natsave;
2551int direction;
2552{
2553	frentry_t *fr;
2554	ipnat_t *np;
2555
2556	np = ni->nai_np;
2557
2558	if (np->in_ifps[0] != NULL) {
2559		COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2560	}
2561	if (np->in_ifps[1] != NULL) {
2562		COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
2563	}
2564#ifdef	IPFILTER_SYNC
2565	if ((nat->nat_flags & SI_CLONE) == 0)
2566		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2567#endif
2568
2569	nat->nat_me = natsave;
2570	nat->nat_dir = direction;
2571	nat->nat_ifps[0] = np->in_ifps[0];
2572	nat->nat_ifps[1] = np->in_ifps[1];
2573	nat->nat_ptr = np;
2574	nat->nat_p = fin->fin_p;
2575	nat->nat_mssclamp = np->in_mssclamp;
2576	if (nat->nat_p == IPPROTO_TCP)
2577		nat->nat_seqnext[0] = ntohl(tcp->th_seq);
2578
2579	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2580		if (appr_new(fin, nat) == -1)
2581			return -1;
2582
2583	if (nat_insert(nat, fin->fin_rev) == 0) {
2584		if (nat_logging)
2585			nat_log(nat, (u_int)np->in_redir);
2586		np->in_use++;
2587		fr = fin->fin_fr;
2588		nat->nat_fr = fr;
2589		if (fr != NULL) {
2590			MUTEX_ENTER(&fr->fr_lock);
2591			fr->fr_ref++;
2592			MUTEX_EXIT(&fr->fr_lock);
2593		}
2594		return 0;
2595	}
2596
2597	/*
2598	 * nat_insert failed, so cleanup time...
2599	 */
2600	return -1;
2601}
2602
2603
2604/* ------------------------------------------------------------------------ */
2605/* Function:   nat_insert                                                   */
2606/* Returns:    int - 0 == sucess, -1 == failure                             */
2607/* Parameters: nat(I) - pointer to NAT structure                            */
2608/*             rev(I) - flag indicating forward/reverse direction of packet */
2609/* Write Lock: ipf_nat                                                      */
2610/*                                                                          */
2611/* Insert a NAT entry into the hash tables for searching and add it to the  */
2612/* list of active NAT entries.  Adjust global counters when complete.       */
2613/* ------------------------------------------------------------------------ */
2614int	nat_insert(nat, rev)
2615nat_t	*nat;
2616int	rev;
2617{
2618	u_int hv1, hv2;
2619	nat_t **natp;
2620
2621	/*
2622	 * Try and return an error as early as possible, so calculate the hash
2623	 * entry numbers first and then proceed.
2624	 */
2625	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2626		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2627				  0xffffffff);
2628		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2629				  ipf_nattable_sz);
2630		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2631				  0xffffffff);
2632		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2633				  ipf_nattable_sz);
2634	} else {
2635		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2636		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2637		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2638		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
2639	}
2640
2641	if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2642	    nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2643		return -1;
2644	}
2645
2646	nat->nat_hv[0] = hv1;
2647	nat->nat_hv[1] = hv2;
2648
2649	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2650
2651	nat->nat_rev = rev;
2652	nat->nat_ref = 1;
2653	nat->nat_bytes[0] = 0;
2654	nat->nat_pkts[0] = 0;
2655	nat->nat_bytes[1] = 0;
2656	nat->nat_pkts[1] = 0;
2657
2658	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2659	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2660
2661	if (nat->nat_ifnames[1][0] != '\0') {
2662		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2663		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2664	} else {
2665		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2666			       LIFNAMSIZ);
2667		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2668		nat->nat_ifps[1] = nat->nat_ifps[0];
2669	}
2670
2671	nat->nat_next = nat_instances;
2672	nat->nat_pnext = &nat_instances;
2673	if (nat_instances)
2674		nat_instances->nat_pnext = &nat->nat_next;
2675	nat_instances = nat;
2676
2677	natp = &nat_table[0][hv1];
2678	if (*natp)
2679		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2680	nat->nat_phnext[0] = natp;
2681	nat->nat_hnext[0] = *natp;
2682	*natp = nat;
2683	nat_stats.ns_bucketlen[0][hv1]++;
2684
2685	natp = &nat_table[1][hv2];
2686	if (*natp)
2687		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2688	nat->nat_phnext[1] = natp;
2689	nat->nat_hnext[1] = *natp;
2690	*natp = nat;
2691	nat_stats.ns_bucketlen[1][hv2]++;
2692
2693	fr_setnatqueue(nat, rev);
2694
2695	nat_stats.ns_added++;
2696	nat_stats.ns_inuse++;
2697	return 0;
2698}
2699
2700
2701/* ------------------------------------------------------------------------ */
2702/* Function:    nat_icmperrorlookup                                         */
2703/* Returns:     nat_t* - point to matching NAT structure                    */
2704/* Parameters:  fin(I) - pointer to packet information                      */
2705/*              dir(I) - direction of packet (in/out)                       */
2706/*                                                                          */
2707/* Check if the ICMP error message is related to an existing TCP, UDP or    */
2708/* ICMP query nat entry.  It is assumed that the packet is already of the   */
2709/* the required length.                                                     */
2710/* ------------------------------------------------------------------------ */
2711nat_t *nat_icmperrorlookup(fin, dir)
2712fr_info_t *fin;
2713int dir;
2714{
2715	int flags = 0, type, minlen;
2716	icmphdr_t *icmp, *orgicmp;
2717	tcphdr_t *tcp = NULL;
2718	u_short data[2];
2719	nat_t *nat;
2720	ip_t *oip;
2721	u_int p;
2722
2723	icmp = fin->fin_dp;
2724	type = icmp->icmp_type;
2725	/*
2726	 * Does it at least have the return (basic) IP header ?
2727	 * Only a basic IP header (no options) should be with an ICMP error
2728	 * header.  Also, if it's not an error type, then return.
2729	 */
2730	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2731		return NULL;
2732
2733	/*
2734	 * Check packet size
2735	 */
2736	oip = (ip_t *)((char *)fin->fin_dp + 8);
2737	minlen = IP_HL(oip) << 2;
2738	if ((minlen < sizeof(ip_t)) ||
2739	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2740		return NULL;
2741	/*
2742	 * Is the buffer big enough for all of it ?  It's the size of the IP
2743	 * header claimed in the encapsulated part which is of concern.  It
2744	 * may be too big to be in this buffer but not so big that it's
2745	 * outside the ICMP packet, leading to TCP deref's causing problems.
2746	 * This is possible because we don't know how big oip_hl is when we
2747	 * do the pullup early in fr_check() and thus can't gaurantee it is
2748	 * all here now.
2749	 */
2750#ifdef  _KERNEL
2751	{
2752	mb_t *m;
2753
2754	m = fin->fin_m;
2755# if defined(MENTAT)
2756	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2757		return NULL;
2758# else
2759	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2760	    (char *)fin->fin_ip + M_LEN(m))
2761		return NULL;
2762# endif
2763	}
2764#endif
2765
2766	if (fin->fin_daddr != oip->ip_src.s_addr)
2767		return NULL;
2768
2769	p = oip->ip_p;
2770	if (p == IPPROTO_TCP)
2771		flags = IPN_TCP;
2772	else if (p == IPPROTO_UDP)
2773		flags = IPN_UDP;
2774	else if (p == IPPROTO_ICMP) {
2775		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2776
2777		/* see if this is related to an ICMP query */
2778		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2779			data[0] = fin->fin_data[0];
2780			data[1] = fin->fin_data[1];
2781			fin->fin_data[0] = 0;
2782			fin->fin_data[1] = orgicmp->icmp_id;
2783
2784			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2785			/*
2786			 * NOTE : dir refers to the direction of the original
2787			 *        ip packet. By definition the icmp error
2788			 *        message flows in the opposite direction.
2789			 */
2790			if (dir == NAT_INBOUND)
2791				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2792						   oip->ip_src);
2793			else
2794				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2795						    oip->ip_src);
2796			fin->fin_data[0] = data[0];
2797			fin->fin_data[1] = data[1];
2798			return nat;
2799		}
2800	}
2801
2802	if (flags & IPN_TCPUDP) {
2803		minlen += 8;		/* + 64bits of data to get ports */
2804		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2805			return NULL;
2806
2807		data[0] = fin->fin_data[0];
2808		data[1] = fin->fin_data[1];
2809		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2810		fin->fin_data[0] = ntohs(tcp->th_dport);
2811		fin->fin_data[1] = ntohs(tcp->th_sport);
2812
2813		if (dir == NAT_INBOUND) {
2814			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2815					   oip->ip_src);
2816		} else {
2817			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2818					    oip->ip_src);
2819		}
2820		fin->fin_data[0] = data[0];
2821		fin->fin_data[1] = data[1];
2822		return nat;
2823	}
2824	if (dir == NAT_INBOUND)
2825		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2826	else
2827		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2828}
2829
2830
2831/* ------------------------------------------------------------------------ */
2832/* Function:    nat_icmperror                                               */
2833/* Returns:     nat_t* - point to matching NAT structure                    */
2834/* Parameters:  fin(I)    - pointer to packet information                   */
2835/*              nflags(I) - NAT flags for this packet                       */
2836/*              dir(I)    - direction of packet (in/out)                    */
2837/*                                                                          */
2838/* Fix up an ICMP packet which is an error message for an existing NAT      */
2839/* session.  This will correct both packet header data and checksums.       */
2840/*                                                                          */
2841/* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2842/* a NAT'd ICMP packet gets correctly recognised.                           */
2843/* ------------------------------------------------------------------------ */
2844nat_t *nat_icmperror(fin, nflags, dir)
2845fr_info_t *fin;
2846u_int *nflags;
2847int dir;
2848{
2849	u_32_t sum1, sum2, sumd, sumd2;
2850	struct in_addr a1, a2;
2851	int flags, dlen, odst;
2852	icmphdr_t *icmp;
2853	u_short *csump;
2854	tcphdr_t *tcp;
2855	nat_t *nat;
2856	ip_t *oip;
2857	void *dp;
2858
2859	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2860		return NULL;
2861	/*
2862	 * nat_icmperrorlookup() will return NULL for `defective' packets.
2863	 */
2864	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2865		return NULL;
2866
2867	tcp = NULL;
2868	csump = NULL;
2869	flags = 0;
2870	sumd2 = 0;
2871	*nflags = IPN_ICMPERR;
2872	icmp = fin->fin_dp;
2873	oip = (ip_t *)&icmp->icmp_ip;
2874	dp = (((char *)oip) + (IP_HL(oip) << 2));
2875	if (oip->ip_p == IPPROTO_TCP) {
2876		tcp = (tcphdr_t *)dp;
2877		csump = (u_short *)&tcp->th_sum;
2878		flags = IPN_TCP;
2879	} else if (oip->ip_p == IPPROTO_UDP) {
2880		udphdr_t *udp;
2881
2882		udp = (udphdr_t *)dp;
2883		tcp = (tcphdr_t *)dp;
2884		csump = (u_short *)&udp->uh_sum;
2885		flags = IPN_UDP;
2886	} else if (oip->ip_p == IPPROTO_ICMP)
2887		flags = IPN_ICMPQUERY;
2888	dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);
2889
2890	/*
2891	 * Need to adjust ICMP header to include the real IP#'s and
2892	 * port #'s.  Only apply a checksum change relative to the
2893	 * IP address change as it will be modified again in fr_checknatout
2894	 * for both address and port.  Two checksum changes are
2895	 * necessary for the two header address changes.  Be careful
2896	 * to only modify the checksum once for the port # and twice
2897	 * for the IP#.
2898	 */
2899
2900	/*
2901	 * Step 1
2902	 * Fix the IP addresses in the offending IP packet. You also need
2903	 * to adjust the IP header checksum of that offending IP packet.
2904	 *
2905	 * Normally, you would expect that the ICMP checksum of the
2906	 * ICMP error message needs to be adjusted as well for the
2907	 * IP address change in oip.
2908	 * However, this is a NOP, because the ICMP checksum is
2909	 * calculated over the complete ICMP packet, which includes the
2910	 * changed oip IP addresses and oip->ip_sum. However, these
2911	 * two changes cancel each other out (if the delta for
2912	 * the IP address is x, then the delta for ip_sum is minus x),
2913	 * so no change in the icmp_cksum is necessary.
2914	 *
2915	 * Inbound ICMP
2916	 * ------------
2917	 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2918	 * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
2919	 * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
2920	 *
2921	 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2922	 * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2923	 * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
2924	 *
2925	 * Outbound ICMP
2926	 * -------------
2927	 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2928	 * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2929	 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2930	 *
2931	 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2932	 * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
2933	 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2934	 *
2935	 */
2936	odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
2937	if (odst == 1) {
2938		a1.s_addr = ntohl(nat->nat_inip.s_addr);
2939		a2.s_addr = ntohl(oip->ip_src.s_addr);
2940		oip->ip_src.s_addr = htonl(a1.s_addr);
2941	} else {
2942		a1.s_addr = ntohl(nat->nat_outip.s_addr);
2943		a2.s_addr = ntohl(oip->ip_dst.s_addr);
2944		oip->ip_dst.s_addr = htonl(a1.s_addr);
2945	}
2946
2947	sumd = a2.s_addr - a1.s_addr;
2948	if (sumd != 0) {
2949		if (a1.s_addr > a2.s_addr)
2950			sumd--;
2951		sumd = ~sumd;
2952
2953		fix_datacksum(&oip->ip_sum, sumd);
2954	}
2955
2956	sumd2 = sumd;
2957	sum1 = 0;
2958	sum2 = 0;
2959
2960	/*
2961	 * Fix UDP pseudo header checksum to compensate for the
2962	 * IP address change.
2963	 */
2964	if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
2965		/*
2966		 * Step 2 :
2967		 * For offending TCP/UDP IP packets, translate the ports as
2968		 * well, based on the NAT specification. Of course such
2969		 * a change may be reflected in the ICMP checksum as well.
2970		 *
2971		 * Since the port fields are part of the TCP/UDP checksum
2972		 * of the offending IP packet, you need to adjust that checksum
2973		 * as well... except that the change in the port numbers should
2974		 * be offset by the checksum change.  However, the TCP/UDP
2975		 * checksum will also need to change if there has been an
2976		 * IP address change.
2977		 */
2978		if (odst == 1) {
2979			sum1 = ntohs(nat->nat_inport);
2980			sum2 = ntohs(tcp->th_sport);
2981
2982			tcp->th_sport = htons(sum1);
2983		} else {
2984			sum1 = ntohs(nat->nat_outport);
2985			sum2 = ntohs(tcp->th_dport);
2986
2987			tcp->th_dport = htons(sum1);
2988		}
2989
2990		sumd += sum1 - sum2;
2991		if (sumd != 0 || sumd2 != 0) {
2992			/*
2993			 * At this point, sumd is the delta to apply to the
2994			 * TCP/UDP header, given the changes in both the IP
2995			 * address and the ports and sumd2 is the delta to
2996			 * apply to the ICMP header, given the IP address
2997			 * change delta that may need to be applied to the
2998			 * TCP/UDP checksum instead.
2999			 *
3000			 * If we will both the IP and TCP/UDP checksums
3001			 * then the ICMP checksum changes by the address
3002			 * delta applied to the TCP/UDP checksum.  If we
3003			 * do not change the TCP/UDP checksum them we
3004			 * apply the delta in ports to the ICMP checksum.
3005			 */
3006			if (oip->ip_p == IPPROTO_UDP) {
3007				if ((dlen >= 8) && (*csump != 0)) {
3008					fix_datacksum(csump, sumd);
3009				} else {
3010					sumd2 = sum1 - sum2;
3011					if (sum2 > sum1)
3012						sumd2--;
3013				}
3014			} else if (oip->ip_p == IPPROTO_TCP) {
3015				if (dlen >= 18) {
3016					fix_datacksum(csump, sumd);
3017				} else {
3018					sumd2 = sum2 - sum1;
3019					if (sum1 > sum2)
3020						sumd2--;
3021				}
3022			}
3023
3024			if (sumd2 != 0) {
3025				ipnat_t *np;
3026
3027				np = nat->nat_ptr;
3028				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3029				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3030				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3031
3032				if ((odst == 0) && (dir == NAT_OUTBOUND) &&
3033				    (fin->fin_rev == 0) && (np != NULL) &&
3034				    (np->in_redir & NAT_REDIRECT)) {
3035					fix_outcksum(fin, &icmp->icmp_cksum,
3036						     sumd2);
3037				} else {
3038					fix_incksum(fin, &icmp->icmp_cksum,
3039						    sumd2);
3040				}
3041			}
3042		}
3043	} else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
3044		icmphdr_t *orgicmp;
3045
3046		/*
3047		 * XXX - what if this is bogus hl and we go off the end ?
3048		 * In this case, nat_icmperrorlookup() will have returned NULL.
3049		 */
3050		orgicmp = (icmphdr_t *)dp;
3051
3052		if (odst == 1) {
3053			if (orgicmp->icmp_id != nat->nat_inport) {
3054
3055				/*
3056				 * Fix ICMP checksum (of the offening ICMP
3057				 * query packet) to compensate the change
3058				 * in the ICMP id of the offending ICMP
3059				 * packet.
3060				 *
3061				 * Since you modify orgicmp->icmp_id with
3062				 * a delta (say x) and you compensate that
3063				 * in origicmp->icmp_cksum with a delta
3064				 * minus x, you don't have to adjust the
3065				 * overall icmp->icmp_cksum
3066				 */
3067				sum1 = ntohs(orgicmp->icmp_id);
3068				sum2 = ntohs(nat->nat_inport);
3069				CALC_SUMD(sum1, sum2, sumd);
3070				orgicmp->icmp_id = nat->nat_inport;
3071				fix_datacksum(&orgicmp->icmp_cksum, sumd);
3072			}
3073		} /* nat_dir == NAT_INBOUND is impossible for icmp queries */
3074	}
3075	return nat;
3076}
3077
3078
3079/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done!
3082 */
3083
3084/* ------------------------------------------------------------------------ */
3085/* Function:    nat_inlookup                                                */
3086/* Returns:     nat_t* - NULL == no match,                                  */
3087/*                       else pointer to matching NAT entry                 */
3088/* Parameters:  fin(I)    - pointer to packet information                   */
3089/*              flags(I)  - NAT flags for this packet                       */
3090/*              p(I)      - protocol for this packet                        */
3091/*              src(I)    - source IP address                               */
3092/*              mapdst(I) - destination IP address                          */
3093/*                                                                          */
3094/* Lookup a nat entry based on the mapped destination ip address/port and   */
3095/* real source address/port.  We use this lookup when receiving a packet,   */
3096/* we're looking for a table entry, based on the destination address.       */
3097/*                                                                          */
3098/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3099/*                                                                          */
3100/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3101/*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3102/*                                                                          */
3103/* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3104/*            the packet is of said protocol                                */
3105/* ------------------------------------------------------------------------ */
3106nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3107fr_info_t *fin;
3108u_int flags, p;
3109struct in_addr src , mapdst;
3110{
3111	u_short sport, dport;
3112	grehdr_t *gre;
3113	ipnat_t *ipn;
3114	u_int sflags;
3115	nat_t *nat;
3116	int nflags;
3117	u_32_t dst;
3118	void *ifp;
3119	u_int hv;
3120
3121	ifp = fin->fin_ifp;
3122	sport = 0;
3123	dport = 0;
3124	gre = NULL;
3125	dst = mapdst.s_addr;
3126	sflags = flags & NAT_TCPUDPICMP;
3127
3128	switch (p)
3129	{
3130	case IPPROTO_TCP :
3131	case IPPROTO_UDP :
3132		sport = htons(fin->fin_data[0]);
3133		dport = htons(fin->fin_data[1]);
3134		break;
3135	case IPPROTO_ICMP :
3136		if (flags & IPN_ICMPERR)
3137			sport = fin->fin_data[1];
3138		else
3139			dport = fin->fin_data[1];
3140		break;
3141	default :
3142		break;
3143	}
3144
3145
3146	if ((flags & SI_WILDP) != 0)
3147		goto find_in_wild_ports;
3148
3149	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3150	hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
3151	nat = nat_table[1][hv];
3152	for (; nat; nat = nat->nat_hnext[1]) {
3153		if (nat->nat_ifps[0] != NULL) {
3154			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3155				continue;
3156		} else if (ifp != NULL)
3157			nat->nat_ifps[0] = ifp;
3158
3159		nflags = nat->nat_flags;
3160
3161		if (nat->nat_oip.s_addr == src.s_addr &&
3162		    nat->nat_outip.s_addr == dst &&
3163		    (((p == 0) &&
3164		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3165		     || (p == nat->nat_p))) {
3166			switch (p)
3167			{
3168#if 0
3169			case IPPROTO_GRE :
3170				if (nat->nat_call[1] != fin->fin_data[0])
3171					continue;
3172				break;
3173#endif
3174			case IPPROTO_ICMP :
3175				if ((flags & IPN_ICMPERR) != 0) {
3176					if (nat->nat_outport != sport)
3177						continue;
3178				} else {
3179					if (nat->nat_outport != dport)
3180						continue;
3181				}
3182				break;
3183			case IPPROTO_TCP :
3184			case IPPROTO_UDP :
3185				if (nat->nat_oport != sport)
3186					continue;
3187				if (nat->nat_outport != dport)
3188					continue;
3189				break;
3190			default :
3191				break;
3192			}
3193
3194			ipn = nat->nat_ptr;
3195			if ((ipn != NULL) && (nat->nat_aps != NULL))
3196				if (appr_match(fin, nat) != 0)
3197					continue;
3198			return nat;
3199		}
3200	}
3201
3202	/*
3203	 * So if we didn't find it but there are wildcard members in the hash
3204	 * table, go back and look for them.  We do this search and update here
3205	 * because it is modifying the NAT table and we want to do this only
3206	 * for the first packet that matches.  The exception, of course, is
3207	 * for "dummy" (FI_IGNORE) lookups.
3208	 */
3209find_in_wild_ports:
3210	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3211		return NULL;
3212	if (nat_stats.ns_wilds == 0)
3213		return NULL;
3214
3215	RWLOCK_EXIT(&ipf_nat);
3216
3217	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3218	hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
3219
3220	WRITE_ENTER(&ipf_nat);
3221
3222	nat = nat_table[1][hv];
3223	for (; nat; nat = nat->nat_hnext[1]) {
3224		if (nat->nat_ifps[0] != NULL) {
3225			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3226				continue;
3227		} else if (ifp != NULL)
3228			nat->nat_ifps[0] = ifp;
3229
3230		if (nat->nat_p != fin->fin_p)
3231			continue;
3232		if (nat->nat_oip.s_addr != src.s_addr ||
3233		    nat->nat_outip.s_addr != dst)
3234			continue;
3235
3236		nflags = nat->nat_flags;
3237		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3238			continue;
3239
3240		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3241			       NAT_INBOUND) == 1) {
3242			if ((fin->fin_flx & FI_IGNORE) != 0)
3243				break;
3244			if ((nflags & SI_CLONE) != 0) {
3245				nat = fr_natclone(fin, nat);
3246				if (nat == NULL)
3247					break;
3248			} else {
3249				MUTEX_ENTER(&ipf_nat_new);
3250				nat_stats.ns_wilds--;
3251				MUTEX_EXIT(&ipf_nat_new);
3252			}
3253			nat->nat_oport = sport;
3254			nat->nat_outport = dport;
3255			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3256			nat_tabmove(nat);
3257			break;
3258		}
3259	}
3260
3261	MUTEX_DOWNGRADE(&ipf_nat);
3262
3263	return nat;
3264}
3265
3266
3267/* ------------------------------------------------------------------------ */
3268/* Function:    nat_tabmove                                                 */
3269/* Returns:     Nil                                                         */
3270/* Parameters:  nat(I) - pointer to NAT structure                           */
3271/* Write Lock:  ipf_nat                                                     */
3272/*                                                                          */
3273/* This function is only called for TCP/UDP NAT table entries where the     */
3274/* original was placed in the table without hashing on the ports and we now */
3275/* want to include hashing on port numbers.                                 */
3276/* ------------------------------------------------------------------------ */
3277static void nat_tabmove(nat)
3278nat_t *nat;
3279{
3280	nat_t **natp;
3281	u_int hv;
3282
3283	if (nat->nat_flags & SI_CLONE)
3284		return;
3285
3286	/*
3287	 * Remove the NAT entry from the old location
3288	 */
3289	if (nat->nat_hnext[0])
3290		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3291	*nat->nat_phnext[0] = nat->nat_hnext[0];
3292	nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3293
3294	if (nat->nat_hnext[1])
3295		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3296	*nat->nat_phnext[1] = nat->nat_hnext[1];
3297	nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3298
3299	/*
3300	 * Add into the NAT table in the new position
3301	 */
3302	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3303	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3304			 ipf_nattable_sz);
3305	nat->nat_hv[0] = hv;
3306	natp = &nat_table[0][hv];
3307	if (*natp)
3308		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3309	nat->nat_phnext[0] = natp;
3310	nat->nat_hnext[0] = *natp;
3311	*natp = nat;
3312	nat_stats.ns_bucketlen[0][hv]++;
3313
3314	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3315	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3316			 ipf_nattable_sz);
3317	nat->nat_hv[1] = hv;
3318	natp = &nat_table[1][hv];
3319	if (*natp)
3320		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3321	nat->nat_phnext[1] = natp;
3322	nat->nat_hnext[1] = *natp;
3323	*natp = nat;
3324	nat_stats.ns_bucketlen[1][hv]++;
3325}
3326
3327
3328/* ------------------------------------------------------------------------ */
3329/* Function:    nat_outlookup                                               */
3330/* Returns:     nat_t* - NULL == no match,                                  */
3331/*                       else pointer to matching NAT entry                 */
3332/* Parameters:  fin(I)   - pointer to packet information                    */
3333/*              flags(I) - NAT flags for this packet                        */
3334/*              p(I)     - protocol for this packet                         */
3335/*              src(I)   - source IP address                                */
3336/*              dst(I)   - destination IP address                           */
3337/*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3338/*                                                                          */
3339/* Lookup a nat entry based on the source 'real' ip address/port and        */
3340/* destination address/port.  We use this lookup when sending a packet out, */
3341/* we're looking for a table entry, based on the source address.            */
3342/*                                                                          */
3343/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3344/*                                                                          */
3345/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3346/*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3347/*                                                                          */
3348/* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3349/*            the packet is of said protocol                                */
3350/* ------------------------------------------------------------------------ */
3351nat_t *nat_outlookup(fin, flags, p, src, dst)
3352fr_info_t *fin;
3353u_int flags, p;
3354struct in_addr src , dst;
3355{
3356	u_short sport, dport;
3357	u_int sflags;
3358	ipnat_t *ipn;
3359	u_32_t srcip;
3360	nat_t *nat;
3361	int nflags;
3362	void *ifp;
3363	u_int hv;
3364
3365	ifp = fin->fin_ifp;
3366	srcip = src.s_addr;
3367	sflags = flags & IPN_TCPUDPICMP;
3368	sport = 0;
3369	dport = 0;
3370
3371	switch (p)
3372	{
3373	case IPPROTO_TCP :
3374	case IPPROTO_UDP :
3375		sport = htons(fin->fin_data[0]);
3376		dport = htons(fin->fin_data[1]);
3377		break;
3378	case IPPROTO_ICMP :
3379		if (flags & IPN_ICMPERR)
3380			sport = fin->fin_data[1];
3381		else
3382			dport = fin->fin_data[1];
3383		break;
3384	default :
3385		break;
3386	}
3387
3388	if ((flags & SI_WILDP) != 0)
3389		goto find_out_wild_ports;
3390
3391	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3392	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
3393	nat = nat_table[0][hv];
3394	for (; nat; nat = nat->nat_hnext[0]) {
3395		if (nat->nat_ifps[1] != NULL) {
3396			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3397				continue;
3398		} else if (ifp != NULL)
3399			nat->nat_ifps[1] = ifp;
3400
3401		nflags = nat->nat_flags;
3402
3403		if (nat->nat_inip.s_addr == srcip &&
3404		    nat->nat_oip.s_addr == dst.s_addr &&
3405		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3406		     || (p == nat->nat_p))) {
3407			switch (p)
3408			{
3409#if 0
3410			case IPPROTO_GRE :
3411				if (nat->nat_call[1] != fin->fin_data[0])
3412					continue;
3413				break;
3414#endif
3415			case IPPROTO_TCP :
3416			case IPPROTO_UDP :
3417				if (nat->nat_oport != dport)
3418					continue;
3419				if (nat->nat_inport != sport)
3420					continue;
3421				break;
3422			default :
3423				break;
3424			}
3425
3426			ipn = nat->nat_ptr;
3427			if ((ipn != NULL) && (nat->nat_aps != NULL))
3428				if (appr_match(fin, nat) != 0)
3429					continue;
3430			return nat;
3431		}
3432	}
3433
3434	/*
3435	 * So if we didn't find it but there are wildcard members in the hash
3436	 * table, go back and look for them.  We do this search and update here
3437	 * because it is modifying the NAT table and we want to do this only
3438	 * for the first packet that matches.  The exception, of course, is
3439	 * for "dummy" (FI_IGNORE) lookups.
3440	 */
3441find_out_wild_ports:
3442	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3443		return NULL;
3444	if (nat_stats.ns_wilds == 0)
3445		return NULL;
3446
3447	RWLOCK_EXIT(&ipf_nat);
3448
3449	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3450	hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
3451
3452	WRITE_ENTER(&ipf_nat);
3453
3454	nat = nat_table[0][hv];
3455	for (; nat; nat = nat->nat_hnext[0]) {
3456		if (nat->nat_ifps[1] != NULL) {
3457			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3458				continue;
3459		} else if (ifp != NULL)
3460			nat->nat_ifps[1] = ifp;
3461
3462		if (nat->nat_p != fin->fin_p)
3463			continue;
3464		if ((nat->nat_inip.s_addr != srcip) ||
3465		    (nat->nat_oip.s_addr != dst.s_addr))
3466			continue;
3467
3468		nflags = nat->nat_flags;
3469		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3470			continue;
3471
3472		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3473			       NAT_OUTBOUND) == 1) {
3474			if ((fin->fin_flx & FI_IGNORE) != 0)
3475				break;
3476			if ((nflags & SI_CLONE) != 0) {
3477				nat = fr_natclone(fin, nat);
3478				if (nat == NULL)
3479					break;
3480			} else {
3481				MUTEX_ENTER(&ipf_nat_new);
3482				nat_stats.ns_wilds--;
3483				MUTEX_EXIT(&ipf_nat_new);
3484			}
3485			nat->nat_inport = sport;
3486			nat->nat_oport = dport;
3487			if (nat->nat_outport == 0)
3488				nat->nat_outport = sport;
3489			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3490			nat_tabmove(nat);
3491			break;
3492		}
3493	}
3494
3495	MUTEX_DOWNGRADE(&ipf_nat);
3496
3497	return nat;
3498}
3499
3500
3501/* ------------------------------------------------------------------------ */
3502/* Function:    nat_lookupredir                                             */
3503/* Returns:     nat_t* - NULL == no match,                                  */
3504/*                       else pointer to matching NAT entry                 */
3505/* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3506/*                      entry for.                                          */
3507/*                                                                          */
3508/* Lookup the NAT tables to search for a matching redirect                  */
3509/* The contents of natlookup_t should imitate those found in a packet that  */
3510/* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3511/* We can do the lookup in one of two ways, imitating an inbound or         */
3512/* outbound  packet.  By default we assume outbound, unless IPN_IN is set.  */
3513/* For IN, the fields are set as follows:                                   */
3514/*     nl_real* = source information                                        */
3515/*     nl_out* = destination information (translated)                       */
3516/* For an out packet, the fields are set like this:                         */
3517/*     nl_in* = source information (untranslated)                           */
3518/*     nl_out* = destination information (translated)                       */
3519/* ------------------------------------------------------------------------ */
3520nat_t *nat_lookupredir(np)
3521natlookup_t *np;
3522{
3523	fr_info_t fi;
3524	nat_t *nat;
3525
3526	bzero((char *)&fi, sizeof(fi));
3527	if (np->nl_flags & IPN_IN) {
3528		fi.fin_data[0] = ntohs(np->nl_realport);
3529		fi.fin_data[1] = ntohs(np->nl_outport);
3530	} else {
3531		fi.fin_data[0] = ntohs(np->nl_inport);
3532		fi.fin_data[1] = ntohs(np->nl_outport);
3533	}
3534	if (np->nl_flags & IPN_TCP)
3535		fi.fin_p = IPPROTO_TCP;
3536	else if (np->nl_flags & IPN_UDP)
3537		fi.fin_p = IPPROTO_UDP;
3538	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3539		fi.fin_p = IPPROTO_ICMP;
3540
3541	/*
3542	 * We can do two sorts of lookups:
3543	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3544	 * - default: we have the `in' and `out' address, look for `real'.
3545	 */
3546	if (np->nl_flags & IPN_IN) {
3547		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3548					np->nl_realip, np->nl_outip))) {
3549			np->nl_inip = nat->nat_inip;
3550			np->nl_inport = nat->nat_inport;
3551		}
3552	} else {
3553		/*
3554		 * If nl_inip is non null, this is a lookup based on the real
3555		 * ip address. Else, we use the fake.
3556		 */
3557		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3558					 np->nl_inip, np->nl_outip))) {
3559
3560			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3561				fr_info_t fin;
3562				bzero((char *)&fin, sizeof(fin));
3563				fin.fin_p = nat->nat_p;
3564				fin.fin_data[0] = ntohs(nat->nat_outport);
3565				fin.fin_data[1] = ntohs(nat->nat_oport);
3566				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3567						 nat->nat_outip,
3568						 nat->nat_oip) != NULL) {
3569					np->nl_flags &= ~IPN_FINDFORWARD;
3570				}
3571			}
3572
3573			np->nl_realip = nat->nat_outip;
3574			np->nl_realport = nat->nat_outport;
3575		}
3576 	}
3577
3578	return nat;
3579}
3580
3581
3582/* ------------------------------------------------------------------------ */
3583/* Function:    nat_match                                                   */
3584/* Returns:     int - 0 == no match, 1 == match                             */
3585/* Parameters:  fin(I)   - pointer to packet information                    */
3586/*              np(I)    - pointer to NAT rule                              */
3587/*                                                                          */
3588/* Pull the matching of a packet against a NAT rule out of that complex     */
3589/* loop inside fr_checknatin() and lay it out properly in its own function. */
3590/* ------------------------------------------------------------------------ */
3591static int nat_match(fin, np)
3592fr_info_t *fin;
3593ipnat_t *np;
3594{
3595	frtuc_t *ft;
3596
3597	if (fin->fin_v != 4)
3598		return 0;
3599
3600	if (np->in_p && fin->fin_p != np->in_p)
3601		return 0;
3602
3603	if (fin->fin_out) {
3604		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3605			return 0;
3606		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3607		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3608			return 0;
3609		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3610		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3611			return 0;
3612	} else {
3613		if (!(np->in_redir & NAT_REDIRECT))
3614			return 0;
3615		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3616		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3617			return 0;
3618		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3619		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3620			return 0;
3621	}
3622
3623	ft = &np->in_tuc;
3624	if (!(fin->fin_flx & FI_TCPUDP) ||
3625	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3626		if (ft->ftu_scmp || ft->ftu_dcmp)
3627			return 0;
3628		return 1;
3629	}
3630
3631	return fr_tcpudpchk(fin, ft);
3632}
3633
3634
3635/* ------------------------------------------------------------------------ */
3636/* Function:    nat_update                                                  */
3637/* Returns:     Nil                                                         */
3638/* Parameters:  nat(I)    - pointer to NAT structure                        */
3639/*              np(I)     - pointer to NAT rule                             */
3640/*                                                                          */
3641/* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3642/* called with fin_rev updated - i.e. after calling nat_proto().            */
3643/* ------------------------------------------------------------------------ */
3644void nat_update(fin, nat, np)
3645fr_info_t *fin;
3646nat_t *nat;
3647ipnat_t *np;
3648{
3649	ipftq_t *ifq, *ifq2;
3650	ipftqent_t *tqe;
3651
3652	MUTEX_ENTER(&nat->nat_lock);
3653	tqe = &nat->nat_tqe;
3654	ifq = tqe->tqe_ifq;
3655
3656	/*
3657	 * We allow over-riding of NAT timeouts from NAT rules, even for
3658	 * TCP, however, if it is TCP and there is no rule timeout set,
3659	 * then do not update the timeout here.
3660	 */
3661	if (np != NULL)
3662		ifq2 = np->in_tqehead[fin->fin_rev];
3663	else
3664		ifq2 = NULL;
3665
3666	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3667		u_32_t end, ack;
3668		u_char tcpflags;
3669		tcphdr_t *tcp;
3670		int dsize;
3671
3672		tcp = fin->fin_dp;
3673		tcpflags = tcp->th_flags;
3674		dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
3675			((tcpflags & TH_SYN) ? 1 : 0) +
3676			((tcpflags & TH_FIN) ? 1 : 0);
3677
3678		ack = ntohl(tcp->th_ack);
3679		end = ntohl(tcp->th_seq) + dsize;
3680
3681		if (SEQ_GT(ack, nat->nat_seqnext[1 - fin->fin_rev]))
3682			nat->nat_seqnext[1 - fin->fin_rev] = ack;
3683
3684		if (nat->nat_seqnext[fin->fin_rev] == 0)
3685			nat->nat_seqnext[fin->fin_rev] = end;
3686
3687		(void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3688	} else {
3689		if (ifq2 == NULL) {
3690			if (nat->nat_p == IPPROTO_UDP)
3691				ifq2 = &nat_udptq;
3692			else if (nat->nat_p == IPPROTO_ICMP)
3693				ifq2 = &nat_icmptq;
3694			else
3695				ifq2 = &nat_iptq;
3696		}
3697
3698		fr_movequeue(tqe, ifq, ifq2);
3699	}
3700	MUTEX_EXIT(&nat->nat_lock);
3701}
3702
3703
3704/* ------------------------------------------------------------------------ */
3705/* Function:    fr_checknatout                                              */
3706/* Returns:     int - -1 == packet failed NAT checks so block it,           */
3707/*                     0 == no packet translation occurred,                 */
3708/*                     1 == packet was successfully translated.             */
3709/* Parameters:  fin(I)   - pointer to packet information                    */
3710/*              passp(I) - pointer to filtering result flags                */
3711/*                                                                          */
3712/* Check to see if an outcoming packet should be changed.  ICMP packets are */
3713/* first checked to see if they match an existing entry (if an error),      */
3714/* otherwise a search of the current NAT table is made.  If neither results */
3715/* in a match then a search for a matching NAT rule is made.  Create a new  */
3716/* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3717/* packet header(s) as required.                                            */
3718/* ------------------------------------------------------------------------ */
3719int fr_checknatout(fin, passp)
3720fr_info_t *fin;
3721u_32_t *passp;
3722{
3723	struct ifnet *ifp, *sifp;
3724	icmphdr_t *icmp = NULL;
3725	tcphdr_t *tcp = NULL;
3726	int rval, natfailed;
3727	ipnat_t *np = NULL;
3728	u_int nflags = 0;
3729	u_32_t ipa, iph;
3730	int natadd = 1;
3731	frentry_t *fr;
3732	nat_t *nat;
3733
3734	if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
3735		return 0;
3736
3737	natfailed = 0;
3738	fr = fin->fin_fr;
3739	sifp = fin->fin_ifp;
3740	if (fr != NULL) {
3741		ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3742		if ((ifp != NULL) && (ifp != (void *)-1))
3743			fin->fin_ifp = ifp;
3744	}
3745	ifp = fin->fin_ifp;
3746
3747	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3748		switch (fin->fin_p)
3749		{
3750		case IPPROTO_TCP :
3751			nflags = IPN_TCP;
3752			break;
3753		case IPPROTO_UDP :
3754			nflags = IPN_UDP;
3755			break;
3756		case IPPROTO_ICMP :
3757			icmp = fin->fin_dp;
3758
3759			/*
3760			 * This is an incoming packet, so the destination is
3761			 * the icmp_id and the source port equals 0
3762			 */
3763			if (nat_icmpquerytype4(icmp->icmp_type))
3764				nflags = IPN_ICMPQUERY;
3765			break;
3766		default :
3767			break;
3768		}
3769
3770		if ((nflags & IPN_TCPUDP))
3771			tcp = fin->fin_dp;
3772	}
3773
3774	ipa = fin->fin_saddr;
3775
3776	READ_ENTER(&ipf_nat);
3777
3778	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3779	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3780		/*EMPTY*/;
3781	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3782		natadd = 0;
3783	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3784				      fin->fin_src, fin->fin_dst))) {
3785		nflags = nat->nat_flags;
3786	} else {
3787		u_32_t hv, msk, nmsk;
3788
3789		/*
3790		 * If there is no current entry in the nat table for this IP#,
3791		 * create one for it (if there is a matching rule).
3792		 */
3793		RWLOCK_EXIT(&ipf_nat);
3794		msk = 0xffffffff;
3795		nmsk = nat_masks;
3796		WRITE_ENTER(&ipf_nat);
3797maskloop:
3798		iph = ipa & htonl(msk);
3799		hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3800		for (np = nat_rules[hv]; np; np = np->in_mnext)
3801		{
3802			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3803				continue;
3804			if (np->in_v != fin->fin_v)
3805				continue;
3806			if (np->in_p && (np->in_p != fin->fin_p))
3807				continue;
3808			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3809				continue;
3810			if (np->in_flags & IPN_FILTER) {
3811				if (!nat_match(fin, np))
3812					continue;
3813			} else if ((ipa & np->in_inmsk) != np->in_inip)
3814				continue;
3815
3816			if ((fr != NULL) &&
3817			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3818				continue;
3819
3820			if (*np->in_plabel != '\0') {
3821				if (((np->in_flags & IPN_FILTER) == 0) &&
3822				    (np->in_dport != tcp->th_dport))
3823					continue;
3824				if (appr_ok(fin, tcp, np) == 0)
3825					continue;
3826			}
3827
3828			if ((nat = nat_new(fin, np, NULL, nflags,
3829					   NAT_OUTBOUND))) {
3830				np->in_hits++;
3831				break;
3832			} else
3833				natfailed = -1;
3834		}
3835		if ((np == NULL) && (nmsk != 0)) {
3836			while (nmsk) {
3837				msk <<= 1;
3838				if (nmsk & 0x80000000)
3839					break;
3840				nmsk <<= 1;
3841			}
3842			if (nmsk != 0) {
3843				nmsk <<= 1;
3844				goto maskloop;
3845			}
3846		}
3847		MUTEX_DOWNGRADE(&ipf_nat);
3848	}
3849
3850	if (nat != NULL) {
3851		rval = fr_natout(fin, nat, natadd, nflags);
3852		if (rval == 1) {
3853			MUTEX_ENTER(&nat->nat_lock);
3854			nat->nat_ref++;
3855			MUTEX_EXIT(&nat->nat_lock);
3856			nat->nat_touched = fr_ticks;
3857			fin->fin_nat = nat;
3858		}
3859	} else
3860		rval = natfailed;
3861	RWLOCK_EXIT(&ipf_nat);
3862
3863	if (rval == -1) {
3864		if (passp != NULL)
3865			*passp = FR_BLOCK;
3866		fin->fin_flx |= FI_BADNAT;
3867	}
3868	fin->fin_ifp = sifp;
3869	return rval;
3870}
3871
3872/* ------------------------------------------------------------------------ */
3873/* Function:    fr_natout                                                   */
3874/* Returns:     int - -1 == packet failed NAT checks so block it,           */
3875/*                     1 == packet was successfully translated.             */
3876/* Parameters:  fin(I)    - pointer to packet information                   */
3877/*              nat(I)    - pointer to NAT structure                        */
3878/*              natadd(I) - flag indicating if it is safe to add frag cache */
3879/*              nflags(I) - NAT flags set for this packet                   */
3880/*                                                                          */
3881/* Translate a packet coming "out" on an interface.                         */
3882/* ------------------------------------------------------------------------ */
3883int fr_natout(fin, nat, natadd, nflags)
3884fr_info_t *fin;
3885nat_t *nat;
3886int natadd;
3887u_32_t nflags;
3888{
3889	icmphdr_t *icmp;
3890	u_short *csump;
3891	tcphdr_t *tcp;
3892	ipnat_t *np;
3893	int i;
3894
3895	tcp = NULL;
3896	icmp = NULL;
3897	csump = NULL;
3898	np = nat->nat_ptr;
3899
3900	if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3901		(void) fr_nat_newfrag(fin, 0, nat);
3902
3903	MUTEX_ENTER(&nat->nat_lock);
3904	nat->nat_bytes[1] += fin->fin_plen;
3905	nat->nat_pkts[1]++;
3906	MUTEX_EXIT(&nat->nat_lock);
3907
3908	/*
3909	 * Fix up checksums, not by recalculating them, but
3910	 * simply computing adjustments.
3911	 * This is only done for STREAMS based IP implementations where the
3912	 * checksum has already been calculated by IP.  In all other cases,
3913	 * IPFilter is called before the checksum needs calculating so there
3914	 * is no call to modify whatever is in the header now.
3915	 */
3916	if (fin->fin_v == 4) {
3917		if (nflags == IPN_ICMPERR) {
3918			u_32_t s1, s2, sumd;
3919
3920			s1 = LONG_SUM(ntohl(fin->fin_saddr));
3921			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3922			CALC_SUMD(s1, s2, sumd);
3923			fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3924		}
3925#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3926    defined(linux) || defined(BRIDGE_IPF)
3927		else {
3928			/*
3929			 * Strictly speaking, this isn't necessary on BSD
3930			 * kernels because they do checksum calculation after
3931			 * this code has run BUT if ipfilter is being used
3932			 * to do NAT as a bridge, that code doesn't exist.
3933			 */
3934			if (nat->nat_dir == NAT_OUTBOUND)
3935				fix_outcksum(fin, &fin->fin_ip->ip_sum,
3936					     nat->nat_ipsumd);
3937			else
3938				fix_incksum(fin, &fin->fin_ip->ip_sum,
3939					    nat->nat_ipsumd);
3940		}
3941#endif
3942	}
3943
3944	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3945		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3946			tcp = fin->fin_dp;
3947
3948			tcp->th_sport = nat->nat_outport;
3949			fin->fin_data[0] = ntohs(nat->nat_outport);
3950		}
3951
3952		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3953			icmp = fin->fin_dp;
3954			icmp->icmp_id = nat->nat_outport;
3955		}
3956
3957		csump = nat_proto(fin, nat, nflags);
3958	}
3959
3960	fin->fin_ip->ip_src = nat->nat_outip;
3961
3962	nat_update(fin, nat, np);
3963
3964	/*
3965	 * The above comments do not hold for layer 4 (or higher) checksums...
3966	 */
3967	if (csump != NULL) {
3968		if (nat->nat_dir == NAT_OUTBOUND)
3969			fix_outcksum(fin, csump, nat->nat_sumd[1]);
3970		else
3971			fix_incksum(fin, csump, nat->nat_sumd[1]);
3972	}
3973#ifdef	IPFILTER_SYNC
3974	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
3975#endif
3976	/* ------------------------------------------------------------- */
3977	/* A few quick notes:						 */
3978	/*	Following are test conditions prior to calling the 	 */
3979	/*	appr_check routine.					 */
3980	/*								 */
3981	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
3982	/*	with a redirect rule, we attempt to match the packet's	 */
3983	/*	source port against in_dport, otherwise	we'd compare the */
3984	/*	packet's destination.			 		 */
3985	/* ------------------------------------------------------------- */
3986	if ((np != NULL) && (np->in_apr != NULL)) {
3987		i = appr_check(fin, nat);
3988		if (i == 0)
3989			i = 1;
3990	} else
3991		i = 1;
3992	ATOMIC_INCL(nat_stats.ns_mapped[1]);
3993	fin->fin_flx |= FI_NATED;
3994	return i;
3995}
3996
3997
3998/* ------------------------------------------------------------------------ */
3999/* Function:    fr_checknatin                                               */
4000/* Returns:     int - -1 == packet failed NAT checks so block it,           */
4001/*                     0 == no packet translation occurred,                 */
4002/*                     1 == packet was successfully translated.             */
4003/* Parameters:  fin(I)   - pointer to packet information                    */
4004/*              passp(I) - pointer to filtering result flags                */
4005/*                                                                          */
4006/* Check to see if an incoming packet should be changed.  ICMP packets are  */
4007/* first checked to see if they match an existing entry (if an error),      */
4008/* otherwise a search of the current NAT table is made.  If neither results */
4009/* in a match then a search for a matching NAT rule is made.  Create a new  */
4010/* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
4011/* packet header(s) as required.                                            */
4012/* ------------------------------------------------------------------------ */
4013int fr_checknatin(fin, passp)
4014fr_info_t *fin;
4015u_32_t *passp;
4016{
4017	u_int nflags, natadd;
4018	int rval, natfailed;
4019	struct ifnet *ifp;
4020	struct in_addr in;
4021	icmphdr_t *icmp;
4022	tcphdr_t *tcp;
4023	u_short dport;
4024	ipnat_t *np;
4025	nat_t *nat;
4026	u_32_t iph;
4027
4028	if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
4029		return 0;
4030
4031	tcp = NULL;
4032	icmp = NULL;
4033	dport = 0;
4034	natadd = 1;
4035	nflags = 0;
4036	natfailed = 0;
4037	ifp = fin->fin_ifp;
4038
4039	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4040		switch (fin->fin_p)
4041		{
4042		case IPPROTO_TCP :
4043			nflags = IPN_TCP;
4044			break;
4045		case IPPROTO_UDP :
4046			nflags = IPN_UDP;
4047			break;
4048		case IPPROTO_ICMP :
4049			icmp = fin->fin_dp;
4050
4051			/*
4052			 * This is an incoming packet, so the destination is
4053			 * the icmp_id and the source port equals 0
4054			 */
4055			if (nat_icmpquerytype4(icmp->icmp_type)) {
4056				nflags = IPN_ICMPQUERY;
4057				dport = icmp->icmp_id;
4058			} break;
4059		default :
4060			break;
4061		}
4062
4063		if ((nflags & IPN_TCPUDP)) {
4064			tcp = fin->fin_dp;
4065			dport = tcp->th_dport;
4066		}
4067	}
4068
4069	in = fin->fin_dst;
4070
4071	READ_ENTER(&ipf_nat);
4072
4073	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4074	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4075		/*EMPTY*/;
4076	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4077		natadd = 0;
4078	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4079				     fin->fin_src, in))) {
4080		nflags = nat->nat_flags;
4081	} else {
4082		u_32_t hv, msk, rmsk;
4083
4084		RWLOCK_EXIT(&ipf_nat);
4085		rmsk = rdr_masks;
4086		msk = 0xffffffff;
4087		WRITE_ENTER(&ipf_nat);
4088		/*
4089		 * If there is no current entry in the nat table for this IP#,
4090		 * create one for it (if there is a matching rule).
4091		 */
4092maskloop:
4093		iph = in.s_addr & htonl(msk);
4094		hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
4095		for (np = rdr_rules[hv]; np; np = np->in_rnext) {
4096			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4097				continue;
4098			if (np->in_v != fin->fin_v)
4099				continue;
4100			if (np->in_p && (np->in_p != fin->fin_p))
4101				continue;
4102			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4103				continue;
4104			if (np->in_flags & IPN_FILTER) {
4105				if (!nat_match(fin, np))
4106					continue;
4107			} else {
4108				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4109					continue;
4110				if (np->in_pmin &&
4111				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4112				     (ntohs(dport) < ntohs(np->in_pmin))))
4113					continue;
4114			}
4115
4116			if (*np->in_plabel != '\0') {
4117				if (!appr_ok(fin, tcp, np)) {
4118					continue;
4119				}
4120			}
4121
4122			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4123			if (nat != NULL) {
4124				np->in_hits++;
4125				break;
4126			} else
4127				natfailed = -1;
4128		}
4129
4130		if ((np == NULL) && (rmsk != 0)) {
4131			while (rmsk) {
4132				msk <<= 1;
4133				if (rmsk & 0x80000000)
4134					break;
4135				rmsk <<= 1;
4136			}
4137			if (rmsk != 0) {
4138				rmsk <<= 1;
4139				goto maskloop;
4140			}
4141		}
4142		MUTEX_DOWNGRADE(&ipf_nat);
4143	}
4144	if (nat != NULL) {
4145		rval = fr_natin(fin, nat, natadd, nflags);
4146		if (rval == 1) {
4147			MUTEX_ENTER(&nat->nat_lock);
4148			nat->nat_ref++;
4149			MUTEX_EXIT(&nat->nat_lock);
4150			nat->nat_touched = fr_ticks;
4151			fin->fin_nat = nat;
4152		}
4153	} else
4154		rval = natfailed;
4155	RWLOCK_EXIT(&ipf_nat);
4156
4157	if (rval == -1) {
4158		if (passp != NULL)
4159			*passp = FR_BLOCK;
4160		fin->fin_flx |= FI_BADNAT;
4161	}
4162	return rval;
4163}
4164
4165
4166/* ------------------------------------------------------------------------ */
4167/* Function:    fr_natin                                                    */
4168/* Returns:     int - -1 == packet failed NAT checks so block it,           */
4169/*                     1 == packet was successfully translated.             */
4170/* Parameters:  fin(I)    - pointer to packet information                   */
4171/*              nat(I)    - pointer to NAT structure                        */
4172/*              natadd(I) - flag indicating if it is safe to add frag cache */
4173/*              nflags(I) - NAT flags set for this packet                   */
4174/* Locks Held:  ipf_nat (READ)                                              */
4175/*                                                                          */
4176/* Translate a packet coming "in" on an interface.                          */
4177/* ------------------------------------------------------------------------ */
4178int fr_natin(fin, nat, natadd, nflags)
4179fr_info_t *fin;
4180nat_t *nat;
4181int natadd;
4182u_32_t nflags;
4183{
4184	icmphdr_t *icmp;
4185	u_short *csump;
4186	tcphdr_t *tcp;
4187	ipnat_t *np;
4188	int i;
4189
4190	tcp = NULL;
4191	csump = NULL;
4192	np = nat->nat_ptr;
4193	fin->fin_fr = nat->nat_fr;
4194
4195	if (np != NULL) {
4196		if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4197			(void) fr_nat_newfrag(fin, 0, nat);
4198
4199	/* ------------------------------------------------------------- */
4200	/* A few quick notes:						 */
4201	/*	Following are test conditions prior to calling the 	 */
4202	/*	appr_check routine.					 */
4203	/*								 */
4204	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4205	/*	with a map rule, we attempt to match the packet's	 */
4206	/*	source port against in_dport, otherwise	we'd compare the */
4207	/*	packet's destination.			 		 */
4208	/* ------------------------------------------------------------- */
4209		if (np->in_apr != NULL) {
4210			i = appr_check(fin, nat);
4211			if (i == -1) {
4212				return -1;
4213			}
4214		}
4215	}
4216
4217#ifdef	IPFILTER_SYNC
4218	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4219#endif
4220
4221	MUTEX_ENTER(&nat->nat_lock);
4222	nat->nat_bytes[0] += fin->fin_plen;
4223	nat->nat_pkts[0]++;
4224	MUTEX_EXIT(&nat->nat_lock);
4225
4226	fin->fin_ip->ip_dst = nat->nat_inip;
4227	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4228	if (nflags & IPN_TCPUDP)
4229		tcp = fin->fin_dp;
4230
4231	/*
4232	 * Fix up checksums, not by recalculating them, but
4233	 * simply computing adjustments.
4234	 * Why only do this for some platforms on inbound packets ?
4235	 * Because for those that it is done, IP processing is yet to happen
4236	 * and so the IPv4 header checksum has not yet been evaluated.
4237	 * Perhaps it should always be done for the benefit of things like
4238	 * fast forwarding (so that it doesn't need to be recomputed) but with
4239	 * header checksum offloading, perhaps it is a moot point.
4240	 */
4241#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4242     defined(__osf__) || defined(linux)
4243	if (nat->nat_dir == NAT_OUTBOUND)
4244		fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4245	else
4246		fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4247#endif
4248
4249	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4250		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4251			tcp->th_dport = nat->nat_inport;
4252			fin->fin_data[1] = ntohs(nat->nat_inport);
4253		}
4254
4255
4256		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4257			icmp = fin->fin_dp;
4258
4259			icmp->icmp_id = nat->nat_inport;
4260		}
4261
4262		csump = nat_proto(fin, nat, nflags);
4263	}
4264
4265	nat_update(fin, nat, np);
4266
4267	/*
4268	 * The above comments do not hold for layer 4 (or higher) checksums...
4269	 */
4270	if (csump != NULL) {
4271		if (nat->nat_dir == NAT_OUTBOUND)
4272			fix_incksum(fin, csump, nat->nat_sumd[0]);
4273		else
4274			fix_outcksum(fin, csump, nat->nat_sumd[0]);
4275	}
4276	ATOMIC_INCL(nat_stats.ns_mapped[0]);
4277	fin->fin_flx |= FI_NATED;
4278	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4279		fin->fin_nattag = &np->in_tag;
4280	return 1;
4281}
4282
4283
4284/* ------------------------------------------------------------------------ */
4285/* Function:    nat_proto                                                   */
4286/* Returns:     u_short* - pointer to transport header checksum to update,  */
4287/*                         NULL if the transport protocol is not recognised */
4288/*                         as needing a checksum update.                    */
4289/* Parameters:  fin(I)    - pointer to packet information                   */
4290/*              nat(I)    - pointer to NAT structure                        */
4291/*              nflags(I) - NAT flags set for this packet                   */
4292/*                                                                          */
4293/* Return the pointer to the checksum field for each protocol so understood.*/
4294/* If support for making other changes to a protocol header is required,    */
4295/* that is not strictly 'address' translation, such as clamping the MSS in  */
4296/* TCP down to a specific value, then do it from here.                      */
4297/* ------------------------------------------------------------------------ */
4298u_short *nat_proto(fin, nat, nflags)
4299fr_info_t *fin;
4300nat_t *nat;
4301u_int nflags;
4302{
4303	icmphdr_t *icmp;
4304	u_short *csump;
4305	tcphdr_t *tcp;
4306	udphdr_t *udp;
4307
4308	csump = NULL;
4309	if (fin->fin_out == 0) {
4310		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4311	} else {
4312		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4313	}
4314
4315	switch (fin->fin_p)
4316	{
4317	case IPPROTO_TCP :
4318		tcp = fin->fin_dp;
4319
4320		csump = &tcp->th_sum;
4321
4322		/*
4323		 * Do a MSS CLAMPING on a SYN packet,
4324		 * only deal IPv4 for now.
4325		 */
4326		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4327			nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
4328
4329		break;
4330
4331	case IPPROTO_UDP :
4332		udp = fin->fin_dp;
4333
4334		if (udp->uh_sum)
4335			csump = &udp->uh_sum;
4336		break;
4337
4338	case IPPROTO_ICMP :
4339		icmp = fin->fin_dp;
4340
4341		if ((nflags & IPN_ICMPQUERY) != 0) {
4342			if (icmp->icmp_cksum != 0)
4343				csump = &icmp->icmp_cksum;
4344		}
4345		break;
4346	}
4347	return csump;
4348}
4349
4350
4351/* ------------------------------------------------------------------------ */
4352/* Function:    fr_natunload                                                */
4353/* Returns:     Nil                                                         */
4354/* Parameters:  Nil                                                         */
4355/*                                                                          */
4356/* Free all memory used by NAT structures allocated at runtime.             */
4357/* ------------------------------------------------------------------------ */
4358void fr_natunload()
4359{
4360	ipftq_t *ifq, *ifqnext;
4361
4362	(void) nat_clearlist();
4363	(void) nat_flushtable();
4364
4365	/*
4366	 * Proxy timeout queues are not cleaned here because although they
4367	 * exist on the NAT list, appr_unload is called after fr_natunload
4368	 * and the proxies actually are responsible for them being created.
4369	 * Should the proxy timeouts have their own list?  There's no real
4370	 * justification as this is the only complication.
4371	 */
4372	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4373		ifqnext = ifq->ifq_next;
4374		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4375		    (fr_deletetimeoutqueue(ifq) == 0))
4376			fr_freetimeoutqueue(ifq);
4377	}
4378
4379	if (nat_table[0] != NULL) {
4380		KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4381		nat_table[0] = NULL;
4382	}
4383	if (nat_table[1] != NULL) {
4384		KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4385		nat_table[1] = NULL;
4386	}
4387	if (nat_rules != NULL) {
4388		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4389		nat_rules = NULL;
4390	}
4391	if (rdr_rules != NULL) {
4392		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4393		rdr_rules = NULL;
4394	}
4395	if (ipf_hm_maptable != NULL) {
4396		KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4397		ipf_hm_maptable = NULL;
4398	}
4399	if (nat_stats.ns_bucketlen[0] != NULL) {
4400		KFREES(nat_stats.ns_bucketlen[0],
4401		       sizeof(u_long *) * ipf_nattable_sz);
4402		nat_stats.ns_bucketlen[0] = NULL;
4403	}
4404	if (nat_stats.ns_bucketlen[1] != NULL) {
4405		KFREES(nat_stats.ns_bucketlen[1],
4406		       sizeof(u_long *) * ipf_nattable_sz);
4407		nat_stats.ns_bucketlen[1] = NULL;
4408	}
4409
4410	if (fr_nat_maxbucket_reset == 1)
4411		fr_nat_maxbucket = 0;
4412
4413	if (fr_nat_init == 1) {
4414		fr_nat_init = 0;
4415		fr_sttab_destroy(nat_tqb);
4416
4417		RW_DESTROY(&ipf_natfrag);
4418		RW_DESTROY(&ipf_nat);
4419
4420		MUTEX_DESTROY(&ipf_nat_new);
4421		MUTEX_DESTROY(&ipf_natio);
4422
4423		MUTEX_DESTROY(&nat_udptq.ifq_lock);
4424		MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4425		MUTEX_DESTROY(&nat_iptq.ifq_lock);
4426	}
4427}
4428
4429
4430/* ------------------------------------------------------------------------ */
4431/* Function:    fr_natexpire                                                */
4432/* Returns:     Nil                                                         */
4433/* Parameters:  Nil                                                         */
4434/*                                                                          */
4435/* Check all of the timeout queues for entries at the top which need to be  */
4436/* expired.                                                                 */
4437/* ------------------------------------------------------------------------ */
4438void fr_natexpire()
4439{
4440	ipftq_t *ifq, *ifqnext;
4441	ipftqent_t *tqe, *tqn;
4442	int i;
4443	SPL_INT(s);
4444
4445	SPL_NET(s);
4446	WRITE_ENTER(&ipf_nat);
4447	for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4448		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4449			if (tqe->tqe_die > fr_ticks)
4450				break;
4451			tqn = tqe->tqe_next;
4452			nat_delete(tqe->tqe_parent, NL_EXPIRE);
4453		}
4454	}
4455
4456	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4457		ifqnext = ifq->ifq_next;
4458
4459		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4460			if (tqe->tqe_die > fr_ticks)
4461				break;
4462			tqn = tqe->tqe_next;
4463			nat_delete(tqe->tqe_parent, NL_EXPIRE);
4464		}
4465	}
4466
4467	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4468		ifqnext = ifq->ifq_next;
4469
4470		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4471		    (ifq->ifq_ref == 0)) {
4472			fr_freetimeoutqueue(ifq);
4473		}
4474	}
4475
4476	if (fr_nat_doflush != 0) {
4477		nat_extraflush(2);
4478		fr_nat_doflush = 0;
4479	}
4480
4481	RWLOCK_EXIT(&ipf_nat);
4482	SPL_X(s);
4483}
4484
4485
4486/* ------------------------------------------------------------------------ */
4487/* Function:    fr_natsync                                                  */
4488/* Returns:     Nil                                                         */
4489/* Parameters:  ifp(I) - pointer to network interface                       */
4490/*                                                                          */
4491/* Walk through all of the currently active NAT sessions, looking for those */
4492/* which need to have their translated address updated.                     */
4493/* ------------------------------------------------------------------------ */
4494void fr_natsync(ifp)
4495void *ifp;
4496{
4497	u_32_t sum1, sum2, sumd;
4498	struct in_addr in;
4499	ipnat_t *n;
4500	nat_t *nat;
4501	void *ifp2;
4502	SPL_INT(s);
4503
4504	if (fr_running <= 0)
4505		return;
4506
4507	/*
4508	 * Change IP addresses for NAT sessions for any protocol except TCP
4509	 * since it will break the TCP connection anyway.  The only rules
4510	 * which will get changed are those which are "map ... -> 0/32",
4511	 * where the rule specifies the address is taken from the interface.
4512	 */
4513	SPL_NET(s);
4514	WRITE_ENTER(&ipf_nat);
4515
4516	if (fr_running <= 0) {
4517		RWLOCK_EXIT(&ipf_nat);
4518		return;
4519	}
4520
4521	for (nat = nat_instances; nat; nat = nat->nat_next) {
4522		if ((nat->nat_flags & IPN_TCP) != 0)
4523			continue;
4524		n = nat->nat_ptr;
4525		if ((n == NULL) ||
4526		    (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4527			continue;
4528		if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4529		     (ifp == nat->nat_ifps[1]))) {
4530			nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4531			if (nat->nat_ifnames[1][0] != '\0') {
4532				nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4533							  4);
4534			} else
4535				nat->nat_ifps[1] = nat->nat_ifps[0];
4536			ifp2 = nat->nat_ifps[0];
4537			if (ifp2 == NULL)
4538				continue;
4539
4540			/*
4541			 * Change the map-to address to be the same as the
4542			 * new one.
4543			 */
4544			sum1 = nat->nat_outip.s_addr;
4545			if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4546				nat->nat_outip = in;
4547			sum2 = nat->nat_outip.s_addr;
4548
4549			if (sum1 == sum2)
4550				continue;
4551			/*
4552			 * Readjust the checksum adjustment to take into
4553			 * account the new IP#.
4554			 */
4555			CALC_SUMD(sum1, sum2, sumd);
4556			/* XXX - dont change for TCP when solaris does
4557			 * hardware checksumming.
4558			 */
4559			sumd += nat->nat_sumd[0];
4560			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4561			nat->nat_sumd[1] = nat->nat_sumd[0];
4562		}
4563	}
4564
4565	for (n = nat_list; (n != NULL); n = n->in_next) {
4566		if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4567			n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4568		if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4569			n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4570	}
4571	RWLOCK_EXIT(&ipf_nat);
4572	SPL_X(s);
4573}
4574
4575
/* ------------------------------------------------------------------------ */
/* Function:    nat_icmpquerytype4                                          */
/* Returns:     int - 1 == query/reply type, 0 == any other type            */
/* Parameters:  icmptype(I) - ICMP type number                              */
/*                                                                          */
/* Tests to see if the ICMP type number passed is a query/response type or  */
/* not.                                                                     */
/* ------------------------------------------------------------------------ */
static int nat_icmpquerytype4(icmptype)
int icmptype;
{
	int isquery;

	/*
	 * The ICMP query NAT code needs both the query and its reply to
	 * match the same NAT rule.  The NAT structure does not record the
	 * ICMP type, and one structure serves every type sharing the same
	 * src, dest and id, so replies are classed as queries too.  Odd as
	 * it sounds, the original author points out that this is how the
	 * IPv4 specification defines them.
	 */
	switch (icmptype)
	{
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
	/*
	 * Router advertisement/solicitation is currently unsupported:
	 * it would require rewriting the ICMP data section.
	 */
	case ICMP_TSTAMP:
	case ICMP_TSTAMPREPLY:
	case ICMP_IREQ:
	case ICMP_IREQREPLY:
	case ICMP_MASKREQ:
	case ICMP_MASKREPLY:
		isquery = 1;
		break;
	default:
		isquery = 0;
		break;
	}

	return isquery;
}
4616
4617
4618/* ------------------------------------------------------------------------ */
4619/* Function:    nat_log                                                     */
4620/* Returns:     Nil                                                         */
4621/* Parameters:  nat(I)  - pointer to NAT structure                          */
4622/*              type(I) - type of log entry to create                       */
4623/*                                                                          */
4624/* Creates a NAT log entry.                                                 */
4625/* ------------------------------------------------------------------------ */
4626void nat_log(nat, type)
4627struct nat *nat;
4628u_int type;
4629{
4630#ifdef	IPFILTER_LOG
4631# ifndef LARGE_NAT
4632	struct ipnat *np;
4633	int rulen;
4634# endif
4635	struct natlog natl;
4636	void *items[1];
4637	size_t sizes[1];
4638	int types[1];
4639
4640	natl.nl_inip = nat->nat_inip;
4641	natl.nl_outip = nat->nat_outip;
4642	natl.nl_origip = nat->nat_oip;
4643	natl.nl_bytes[0] = nat->nat_bytes[0];
4644	natl.nl_bytes[1] = nat->nat_bytes[1];
4645	natl.nl_pkts[0] = nat->nat_pkts[0];
4646	natl.nl_pkts[1] = nat->nat_pkts[1];
4647	natl.nl_origport = nat->nat_oport;
4648	natl.nl_inport = nat->nat_inport;
4649	natl.nl_outport = nat->nat_outport;
4650	natl.nl_p = nat->nat_p;
4651	natl.nl_type = type;
4652	natl.nl_rule = -1;
4653# ifndef LARGE_NAT
4654	if (nat->nat_ptr != NULL) {
4655		for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4656			if (np == nat->nat_ptr) {
4657				natl.nl_rule = rulen;
4658				break;
4659			}
4660	}
4661# endif
4662	items[0] = &natl;
4663	sizes[0] = sizeof(natl);
4664	types[0] = 0;
4665
4666	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4667#endif
4668}
4669
4670
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*                                                                          */
/* Compatibility interface for OpenBSD to trigger the correct updating of   */
/* interface references within IPFilter.                                    */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp)
void *ifp;
{
	/* Nothing is done here beyond passing the interface to frsync(). */
	frsync(ifp);
}
#endif
4687
4688
4689/* ------------------------------------------------------------------------ */
4690/* Function:    fr_ipnatderef                                               */
4691/* Returns:     Nil                                                         */
4692/* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4693/* Write Locks: ipf_nat                                                     */
4694/*                                                                          */
4695/* ------------------------------------------------------------------------ */
4696void fr_ipnatderef(inp)
4697ipnat_t **inp;
4698{
4699	ipnat_t *in;
4700
4701	in = *inp;
4702	*inp = NULL;
4703	in->in_space++;
4704	in->in_use--;
4705	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4706		if (in->in_apr)
4707			appr_free(in->in_apr);
4708		MUTEX_DESTROY(&in->in_lock);
4709		KFREE(in);
4710		nat_stats.ns_rules--;
4711#if SOLARIS && !defined(_INET_IP_STACK_H)
4712		if (nat_stats.ns_rules == 0)
4713			pfil_delayed_copy = 1;
4714#endif
4715	}
4716}
4717
4718
4719/* ------------------------------------------------------------------------ */
4720/* Function:    fr_natderef                                                 */
4721/* Returns:     Nil                                                         */
4722/* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4723/*                                                                          */
4724/* Decrement the reference counter for this NAT table entry and free it if  */
4725/* there are no more things using it.                                       */
4726/*                                                                          */
4727/* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4728/* structure *because* it only gets called on paths _after_ nat_ref has been*/
4729/* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4730/* because nat_delete() will do that and send nat_ref to -1.                */
4731/*                                                                          */
4732/* Holding the lock on nat_lock is required to serialise nat_delete() being */
4733/* called from a NAT flush ioctl with a deref happening because of a packet.*/
4734/* ------------------------------------------------------------------------ */
4735void fr_natderef(natp)
4736nat_t **natp;
4737{
4738	nat_t *nat;
4739
4740	nat = *natp;
4741	*natp = NULL;
4742
4743	MUTEX_ENTER(&nat->nat_lock);
4744	if (nat->nat_ref > 1) {
4745		nat->nat_ref--;
4746		MUTEX_EXIT(&nat->nat_lock);
4747		return;
4748	}
4749	MUTEX_EXIT(&nat->nat_lock);
4750
4751	WRITE_ENTER(&ipf_nat);
4752	nat_delete(nat, NL_EXPIRE);
4753	RWLOCK_EXIT(&ipf_nat);
4754}
4755
4756
4757/* ------------------------------------------------------------------------ */
4758/* Function:    fr_natclone                                                 */
4759/* Returns:     ipstate_t* - NULL == cloning failed,                        */
4760/*                           else pointer to new state structure            */
4761/* Parameters:  fin(I) - pointer to packet information                      */
4762/*              is(I)  - pointer to master state structure                  */
4763/* Write Lock:  ipf_nat                                                     */
4764/*                                                                          */
4765/* Create a "duplcate" state table entry from the master.                   */
4766/* ------------------------------------------------------------------------ */
4767static nat_t *fr_natclone(fin, nat)
4768fr_info_t *fin;
4769nat_t *nat;
4770{
4771	frentry_t *fr;
4772	nat_t *clone;
4773	ipnat_t *np;
4774
4775	KMALLOC(clone, nat_t *);
4776	if (clone == NULL)
4777		return NULL;
4778	bcopy((char *)nat, (char *)clone, sizeof(*clone));
4779
4780	MUTEX_NUKE(&clone->nat_lock);
4781
4782	clone->nat_aps = NULL;
4783	/*
4784	 * Initialize all these so that nat_delete() doesn't cause a crash.
4785	 */
4786	clone->nat_tqe.tqe_pnext = NULL;
4787	clone->nat_tqe.tqe_next = NULL;
4788	clone->nat_tqe.tqe_ifq = NULL;
4789	clone->nat_tqe.tqe_parent = clone;
4790
4791	clone->nat_flags &= ~SI_CLONE;
4792	clone->nat_flags |= SI_CLONED;
4793
4794	if (clone->nat_hm)
4795		clone->nat_hm->hm_ref++;
4796
4797	if (nat_insert(clone, fin->fin_rev) == -1) {
4798		KFREE(clone);
4799		return NULL;
4800	}
4801	np = clone->nat_ptr;
4802	if (np != NULL) {
4803		if (nat_logging)
4804			nat_log(clone, (u_int)np->in_redir);
4805		np->in_use++;
4806	}
4807	fr = clone->nat_fr;
4808	if (fr != NULL) {
4809		MUTEX_ENTER(&fr->fr_lock);
4810		fr->fr_ref++;
4811		MUTEX_EXIT(&fr->fr_lock);
4812	}
4813
4814	/*
4815	 * Because the clone is created outside the normal loop of things and
4816	 * TCP has special needs in terms of state, initialise the timeout
4817	 * state of the new NAT from here.
4818	 */
4819	if (clone->nat_p == IPPROTO_TCP) {
4820		(void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4821				  clone->nat_flags);
4822	}
4823#ifdef	IPFILTER_SYNC
4824	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4825#endif
4826	if (nat_logging)
4827		nat_log(clone, NL_CLONE);
4828	return clone;
4829}
4830
4831
4832/* ------------------------------------------------------------------------ */
4833/* Function:   nat_wildok                                                   */
4834/* Returns:    int - 1 == packet's ports match wildcards                    */
4835/*                   0 == packet's ports don't match wildcards              */
4836/* Parameters: nat(I)   - NAT entry                                         */
4837/*             sport(I) - source port                                       */
4838/*             dport(I) - destination port                                  */
4839/*             flags(I) - wildcard flags                                    */
4840/*             dir(I)   - packet direction                                  */
4841/*                                                                          */
4842/* Use NAT entry and packet direction to determine which combination of     */
4843/* wildcard flags should be used.                                           */
4844/* ------------------------------------------------------------------------ */
4845static int nat_wildok(nat, sport, dport, flags, dir)
4846nat_t *nat;
4847int sport;
4848int dport;
4849int flags;
4850int dir;
4851{
4852	/*
4853	 * When called by       dir is set to
4854	 * nat_inlookup         NAT_INBOUND (0)
4855	 * nat_outlookup        NAT_OUTBOUND (1)
4856	 *
4857	 * We simply combine the packet's direction in dir with the original
4858	 * "intended" direction of that NAT entry in nat->nat_dir to decide
4859	 * which combination of wildcard flags to allow.
4860	 */
4861
4862	switch ((dir << 1) | nat->nat_dir)
4863	{
4864	case 3: /* outbound packet / outbound entry */
4865		if (((nat->nat_inport == sport) ||
4866		    (flags & SI_W_SPORT)) &&
4867		    ((nat->nat_oport == dport) ||
4868		    (flags & SI_W_DPORT)))
4869			return 1;
4870		break;
4871	case 2: /* outbound packet / inbound entry */
4872		if (((nat->nat_outport == sport) ||
4873		    (flags & SI_W_DPORT)) &&
4874		    ((nat->nat_oport == dport) ||
4875		    (flags & SI_W_SPORT)))
4876			return 1;
4877		break;
4878	case 1: /* inbound packet / outbound entry */
4879		if (((nat->nat_oport == sport) ||
4880		    (flags & SI_W_DPORT)) &&
4881		    ((nat->nat_outport == dport) ||
4882		    (flags & SI_W_SPORT)))
4883			return 1;
4884		break;
4885	case 0: /* inbound packet / inbound entry */
4886		if (((nat->nat_oport == sport) ||
4887		    (flags & SI_W_SPORT)) &&
4888		    ((nat->nat_outport == dport) ||
4889		    (flags & SI_W_DPORT)))
4890			return 1;
4891		break;
4892	default:
4893		break;
4894	}
4895
4896	return(0);
4897}
4898
4899
4900/* ------------------------------------------------------------------------ */
4901/* Function:    nat_mssclamp                                                */
4902/* Returns:     Nil                                                         */
4903/* Parameters:  tcp(I)    - pointer to TCP header                           */
4904/*              maxmss(I) - value to clamp the TCP MSS to                   */
4905/*              fin(I)    - pointer to packet information                   */
4906/*              csump(I)  - pointer to TCP checksum                         */
4907/*                                                                          */
4908/* Check for MSS option and clamp it if necessary.  If found and changed,   */
4909/* then the TCP header checksum will be updated to reflect the change in    */
4910/* the MSS.                                                                 */
4911/* ------------------------------------------------------------------------ */
4912static void nat_mssclamp(tcp, maxmss, fin, csump)
4913tcphdr_t *tcp;
4914u_32_t maxmss;
4915fr_info_t *fin;
4916u_short *csump;
4917{
4918	u_char *cp, *ep, opt;
4919	int hlen, advance;
4920	u_32_t mss, sumd;
4921
4922	hlen = TCP_OFF(tcp) << 2;
4923	if (hlen > sizeof(*tcp)) {
4924		cp = (u_char *)tcp + sizeof(*tcp);
4925		ep = (u_char *)tcp + hlen;
4926
4927		while (cp < ep) {
4928			opt = cp[0];
4929			if (opt == TCPOPT_EOL)
4930				break;
4931			else if (opt == TCPOPT_NOP) {
4932				cp++;
4933				continue;
4934			}
4935
4936			if (cp + 1 >= ep)
4937				break;
4938			advance = cp[1];
4939			if ((cp + advance > ep) || (advance <= 0))
4940				break;
4941			switch (opt)
4942			{
4943			case TCPOPT_MAXSEG:
4944				if (advance != 4)
4945					break;
4946				mss = cp[2] * 256 + cp[3];
4947				if (mss > maxmss) {
4948					cp[2] = maxmss / 256;
4949					cp[3] = maxmss & 0xff;
4950					CALC_SUMD(mss, maxmss, sumd);
4951					fix_outcksum(fin, csump, sumd);
4952				}
4953				break;
4954			default:
4955				/* ignore unknown options */
4956				break;
4957			}
4958
4959			cp += advance;
4960		}
4961	}
4962}
4963
4964
4965/* ------------------------------------------------------------------------ */
4966/* Function:    fr_setnatqueue                                              */
4967/* Returns:     Nil                                                         */
4968/* Parameters:  nat(I)- pointer to NAT structure                            */
4969/*              rev(I) - forward(0) or reverse(1) direction                 */
4970/* Locks:       ipf_nat (read or write)                                     */
4971/*                                                                          */
4972/* Put the NAT entry on its default queue entry, using rev as a helped in   */
4973/* determining which queue it should be placed on.                          */
4974/* ------------------------------------------------------------------------ */
4975void fr_setnatqueue(nat, rev)
4976nat_t *nat;
4977int rev;
4978{
4979	ipftq_t *oifq, *nifq;
4980
4981	if (nat->nat_ptr != NULL)
4982		nifq = nat->nat_ptr->in_tqehead[rev];
4983	else
4984		nifq = NULL;
4985
4986	if (nifq == NULL) {
4987		switch (nat->nat_p)
4988		{
4989		case IPPROTO_UDP :
4990			nifq = &nat_udptq;
4991			break;
4992		case IPPROTO_ICMP :
4993			nifq = &nat_icmptq;
4994			break;
4995		case IPPROTO_TCP :
4996			nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
4997			break;
4998		default :
4999			nifq = &nat_iptq;
5000			break;
5001		}
5002	}
5003
5004	oifq = nat->nat_tqe.tqe_ifq;
5005	/*
5006	 * If it's currently on a timeout queue, move it from one queue to
5007	 * another, else put it on the end of the newly determined queue.
5008	 */
5009	if (oifq != NULL)
5010		fr_movequeue(&nat->nat_tqe, oifq, nifq);
5011	else
5012		fr_queueappend(&nat->nat_tqe, nifq, nat);
5013	return;
5014}
5015
5016
5017/* ------------------------------------------------------------------------ */
5018/* Function:    nat_getnext                                                 */
5019/* Returns:     int - 0 == ok, else error                                   */
5020/* Parameters:  t(I)   - pointer to ipftoken structure                      */
5021/*              itp(I) - pointer to ipfgeniter_t structure                  */
5022/*                                                                          */
5023/* Fetch the next nat/ipnat structure pointer from the linked list and      */
5024/* copy it out to the storage space pointed to by itp_data.  The next item  */
5025/* in the list to look at is put back in the ipftoken struture.             */
5026/* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
5027/* ipf_freetoken will call a deref function for us and we dont want to call */
5028/* that twice (second time would be in the second switch statement below.   */
5029/* ------------------------------------------------------------------------ */
5030static int nat_getnext(t, itp)
5031ipftoken_t *t;
5032ipfgeniter_t *itp;
5033{
5034	hostmap_t *hm, *nexthm = NULL, zerohm;
5035	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5036	nat_t *nat, *nextnat = NULL, zeronat;
5037	int error = 0, count;
5038	char *dst;
5039
5040	count = itp->igi_nitems;
5041	if (count < 1)
5042		return ENOSPC;
5043
5044	READ_ENTER(&ipf_nat);
5045
5046	switch (itp->igi_type)
5047	{
5048	case IPFGENITER_HOSTMAP :
5049		hm = t->ipt_data;
5050		if (hm == NULL) {
5051			nexthm = ipf_hm_maplist;
5052		} else {
5053			nexthm = hm->hm_next;
5054		}
5055		break;
5056
5057	case IPFGENITER_IPNAT :
5058		ipn = t->ipt_data;
5059		if (ipn == NULL) {
5060			nextipnat = nat_list;
5061		} else {
5062			nextipnat = ipn->in_next;
5063		}
5064		break;
5065
5066	case IPFGENITER_NAT :
5067		nat = t->ipt_data;
5068		if (nat == NULL) {
5069			nextnat = nat_instances;
5070		} else {
5071			nextnat = nat->nat_next;
5072		}
5073		break;
5074	default :
5075		RWLOCK_EXIT(&ipf_nat);
5076		return EINVAL;
5077	}
5078
5079	dst = itp->igi_data;
5080	for (;;) {
5081		switch (itp->igi_type)
5082		{
5083		case IPFGENITER_HOSTMAP :
5084			if (nexthm != NULL) {
5085				if (count == 1) {
5086					ATOMIC_INC32(nexthm->hm_ref);
5087					t->ipt_data = nexthm;
5088				}
5089			} else {
5090				bzero(&zerohm, sizeof(zerohm));
5091				nexthm = &zerohm;
5092				count = 1;
5093				t->ipt_data = NULL;
5094			}
5095			break;
5096
5097		case IPFGENITER_IPNAT :
5098			if (nextipnat != NULL) {
5099				if (count == 1) {
5100					MUTEX_ENTER(&nextipnat->in_lock);
5101					nextipnat->in_use++;
5102					MUTEX_EXIT(&nextipnat->in_lock);
5103					t->ipt_data = nextipnat;
5104				}
5105			} else {
5106				bzero(&zeroipn, sizeof(zeroipn));
5107				nextipnat = &zeroipn;
5108				count = 1;
5109				t->ipt_data = NULL;
5110			}
5111			break;
5112
5113		case IPFGENITER_NAT :
5114			if (nextnat != NULL) {
5115				if (count == 1) {
5116					MUTEX_ENTER(&nextnat->nat_lock);
5117					nextnat->nat_ref++;
5118					MUTEX_EXIT(&nextnat->nat_lock);
5119					t->ipt_data = nextnat;
5120				}
5121			} else {
5122				bzero(&zeronat, sizeof(zeronat));
5123				nextnat = &zeronat;
5124				count = 1;
5125				t->ipt_data = NULL;
5126			}
5127			break;
5128		default :
5129			break;
5130		}
5131		RWLOCK_EXIT(&ipf_nat);
5132
5133		/*
5134		 * Copying out to user space needs to be done without the lock.
5135		 */
5136		switch (itp->igi_type)
5137		{
5138		case IPFGENITER_HOSTMAP :
5139			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5140			if (error != 0)
5141				error = EFAULT;
5142			else
5143				dst += sizeof(*nexthm);
5144			break;
5145
5146		case IPFGENITER_IPNAT :
5147			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5148			if (error != 0)
5149				error = EFAULT;
5150			else
5151				dst += sizeof(*nextipnat);
5152			break;
5153
5154		case IPFGENITER_NAT :
5155			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5156			if (error != 0)
5157				error = EFAULT;
5158			else
5159				dst += sizeof(*nextnat);
5160			break;
5161		}
5162
5163		if ((count == 1) || (error != 0))
5164			break;
5165
5166		count--;
5167
5168		READ_ENTER(&ipf_nat);
5169
5170		/*
5171		 * We need to have the lock again here to make sure that
5172		 * using _next is consistent.
5173		 */
5174		switch (itp->igi_type)
5175		{
5176		case IPFGENITER_HOSTMAP :
5177			nexthm = nexthm->hm_next;
5178			break;
5179		case IPFGENITER_IPNAT :
5180			nextipnat = nextipnat->in_next;
5181			break;
5182		case IPFGENITER_NAT :
5183			nextnat = nextnat->nat_next;
5184			break;
5185		}
5186	}
5187
5188
5189	switch (itp->igi_type)
5190	{
5191	case IPFGENITER_HOSTMAP :
5192		if (hm != NULL) {
5193			WRITE_ENTER(&ipf_nat);
5194			fr_hostmapdel(&hm);
5195			RWLOCK_EXIT(&ipf_nat);
5196		}
5197		break;
5198	case IPFGENITER_IPNAT :
5199		if (ipn != NULL) {
5200			fr_ipnatderef(&ipn);
5201		}
5202		break;
5203	case IPFGENITER_NAT :
5204		if (nat != NULL) {
5205			fr_natderef(&nat);
5206		}
5207		break;
5208	default :
5209		break;
5210	}
5211
5212	return error;
5213}
5214
5215
5216/* ------------------------------------------------------------------------ */
5217/* Function:    nat_iterator                                                */
5218/* Returns:     int - 0 == ok, else error                                   */
5219/* Parameters:  token(I) - pointer to ipftoken structure                    */
5220/*              itp(I) - pointer to ipfgeniter_t structure                  */
5221/*                                                                          */
5222/* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5223/* generic structure to iterate through a list.  There are three different  */
5224/* linked lists of NAT related information to go through: NAT rules, active */
5225/* NAT mappings and the NAT fragment cache.                                 */
5226/* ------------------------------------------------------------------------ */
5227static int nat_iterator(token, itp)
5228ipftoken_t *token;
5229ipfgeniter_t *itp;
5230{
5231	int error;
5232
5233	if (itp->igi_data == NULL)
5234		return EFAULT;
5235
5236	token->ipt_subtype = itp->igi_type;
5237
5238	switch (itp->igi_type)
5239	{
5240	case IPFGENITER_HOSTMAP :
5241	case IPFGENITER_IPNAT :
5242	case IPFGENITER_NAT :
5243		error = nat_getnext(token, itp);
5244		break;
5245
5246	case IPFGENITER_NATFRAG :
5247#ifdef USE_MUTEXES
5248		error = fr_nextfrag(token, itp, &ipfr_natlist,
5249				    &ipfr_nattail, &ipf_natfrag);
5250#else
5251		error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5252#endif
5253		break;
5254	default :
5255		error = EINVAL;
5256		break;
5257	}
5258
5259	return error;
5260}
5261
5262
5263/* ------------------------------------------------------------------------ */
5264/* Function:    nat_extraflush                                              */
5265/* Returns:     int - 0 == success, -1 == failure                           */
5266/* Parameters:  which(I) - how to flush the active NAT table                */
5267/* Write Locks: ipf_nat                                                     */
5268/*                                                                          */
5269/* Flush nat tables.  Three actions currently defined:                      */
5270/* which == 0 : flush all nat table entries                                 */
5271/* which == 1 : flush TCP connections which have started to close but are   */
5272/*	      stuck for some reason.                                        */
5273/* which == 2 : flush TCP connections which have been idle for a long time, */
5274/*	      starting at > 4 days idle and working back in successive half-*/
5275/*	      days to at most 12 hours old.  If this fails to free enough   */
5276/*            slots then work backwards in half hour slots to 30 minutes.   */
5277/*            If that too fails, then work backwards in 30 second intervals */
5278/*            for the last 30 minutes to at worst 30 seconds idle.          */
5279/* ------------------------------------------------------------------------ */
5280static int nat_extraflush(which)
5281int which;
5282{
5283	ipftq_t *ifq, *ifqnext;
5284	nat_t *nat, **natp;
5285	ipftqent_t *tqn;
5286	int removed;
5287	SPL_INT(s);
5288
5289	removed = 0;
5290
5291	SPL_NET(s);
5292
5293	switch (which)
5294	{
5295	case 0 :
5296		/*
5297		 * Style 0 flush removes everything...
5298		 */
5299		for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5300			nat_delete(nat, NL_FLUSH);
5301			removed++;
5302		}
5303		break;
5304
5305	case 1 :
5306		/*
5307		 * Since we're only interested in things that are closing,
5308		 * we can start with the appropriate timeout queue.
5309		 */
5310		for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5311		     ifq = ifq->ifq_next) {
5312
5313			for (tqn = ifq->ifq_head; tqn != NULL; ) {
5314				nat = tqn->tqe_parent;
5315				tqn = tqn->tqe_next;
5316				if (nat->nat_p != IPPROTO_TCP)
5317					break;
5318				nat_delete(nat, NL_EXPIRE);
5319				removed++;
5320			}
5321		}
5322
5323		/*
5324		 * Also need to look through the user defined queues.
5325		 */
5326		for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5327			ifqnext = ifq->ifq_next;
5328			for (tqn = ifq->ifq_head; tqn != NULL; ) {
5329				nat = tqn->tqe_parent;
5330				tqn = tqn->tqe_next;
5331				if (nat->nat_p != IPPROTO_TCP)
5332					continue;
5333
5334				if ((nat->nat_tcpstate[0] >
5335				     IPF_TCPS_ESTABLISHED) &&
5336				    (nat->nat_tcpstate[1] >
5337				     IPF_TCPS_ESTABLISHED)) {
5338					nat_delete(nat, NL_EXPIRE);
5339					removed++;
5340				}
5341			}
5342		}
5343		break;
5344
5345		/*
5346		 * Args 5-11 correspond to flushing those particular states
5347		 * for TCP connections.
5348		 */
5349	case IPF_TCPS_CLOSE_WAIT :
5350	case IPF_TCPS_FIN_WAIT_1 :
5351	case IPF_TCPS_CLOSING :
5352	case IPF_TCPS_LAST_ACK :
5353	case IPF_TCPS_FIN_WAIT_2 :
5354	case IPF_TCPS_TIME_WAIT :
5355	case IPF_TCPS_CLOSED :
5356		tqn = nat_tqb[which].ifq_head;
5357		while (tqn != NULL) {
5358			nat = tqn->tqe_parent;
5359			tqn = tqn->tqe_next;
5360			nat_delete(nat, NL_FLUSH);
5361			removed++;
5362		}
5363		break;
5364
5365	default :
5366		if (which < 30)
5367			break;
5368
5369		/*
5370		 * Take a large arbitrary number to mean the number of seconds
5371		 * for which which consider to be the maximum value we'll allow
5372		 * the expiration to be.
5373		 */
5374		which = IPF_TTLVAL(which);
5375		for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5376			if (fr_ticks - nat->nat_touched > which) {
5377				nat_delete(nat, NL_FLUSH);
5378				removed++;
5379			} else
5380				natp = &nat->nat_next;
5381		}
5382		break;
5383	}
5384
5385	if (which != 2) {
5386		SPL_X(s);
5387		return removed;
5388	}
5389
5390	/*
5391	 * Asked to remove inactive entries because the table is full.
5392	 */
5393	if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
5394		nat_last_force_flush = fr_ticks;
5395		removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5396	}
5397
5398	SPL_X(s);
5399	return removed;
5400}
5401
5402
5403/* ------------------------------------------------------------------------ */
5404/* Function:    nat_flush_entry                                             */
5405/* Returns:     0 - always succeeds                                         */
5406/* Parameters:  entry(I) - pointer to NAT entry                             */
5407/* Write Locks: ipf_nat                                                     */
5408/*                                                                          */
5409/* This function is a stepping stone between ipf_queueflush() and           */
5410/* nat_dlete().  It is used so we can provide a uniform interface via the   */
5411/* ipf_queueflush() function.  Since the nat_delete() function returns void */
5412/* we translate that to mean it always succeeds in deleting something.      */
5413/* ------------------------------------------------------------------------ */
5414static int nat_flush_entry(entry)
5415void *entry;
5416{
5417	nat_delete(entry, NL_FLUSH);
5418	return 0;
5419}
5420
5421
5422/* ------------------------------------------------------------------------ */
5423/* Function:    nat_gettable                                                */
5424/* Returns:     int     - 0 = success, else error                           */
5425/* Parameters:  data(I) - pointer to ioctl data                             */
5426/*                                                                          */
5427/* This function handles ioctl requests for tables of nat information.      */
5428/* At present the only table it deals with is the hash bucket statistics.   */
5429/* ------------------------------------------------------------------------ */
5430static int nat_gettable(data)
5431char *data;
5432{
5433	ipftable_t table;
5434	int error;
5435
5436	error = fr_inobj(data, &table, IPFOBJ_GTABLE);
5437	if (error != 0)
5438		return error;
5439
5440	switch (table.ita_type)
5441	{
5442	case IPFTABLE_BUCKETS_NATIN :
5443		error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table,
5444				ipf_nattable_sz * sizeof(u_long));
5445		break;
5446
5447	case IPFTABLE_BUCKETS_NATOUT :
5448		error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table,
5449				ipf_nattable_sz * sizeof(u_long));
5450		break;
5451
5452	default :
5453		return EINVAL;
5454	}
5455
5456	if (error != 0) {
5457		error = EFAULT;
5458	}
5459	return error;
5460}
5461