ip_nat.c revision 180778
1/*	$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 180778 2008-07-24 12:35:05Z darrenr $	*/
2
3/*
4 * Copyright (C) 1995-2003 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 */
8#if defined(KERNEL) || defined(_KERNEL)
9# undef KERNEL
10# undef _KERNEL
11# define        KERNEL	1
12# define        _KERNEL	1
13#endif
14#include <sys/errno.h>
15#include <sys/types.h>
16#include <sys/param.h>
17#include <sys/time.h>
18#include <sys/file.h>
19#if defined(_KERNEL) && defined(__NetBSD_Version__) && \
20    (__NetBSD_Version__ >= 399002000)
21# include <sys/kauth.h>
22#endif
23#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24    defined(_KERNEL)
25#if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
26#  include "opt_ipfilter_log.h"
27# else
28#  include "opt_ipfilter.h"
29# endif
30#endif
31#if !defined(_KERNEL)
32# include <stdio.h>
33# include <string.h>
34# include <stdlib.h>
35# define _KERNEL
36# ifdef __OpenBSD__
37struct file;
38# endif
39# include <sys/uio.h>
40# undef _KERNEL
41#endif
42#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
43# include <sys/filio.h>
44# include <sys/fcntl.h>
45#else
46# include <sys/ioctl.h>
47#endif
48#if !defined(AIX)
49# include <sys/fcntl.h>
50#endif
51#if !defined(linux)
52# include <sys/protosw.h>
53#endif
54#include <sys/socket.h>
55#if defined(_KERNEL)
56# include <sys/systm.h>
57# if !defined(__SVR4) && !defined(__svr4__)
58#  include <sys/mbuf.h>
59# endif
60#endif
61#if defined(__SVR4) || defined(__svr4__)
62# include <sys/filio.h>
63# include <sys/byteorder.h>
64# ifdef _KERNEL
65#  include <sys/dditypes.h>
66# endif
67# include <sys/stream.h>
68# include <sys/kmem.h>
69#endif
70#if __FreeBSD_version >= 300000
71# include <sys/queue.h>
72#endif
73#include <net/if.h>
74#if __FreeBSD_version >= 300000
75# include <net/if_var.h>
76# if defined(_KERNEL) && !defined(IPFILTER_LKM)
77#  include "opt_ipfilter.h"
78# endif
79#endif
80#ifdef sun
81# include <net/af.h>
82#endif
83#include <net/route.h>
84#include <netinet/in.h>
85#include <netinet/in_systm.h>
86#include <netinet/ip.h>
87
88#ifdef RFC1825
89# include <vpn/md5.h>
90# include <vpn/ipsec.h>
91extern struct ifnet vpnif;
92#endif
93
94#if !defined(linux)
95# include <netinet/ip_var.h>
96#endif
97#include <netinet/tcp.h>
98#include <netinet/udp.h>
99#include <netinet/ip_icmp.h>
100#include "netinet/ip_compat.h"
101#include <netinet/tcpip.h>
102#include "netinet/ip_fil.h"
103#include "netinet/ip_nat.h"
104#include "netinet/ip_frag.h"
105#include "netinet/ip_state.h"
106#include "netinet/ip_proxy.h"
107#ifdef	IPFILTER_SYNC
108#include "netinet/ip_sync.h"
109#endif
110#if (__FreeBSD_version >= 300000)
111# include <sys/malloc.h>
112#endif
113/* END OF INCLUDES */
114
115#undef	SOCKADDR_IN
116#define	SOCKADDR_IN	struct sockaddr_in
117
118#if !defined(lint)
119static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
120static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 180778 2008-07-24 12:35:05Z darrenr $";
121/* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */
122#endif
123
124
125/* ======================================================================== */
126/* How the NAT is organised and works.                                      */
127/*                                                                          */
128/* Inside (interface y) NAT       Outside (interface x)                     */
129/* -------------------- -+- -------------------------------------           */
130/* Packet going          |   out, processed by fr_checknatout() for x       */
131/* ------------>         |   ------------>                                  */
132/* src=10.1.1.1          |   src=192.1.1.1                                  */
133/*                       |                                                  */
134/*                       |   in, processed by fr_checknatin() for x         */
135/* <------------         |   <------------                                  */
136/* dst=10.1.1.1          |   dst=192.1.1.1                                  */
137/* -------------------- -+- -------------------------------------           */
138/* fr_checknatout() - changes ip_src and if required, sport                 */
139/*             - creates a new mapping, if required.                        */
140/* fr_checknatin()  - changes ip_dst and if required, dport                 */
141/*                                                                          */
142/* In the NAT table, internal source is recorded as "in" and externally     */
143/* seen as "out".                                                           */
144/* ======================================================================== */
145
146
147nat_t	**nat_table[2] = { NULL, NULL },
148	*nat_instances = NULL;
149ipnat_t	*nat_list = NULL;
150u_int	ipf_nattable_max = NAT_TABLE_MAX;
151u_int	ipf_nattable_sz = NAT_TABLE_SZ;
152u_int	ipf_natrules_sz = NAT_SIZE;
153u_int	ipf_rdrrules_sz = RDR_SIZE;
154u_int	ipf_hostmap_sz = HOSTMAP_SIZE;
155u_int	fr_nat_maxbucket = 0,
156	fr_nat_maxbucket_reset = 1;
157u_32_t	nat_masks = 0;
158u_32_t	rdr_masks = 0;
159u_long	nat_last_force_flush = 0;
160ipnat_t	**nat_rules = NULL;
161ipnat_t	**rdr_rules = NULL;
162hostmap_t	**ipf_hm_maptable  = NULL;
163hostmap_t	*ipf_hm_maplist  = NULL;
164ipftq_t	nat_tqb[IPF_TCP_NSTATES];
165ipftq_t	nat_udptq;
166ipftq_t	nat_icmptq;
167ipftq_t	nat_iptq;
168ipftq_t	*nat_utqe = NULL;
169int	fr_nat_doflush = 0;
170#ifdef  IPFILTER_LOG
171int	nat_logging = 1;
172#else
173int	nat_logging = 0;
174#endif
175
176u_long	fr_defnatage = DEF_NAT_AGE,
177	fr_defnatipage = 120,		/* 60 seconds */
178	fr_defnaticmpage = 6;		/* 3 seconds */
179natstat_t nat_stats;
180int	fr_nat_lock = 0;
181int	fr_nat_init = 0;
182#if SOLARIS && !defined(_INET_IP_STACK_H)
183extern	int		pfil_delayed_copy;
184#endif
185
186static	int	nat_flush_entry __P((void *));
187static	int	nat_flushtable __P((void));
188static	int	nat_clearlist __P((void));
189static	void	nat_addnat __P((struct ipnat *));
190static	void	nat_addrdr __P((struct ipnat *));
191static	void	nat_delrdr __P((struct ipnat *));
192static	void	nat_delnat __P((struct ipnat *));
193static	int	fr_natgetent __P((caddr_t, int));
194static	int	fr_natgetsz __P((caddr_t, int));
195static	int	fr_natputent __P((caddr_t, int));
196static	int	nat_extraflush __P((int));
197static	int	nat_gettable __P((char *));
198static	void	nat_tabmove __P((nat_t *));
199static	int	nat_match __P((fr_info_t *, ipnat_t *));
200static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
201static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
202static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
203				    struct in_addr, struct in_addr, u_32_t));
204static	int	nat_icmpquerytype4 __P((int));
205static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
206static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
207static	int	nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
208				      tcphdr_t *, nat_t **, int));
209static	int	nat_resolverule __P((ipnat_t *));
210static	nat_t	*fr_natclone __P((fr_info_t *, nat_t *));
211static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
212static	int	nat_wildok __P((nat_t *, int, int, int, int));
213static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *));
214static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *));
215
216
217/* ------------------------------------------------------------------------ */
218/* Function:    fr_natinit                                                  */
219/* Returns:     int - 0 == success, -1 == failure                           */
220/* Parameters:  Nil                                                         */
221/*                                                                          */
222/* Initialise all of the NAT locks, tables and other structures.            */
223/* ------------------------------------------------------------------------ */
224int fr_natinit()
225{
226	int i;
227
228	KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
229	if (nat_table[0] != NULL)
230		bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
231	else
232		return -1;
233
234	KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
235	if (nat_table[1] != NULL)
236		bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
237	else
238		return -2;
239
240	KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
241	if (nat_rules != NULL)
242		bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
243	else
244		return -3;
245
246	KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
247	if (rdr_rules != NULL)
248		bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
249	else
250		return -4;
251
252	KMALLOCS(ipf_hm_maptable, hostmap_t **, \
253		 sizeof(hostmap_t *) * ipf_hostmap_sz);
254	if (ipf_hm_maptable != NULL)
255		bzero((char *)ipf_hm_maptable,
256		      sizeof(hostmap_t *) * ipf_hostmap_sz);
257	else
258		return -5;
259	ipf_hm_maplist = NULL;
260
261	KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
262		 ipf_nattable_sz * sizeof(u_long));
263	if (nat_stats.ns_bucketlen[0] == NULL)
264		return -6;
265	bzero((char *)nat_stats.ns_bucketlen[0],
266	      ipf_nattable_sz * sizeof(u_long));
267
268	KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
269		 ipf_nattable_sz * sizeof(u_long));
270	if (nat_stats.ns_bucketlen[1] == NULL)
271		return -7;
272
273	bzero((char *)nat_stats.ns_bucketlen[1],
274	      ipf_nattable_sz * sizeof(u_long));
275
276	if (fr_nat_maxbucket == 0) {
277		for (i = ipf_nattable_sz; i > 0; i >>= 1)
278			fr_nat_maxbucket++;
279		fr_nat_maxbucket *= 2;
280	}
281
282	fr_sttab_init(nat_tqb);
283	/*
284	 * Increase this because we may have "keep state" following this too
285	 * and packet storms can occur if this is removed too quickly.
286	 */
287	nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
288	nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
289	nat_udptq.ifq_ttl = fr_defnatage;
290	nat_udptq.ifq_ref = 1;
291	nat_udptq.ifq_head = NULL;
292	nat_udptq.ifq_tail = &nat_udptq.ifq_head;
293	MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
294	nat_udptq.ifq_next = &nat_icmptq;
295	nat_icmptq.ifq_ttl = fr_defnaticmpage;
296	nat_icmptq.ifq_ref = 1;
297	nat_icmptq.ifq_head = NULL;
298	nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
299	MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
300	nat_icmptq.ifq_next = &nat_iptq;
301	nat_iptq.ifq_ttl = fr_defnatipage;
302	nat_iptq.ifq_ref = 1;
303	nat_iptq.ifq_head = NULL;
304	nat_iptq.ifq_tail = &nat_iptq.ifq_head;
305	MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
306	nat_iptq.ifq_next = NULL;
307
308	for (i = 0; i < IPF_TCP_NSTATES; i++) {
309		if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
310			nat_tqb[i].ifq_ttl = fr_defnaticmpage;
311#ifdef LARGE_NAT
312		else if (nat_tqb[i].ifq_ttl > fr_defnatage)
313			nat_tqb[i].ifq_ttl = fr_defnatage;
314#endif
315	}
316
317	/*
318	 * Increase this because we may have "keep state" following
319	 * this too and packet storms can occur if this is removed
320	 * too quickly.
321	 */
322	nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
323
324	RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
325	RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
326	MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
327	MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
328
329	fr_nat_init = 1;
330
331	return 0;
332}
333
334
335/* ------------------------------------------------------------------------ */
336/* Function:    nat_addrdr                                                  */
337/* Returns:     Nil                                                         */
338/* Parameters:  n(I) - pointer to NAT rule to add                           */
339/*                                                                          */
340/* Adds a redirect rule to the hash table of redirect rules and the list of */
341/* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
342/* use by redirect rules.                                                   */
343/* ------------------------------------------------------------------------ */
344static void nat_addrdr(n)
345ipnat_t *n;
346{
347	ipnat_t **np;
348	u_32_t j;
349	u_int hv;
350	int k;
351
352	k = count4bits(n->in_outmsk);
353	if ((k >= 0) && (k != 32))
354		rdr_masks |= 1 << k;
355	j = (n->in_outip & n->in_outmsk);
356	hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
357	np = rdr_rules + hv;
358	while (*np != NULL)
359		np = &(*np)->in_rnext;
360	n->in_rnext = NULL;
361	n->in_prnext = np;
362	n->in_hv = hv;
363	*np = n;
364}
365
366
367/* ------------------------------------------------------------------------ */
368/* Function:    nat_addnat                                                  */
369/* Returns:     Nil                                                         */
370/* Parameters:  n(I) - pointer to NAT rule to add                           */
371/*                                                                          */
372/* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
373/* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
374/* redirect rules.                                                          */
375/* ------------------------------------------------------------------------ */
376static void nat_addnat(n)
377ipnat_t *n;
378{
379	ipnat_t **np;
380	u_32_t j;
381	u_int hv;
382	int k;
383
384	k = count4bits(n->in_inmsk);
385	if ((k >= 0) && (k != 32))
386		nat_masks |= 1 << k;
387	j = (n->in_inip & n->in_inmsk);
388	hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
389	np = nat_rules + hv;
390	while (*np != NULL)
391		np = &(*np)->in_mnext;
392	n->in_mnext = NULL;
393	n->in_pmnext = np;
394	n->in_hv = hv;
395	*np = n;
396}
397
398
399/* ------------------------------------------------------------------------ */
400/* Function:    nat_delrdr                                                  */
401/* Returns:     Nil                                                         */
402/* Parameters:  n(I) - pointer to NAT rule to delete                        */
403/*                                                                          */
404/* Removes a redirect rule from the hash table of redirect rules.           */
405/* ------------------------------------------------------------------------ */
406static void nat_delrdr(n)
407ipnat_t *n;
408{
409	if (n->in_rnext)
410		n->in_rnext->in_prnext = n->in_prnext;
411	*n->in_prnext = n->in_rnext;
412}
413
414
415/* ------------------------------------------------------------------------ */
416/* Function:    nat_delnat                                                  */
417/* Returns:     Nil                                                         */
418/* Parameters:  n(I) - pointer to NAT rule to delete                        */
419/*                                                                          */
420/* Removes a NAT map rule from the hash table of NAT map rules.             */
421/* ------------------------------------------------------------------------ */
422static void nat_delnat(n)
423ipnat_t *n;
424{
425	if (n->in_mnext != NULL)
426		n->in_mnext->in_pmnext = n->in_pmnext;
427	*n->in_pmnext = n->in_mnext;
428}
429
430
431/* ------------------------------------------------------------------------ */
432/* Function:    nat_hostmap                                                 */
433/* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
434/*                                else a pointer to the hostmapping to use  */
435/* Parameters:  np(I)   - pointer to NAT rule                               */
436/*              real(I) - real IP address                                   */
437/*              map(I)  - mapped IP address                                 */
438/*              port(I) - destination port number                           */
439/* Write Locks: ipf_nat                                                     */
440/*                                                                          */
441/* Check if an ip address has already been allocated for a given mapping    */
442/* that is not doing port based translation.  If is not yet allocated, then */
443/* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
444/* ------------------------------------------------------------------------ */
445static struct hostmap *nat_hostmap(np, src, dst, map, port)
446ipnat_t *np;
447struct in_addr src;
448struct in_addr dst;
449struct in_addr map;
450u_32_t port;
451{
452	hostmap_t *hm;
453	u_int hv;
454
455	hv = (src.s_addr ^ dst.s_addr);
456	hv += src.s_addr;
457	hv += dst.s_addr;
458	hv %= HOSTMAP_SIZE;
459	for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_next)
460		if ((hm->hm_srcip.s_addr == src.s_addr) &&
461		    (hm->hm_dstip.s_addr == dst.s_addr) &&
462		    ((np == NULL) || (np == hm->hm_ipnat)) &&
463		    ((port == 0) || (port == hm->hm_port))) {
464			hm->hm_ref++;
465			return hm;
466		}
467
468	if (np == NULL)
469		return NULL;
470
471	KMALLOC(hm, hostmap_t *);
472	if (hm) {
473		hm->hm_next = ipf_hm_maplist;
474		hm->hm_pnext = &ipf_hm_maplist;
475		if (ipf_hm_maplist != NULL)
476			ipf_hm_maplist->hm_pnext = &hm->hm_next;
477		ipf_hm_maplist = hm;
478		hm->hm_hnext = ipf_hm_maptable[hv];
479		hm->hm_phnext = ipf_hm_maptable + hv;
480		if (ipf_hm_maptable[hv] != NULL)
481			ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
482		ipf_hm_maptable[hv] = hm;
483		hm->hm_ipnat = np;
484		hm->hm_srcip = src;
485		hm->hm_dstip = dst;
486		hm->hm_mapip = map;
487		hm->hm_ref = 1;
488		hm->hm_port = port;
489	}
490	return hm;
491}
492
493
494/* ------------------------------------------------------------------------ */
495/* Function:    fr_hostmapdel                                               */
496/* Returns:     Nil                                                         */
497/* Parameters:  hmp(I) - pointer to hostmap structure pointer               */
498/* Write Locks: ipf_nat                                                     */
499/*                                                                          */
500/* Decrement the references to this hostmap structure by one.  If this      */
501/* reaches zero then remove it and free it.                                 */
502/* ------------------------------------------------------------------------ */
503void fr_hostmapdel(hmp)
504struct hostmap **hmp;
505{
506	struct hostmap *hm;
507
508	hm = *hmp;
509	*hmp = NULL;
510
511	hm->hm_ref--;
512	if (hm->hm_ref == 0) {
513		if (hm->hm_hnext)
514			hm->hm_hnext->hm_phnext = hm->hm_phnext;
515		*hm->hm_phnext = hm->hm_hnext;
516		if (hm->hm_next)
517			hm->hm_next->hm_pnext = hm->hm_pnext;
518		*hm->hm_pnext = hm->hm_next;
519		KFREE(hm);
520	}
521}
522
523
524/* ------------------------------------------------------------------------ */
525/* Function:    fix_outcksum                                                */
526/* Returns:     Nil                                                         */
527/* Parameters:  fin(I) - pointer to packet information                      */
528/*              sp(I)  - location of 16bit checksum to update               */
529/*              n((I)  - amount to adjust checksum by                       */
530/*                                                                          */
531/* Adjusts the 16bit checksum by "n" for packets going out.                 */
532/* ------------------------------------------------------------------------ */
533void fix_outcksum(fin, sp, n)
534fr_info_t *fin;
535u_short *sp;
536u_32_t n;
537{
538	u_short sumshort;
539	u_32_t sum1;
540
541	if (n == 0)
542		return;
543
544	if (n & NAT_HW_CKSUM) {
545		n &= 0xffff;
546		n += fin->fin_dlen;
547		n = (n & 0xffff) + (n >> 16);
548		*sp = n & 0xffff;
549		return;
550	}
551	sum1 = (~ntohs(*sp)) & 0xffff;
552	sum1 += (n);
553	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
554	/* Again */
555	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
556	sumshort = ~(u_short)sum1;
557	*(sp) = htons(sumshort);
558}
559
560
561/* ------------------------------------------------------------------------ */
562/* Function:    fix_incksum                                                 */
563/* Returns:     Nil                                                         */
564/* Parameters:  fin(I) - pointer to packet information                      */
565/*              sp(I)  - location of 16bit checksum to update               */
566/*              n((I)  - amount to adjust checksum by                       */
567/*                                                                          */
568/* Adjusts the 16bit checksum by "n" for packets going in.                  */
569/* ------------------------------------------------------------------------ */
570void fix_incksum(fin, sp, n)
571fr_info_t *fin;
572u_short *sp;
573u_32_t n;
574{
575	u_short sumshort;
576	u_32_t sum1;
577
578	if (n == 0)
579		return;
580
581	if (n & NAT_HW_CKSUM) {
582		n &= 0xffff;
583		n += fin->fin_dlen;
584		n = (n & 0xffff) + (n >> 16);
585		*sp = n & 0xffff;
586		return;
587	}
588	sum1 = (~ntohs(*sp)) & 0xffff;
589	sum1 += ~(n) & 0xffff;
590	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
591	/* Again */
592	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
593	sumshort = ~(u_short)sum1;
594	*(sp) = htons(sumshort);
595}
596
597
598/* ------------------------------------------------------------------------ */
599/* Function:    fix_datacksum                                               */
600/* Returns:     Nil                                                         */
601/* Parameters:  sp(I)  - location of 16bit checksum to update               */
602/*              n((I)  - amount to adjust checksum by                       */
603/*                                                                          */
604/* Fix_datacksum is used *only* for the adjustments of checksums in the     */
605/* data section of an IP packet.                                            */
606/*                                                                          */
607/* The only situation in which you need to do this is when NAT'ing an       */
608/* ICMP error message. Such a message, contains in its body the IP header   */
609/* of the original IP packet, that causes the error.                        */
610/*                                                                          */
611/* You can't use fix_incksum or fix_outcksum in that case, because for the  */
612/* kernel the data section of the ICMP error is just data, and no special   */
613/* processing like hardware cksum or ntohs processing have been done by the */
614/* kernel on the data section.                                              */
615/* ------------------------------------------------------------------------ */
616void fix_datacksum(sp, n)
617u_short *sp;
618u_32_t n;
619{
620	u_short sumshort;
621	u_32_t sum1;
622
623	if (n == 0)
624		return;
625
626	sum1 = (~ntohs(*sp)) & 0xffff;
627	sum1 += (n);
628	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
629	/* Again */
630	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
631	sumshort = ~(u_short)sum1;
632	*(sp) = htons(sumshort);
633}
634
635
636/* ------------------------------------------------------------------------ */
637/* Function:    fr_nat_ioctl                                                */
638/* Returns:     int - 0 == success, != 0 == failure                         */
639/* Parameters:  data(I) - pointer to ioctl data                             */
640/*              cmd(I)  - ioctl command integer                             */
641/*              mode(I) - file mode bits used with open                     */
642/*                                                                          */
643/* Processes an ioctl call made to operate on the IP Filter NAT device.     */
644/* ------------------------------------------------------------------------ */
645int fr_nat_ioctl(data, cmd, mode, uid, ctx)
646ioctlcmd_t cmd;
647caddr_t data;
648int mode, uid;
649void *ctx;
650{
651	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
652	int error = 0, ret, arg, getlock;
653	ipnat_t natd;
654	SPL_INT(s);
655
656#if (BSD >= 199306) && defined(_KERNEL)
657# if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
658	if ((mode & FWRITE) &&
659	     kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
660				     KAUTH_REQ_NETWORK_FIREWALL_FW,
661				     NULL, NULL, NULL)) {
662		return EPERM;
663	}
664# else
665	if ((securelevel >= 3) && (mode & FWRITE)) {
666		return EPERM;
667	}
668# endif
669#endif
670
671#if defined(__osf__) && defined(_KERNEL)
672	getlock = 0;
673#else
674	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
675#endif
676
677	nat = NULL;     /* XXX gcc -Wuninitialized */
678	if (cmd == (ioctlcmd_t)SIOCADNAT) {
679		KMALLOC(nt, ipnat_t *);
680	} else {
681		nt = NULL;
682	}
683
684	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
685		if (mode & NAT_SYSSPACE) {
686			bcopy(data, (char *)&natd, sizeof(natd));
687			error = 0;
688		} else {
689			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
690		}
691	}
692
693	if (error != 0)
694		goto done;
695
696	/*
697	 * For add/delete, look to see if the NAT entry is already present
698	 */
699	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
700		nat = &natd;
701		if (nat->in_v == 0)	/* For backward compat. */
702			nat->in_v = 4;
703		nat->in_flags &= IPN_USERFLAGS;
704		if ((nat->in_redir & NAT_MAPBLK) == 0) {
705			if ((nat->in_flags & IPN_SPLIT) == 0)
706				nat->in_inip &= nat->in_inmsk;
707			if ((nat->in_flags & IPN_IPRANGE) == 0)
708				nat->in_outip &= nat->in_outmsk;
709		}
710		MUTEX_ENTER(&ipf_natio);
711		for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
712			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
713					IPN_CMPSIZ) == 0) {
714				if (nat->in_redir == NAT_REDIRECT &&
715				    nat->in_pnext != n->in_pnext)
716					continue;
717				break;
718			}
719	}
720
721	switch (cmd)
722	{
723#ifdef  IPFILTER_LOG
724	case SIOCIPFFB :
725	{
726		int tmp;
727
728		if (!(mode & FWRITE))
729			error = EPERM;
730		else {
731			tmp = ipflog_clear(IPL_LOGNAT);
732			error = BCOPYOUT((char *)&tmp, (char *)data,
733					 sizeof(tmp));
734			if (error != 0)
735				error = EFAULT;
736		}
737		break;
738	}
739
740	case SIOCSETLG :
741		if (!(mode & FWRITE))
742			error = EPERM;
743		else {
744			error = BCOPYIN((char *)data, (char *)&nat_logging,
745					sizeof(nat_logging));
746			if (error != 0)
747				error = EFAULT;
748		}
749		break;
750
751	case SIOCGETLG :
752		error = BCOPYOUT((char *)&nat_logging, (char *)data,
753				 sizeof(nat_logging));
754		if (error != 0)
755			error = EFAULT;
756		break;
757
758	case FIONREAD :
759		arg = iplused[IPL_LOGNAT];
760		error = BCOPYOUT(&arg, data, sizeof(arg));
761		if (error != 0)
762			error = EFAULT;
763		break;
764#endif
765	case SIOCADNAT :
766		if (!(mode & FWRITE)) {
767			error = EPERM;
768		} else if (n != NULL) {
769			error = EEXIST;
770		} else if (nt == NULL) {
771			error = ENOMEM;
772		}
773		if (error != 0) {
774			MUTEX_EXIT(&ipf_natio);
775			break;
776		}
777		bcopy((char *)nat, (char *)nt, sizeof(*n));
778		error = nat_siocaddnat(nt, np, getlock);
779		MUTEX_EXIT(&ipf_natio);
780		if (error == 0)
781			nt = NULL;
782		break;
783
784	case SIOCRMNAT :
785		if (!(mode & FWRITE)) {
786			error = EPERM;
787			n = NULL;
788		} else if (n == NULL) {
789			error = ESRCH;
790		}
791
792		if (error != 0) {
793			MUTEX_EXIT(&ipf_natio);
794			break;
795		}
796		nat_siocdelnat(n, np, getlock);
797
798		MUTEX_EXIT(&ipf_natio);
799		n = NULL;
800		break;
801
802	case SIOCGNATS :
803		nat_stats.ns_table[0] = nat_table[0];
804		nat_stats.ns_table[1] = nat_table[1];
805		nat_stats.ns_list = nat_list;
806		nat_stats.ns_maptable = ipf_hm_maptable;
807		nat_stats.ns_maplist = ipf_hm_maplist;
808		nat_stats.ns_nattab_sz = ipf_nattable_sz;
809		nat_stats.ns_nattab_max = ipf_nattable_max;
810		nat_stats.ns_rultab_sz = ipf_natrules_sz;
811		nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
812		nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
813		nat_stats.ns_instances = nat_instances;
814		nat_stats.ns_apslist = ap_sess_list;
815		nat_stats.ns_ticks = fr_ticks;
816		error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
817		break;
818
819	case SIOCGNATL :
820	    {
821		natlookup_t nl;
822
823		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
824		if (error == 0) {
825			void *ptr;
826
827			if (getlock) {
828				READ_ENTER(&ipf_nat);
829			}
830			ptr = nat_lookupredir(&nl);
831			if (getlock) {
832				RWLOCK_EXIT(&ipf_nat);
833			}
834			if (ptr != NULL) {
835				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
836			} else {
837				error = ESRCH;
838			}
839		}
840		break;
841	    }
842
843	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
844		if (!(mode & FWRITE)) {
845			error = EPERM;
846			break;
847		}
848		if (getlock) {
849			WRITE_ENTER(&ipf_nat);
850		}
851
852		error = BCOPYIN(data, &arg, sizeof(arg));
853		if (error != 0)
854			error = EFAULT;
855		else {
856			if (arg == 0)
857				ret = nat_flushtable();
858			else if (arg == 1)
859				ret = nat_clearlist();
860			else
861				ret = nat_extraflush(arg);
862		}
863
864		if (getlock) {
865			RWLOCK_EXIT(&ipf_nat);
866		}
867		if (error == 0) {
868			error = BCOPYOUT(&ret, data, sizeof(ret));
869		}
870		break;
871
872	case SIOCPROXY :
873		error = appr_ioctl(data, cmd, mode, ctx);
874		break;
875
876	case SIOCSTLCK :
877		if (!(mode & FWRITE)) {
878			error = EPERM;
879		} else {
880			error = fr_lock(data, &fr_nat_lock);
881		}
882		break;
883
884	case SIOCSTPUT :
885		if ((mode & FWRITE) != 0) {
886			error = fr_natputent(data, getlock);
887		} else {
888			error = EACCES;
889		}
890		break;
891
892	case SIOCSTGSZ :
893		if (fr_nat_lock) {
894			error = fr_natgetsz(data, getlock);
895		} else
896			error = EACCES;
897		break;
898
899	case SIOCSTGET :
900		if (fr_nat_lock) {
901			error = fr_natgetent(data, getlock);
902		} else
903			error = EACCES;
904		break;
905
906	case SIOCGENITER :
907	    {
908		ipfgeniter_t iter;
909		ipftoken_t *token;
910
911		SPL_SCHED(s);
912		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
913		if (error == 0) {
914			token = ipf_findtoken(iter.igi_type, uid, ctx);
915			if (token != NULL) {
916				error  = nat_iterator(token, &iter);
917			}
918			RWLOCK_EXIT(&ipf_tokens);
919		}
920		SPL_X(s);
921		break;
922	    }
923
924	case SIOCIPFDELTOK :
925		error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
926		if (error == 0) {
927			SPL_SCHED(s);
928			error = ipf_deltoken(arg, uid, ctx);
929			SPL_X(s);
930		} else {
931			error = EFAULT;
932		}
933		break;
934
935	case SIOCGTQTAB :
936		error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
937		break;
938
939	case SIOCGTABL :
940		error = nat_gettable(data);
941		break;
942
943	default :
944		error = EINVAL;
945		break;
946	}
947done:
948	if (nt != NULL)
949		KFREE(nt);
950	return error;
951}
952
953
954/* ------------------------------------------------------------------------ */
955/* Function:    nat_siocaddnat                                              */
956/* Returns:     int - 0 == success, != 0 == failure                         */
957/* Parameters:  n(I)       - pointer to new NAT rule                        */
958/*              np(I)      - pointer to where to insert new NAT rule        */
959/*              getlock(I) - flag indicating if lock on ipf_nat is held     */
960/* Mutex Locks: ipf_natio                                                   */
961/*                                                                          */
962/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
963/* from information passed to the kernel, then add it  to the appropriate   */
964/* NAT rule table(s).                                                       */
965/* ------------------------------------------------------------------------ */
966static int nat_siocaddnat(n, np, getlock)
967ipnat_t *n, **np;
968int getlock;
969{
970	int error = 0, i, j;
971
972	if (nat_resolverule(n) != 0)
973		return ENOENT;
974
975	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
976		return EINVAL;
977
978	n->in_use = 0;
979	if (n->in_redir & NAT_MAPBLK)
980		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
981	else if (n->in_flags & IPN_AUTOPORTMAP)
982		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
983	else if (n->in_flags & IPN_IPRANGE)
984		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
985	else if (n->in_flags & IPN_SPLIT)
986		n->in_space = 2;
987	else if (n->in_outmsk != 0)
988		n->in_space = ~ntohl(n->in_outmsk);
989	else
990		n->in_space = 1;
991
992	/*
993	 * Calculate the number of valid IP addresses in the output
994	 * mapping range.  In all cases, the range is inclusive of
995	 * the start and ending IP addresses.
996	 * If to a CIDR address, lose 2: broadcast + network address
997	 *                               (so subtract 1)
998	 * If to a range, add one.
999	 * If to a single IP address, set to 1.
1000	 */
1001	if (n->in_space) {
1002		if ((n->in_flags & IPN_IPRANGE) != 0)
1003			n->in_space += 1;
1004		else
1005			n->in_space -= 1;
1006	} else
1007		n->in_space = 1;
1008
1009	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1010	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1011		n->in_nip = ntohl(n->in_outip) + 1;
1012	else if ((n->in_flags & IPN_SPLIT) &&
1013		 (n->in_redir & NAT_REDIRECT))
1014		n->in_nip = ntohl(n->in_inip);
1015	else
1016		n->in_nip = ntohl(n->in_outip);
1017	if (n->in_redir & NAT_MAP) {
1018		n->in_pnext = ntohs(n->in_pmin);
1019		/*
1020		 * Multiply by the number of ports made available.
1021		 */
1022		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1023			n->in_space *= (ntohs(n->in_pmax) -
1024					ntohs(n->in_pmin) + 1);
1025			/*
1026			 * Because two different sources can map to
1027			 * different destinations but use the same
1028			 * local IP#/port #.
1029			 * If the result is smaller than in_space, then
1030			 * we may have wrapped around 32bits.
1031			 */
1032			i = n->in_inmsk;
1033			if ((i != 0) && (i != 0xffffffff)) {
1034				j = n->in_space * (~ntohl(i) + 1);
1035				if (j >= n->in_space)
1036					n->in_space = j;
1037				else
1038					n->in_space = 0xffffffff;
1039			}
1040		}
1041		/*
1042		 * If no protocol is specified, multiple by 256 to allow for
1043		 * at least one IP:IP mapping per protocol.
1044		 */
1045		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1046				j = n->in_space * 256;
1047				if (j >= n->in_space)
1048					n->in_space = j;
1049				else
1050					n->in_space = 0xffffffff;
1051		}
1052	}
1053
1054	/* Otherwise, these fields are preset */
1055
1056	if (getlock) {
1057		WRITE_ENTER(&ipf_nat);
1058	}
1059	n->in_next = NULL;
1060	*np = n;
1061
1062	if (n->in_age[0] != 0)
1063		n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1064
1065	if (n->in_age[1] != 0)
1066		n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
1067
1068	if (n->in_redir & NAT_REDIRECT) {
1069		n->in_flags &= ~IPN_NOTDST;
1070		nat_addrdr(n);
1071	}
1072	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1073		n->in_flags &= ~IPN_NOTSRC;
1074		nat_addnat(n);
1075	}
1076	MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1077
1078	n = NULL;
1079	nat_stats.ns_rules++;
1080#if SOLARIS && !defined(_INET_IP_STACK_H)
1081	pfil_delayed_copy = 0;
1082#endif
1083	if (getlock) {
1084		RWLOCK_EXIT(&ipf_nat);			/* WRITE */
1085	}
1086
1087	return error;
1088}
1089
1090
1091/* ------------------------------------------------------------------------ */
1092/* Function:    nat_resolvrule                                              */
1093/* Returns:     Nil                                                         */
1094/* Parameters:  n(I)  - pointer to NAT rule                                 */
1095/*                                                                          */
1096/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1097/* from information passed to the kernel, then add it  to the appropriate   */
1098/* NAT rule table(s).                                                       */
1099/* ------------------------------------------------------------------------ */
1100static int nat_resolverule(n)
1101ipnat_t *n;
1102{
1103	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1104	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1105
1106	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1107	if (n->in_ifnames[1][0] == '\0') {
1108		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1109		n->in_ifps[1] = n->in_ifps[0];
1110	} else {
1111		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
1112	}
1113
1114	if (n->in_plabel[0] != '\0') {
1115		n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1116		if (n->in_apr == NULL)
1117			return -1;
1118	}
1119	return 0;
1120}
1121
1122
1123/* ------------------------------------------------------------------------ */
1124/* Function:    nat_siocdelnat                                              */
1125/* Returns:     int - 0 == success, != 0 == failure                         */
1126/* Parameters:  n(I)       - pointer to new NAT rule                        */
1127/*              np(I)      - pointer to where to insert new NAT rule        */
1128/*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1129/* Mutex Locks: ipf_natio                                                   */
1130/*                                                                          */
1131/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1132/* from information passed to the kernel, then add it  to the appropriate   */
1133/* NAT rule table(s).                                                       */
1134/* ------------------------------------------------------------------------ */
1135static void nat_siocdelnat(n, np, getlock)
1136ipnat_t *n, **np;
1137int getlock;
1138{
1139	if (getlock) {
1140		WRITE_ENTER(&ipf_nat);
1141	}
1142	if (n->in_redir & NAT_REDIRECT)
1143		nat_delrdr(n);
1144	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1145		nat_delnat(n);
1146	if (nat_list == NULL) {
1147		nat_masks = 0;
1148		rdr_masks = 0;
1149	}
1150
1151	if (n->in_tqehead[0] != NULL) {
1152		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1153			fr_freetimeoutqueue(n->in_tqehead[1]);
1154		}
1155	}
1156
1157	if (n->in_tqehead[1] != NULL) {
1158		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1159			fr_freetimeoutqueue(n->in_tqehead[1]);
1160		}
1161	}
1162
1163	*np = n->in_next;
1164
1165	if (n->in_use == 0) {
1166		if (n->in_apr)
1167			appr_free(n->in_apr);
1168		MUTEX_DESTROY(&n->in_lock);
1169		KFREE(n);
1170		nat_stats.ns_rules--;
1171#if SOLARIS && !defined(_INET_IP_STACK_H)
1172		if (nat_stats.ns_rules == 0)
1173			pfil_delayed_copy = 1;
1174#endif
1175	} else {
1176		n->in_flags |= IPN_DELETE;
1177		n->in_next = NULL;
1178	}
1179	if (getlock) {
1180		RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
1181	}
1182}
1183
1184
1185/* ------------------------------------------------------------------------ */
1186/* Function:    fr_natgetsz                                                 */
1187/* Returns:     int - 0 == success, != 0 is the error value.                */
1188/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1189/*                        get the size of.                                  */
1190/*                                                                          */
1191/* Handle SIOCSTGSZ.                                                        */
1192/* Return the size of the nat list entry to be copied back to user space.   */
1193/* The size of the entry is stored in the ng_sz field and the enture natget */
1194/* structure is copied back to the user.                                    */
1195/* ------------------------------------------------------------------------ */
1196static int fr_natgetsz(data, getlock)
1197caddr_t data;
1198int getlock;
1199{
1200	ap_session_t *aps;
1201	nat_t *nat, *n;
1202	natget_t ng;
1203
1204	if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1205		return EFAULT;
1206
1207	if (getlock) {
1208		READ_ENTER(&ipf_nat);
1209	}
1210
1211	nat = ng.ng_ptr;
1212	if (!nat) {
1213		nat = nat_instances;
1214		ng.ng_sz = 0;
1215		/*
1216		 * Empty list so the size returned is 0.  Simple.
1217		 */
1218		if (nat == NULL) {
1219			if (getlock) {
1220				RWLOCK_EXIT(&ipf_nat);
1221			}
1222			if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1223				return EFAULT;
1224			return 0;
1225		}
1226	} else {
1227		/*
1228		 * Make sure the pointer we're copying from exists in the
1229		 * current list of entries.  Security precaution to prevent
1230		 * copying of random kernel data.
1231		 */
1232		for (n = nat_instances; n; n = n->nat_next)
1233			if (n == nat)
1234				break;
1235		if (n == NULL) {
1236			if (getlock) {
1237				RWLOCK_EXIT(&ipf_nat);
1238			}
1239			return ESRCH;
1240		}
1241	}
1242
1243	/*
1244	 * Incluse any space required for proxy data structures.
1245	 */
1246	ng.ng_sz = sizeof(nat_save_t);
1247	aps = nat->nat_aps;
1248	if (aps != NULL) {
1249		ng.ng_sz += sizeof(ap_session_t) - 4;
1250		if (aps->aps_data != 0)
1251			ng.ng_sz += aps->aps_psiz;
1252	}
1253	if (getlock) {
1254		RWLOCK_EXIT(&ipf_nat);
1255	}
1256
1257	if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1258		return EFAULT;
1259	return 0;
1260}
1261
1262
1263/* ------------------------------------------------------------------------ */
1264/* Function:    fr_natgetent                                                */
1265/* Returns:     int - 0 == success, != 0 is the error value.                */
1266/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1267/*                        to NAT structure to copy out.                     */
1268/*                                                                          */
1269/* Handle SIOCSTGET.                                                        */
1270/* Copies out NAT entry to user space.  Any additional data held for a      */
1271/* proxy is also copied, as to is the NAT rule which was responsible for it */
1272/* ------------------------------------------------------------------------ */
1273static int fr_natgetent(data, getlock)
1274caddr_t data;
1275int getlock;
1276{
1277	int error, outsize;
1278	ap_session_t *aps;
1279	nat_save_t *ipn, ipns;
1280	nat_t *n, *nat;
1281
1282	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1283	if (error != 0)
1284		return error;
1285
1286	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1287		return EINVAL;
1288
1289	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1290	if (ipn == NULL)
1291		return ENOMEM;
1292
1293	if (getlock) {
1294		READ_ENTER(&ipf_nat);
1295	}
1296
1297	ipn->ipn_dsize = ipns.ipn_dsize;
1298	nat = ipns.ipn_next;
1299	if (nat == NULL) {
1300		nat = nat_instances;
1301		if (nat == NULL) {
1302			if (nat_instances == NULL)
1303				error = ENOENT;
1304			goto finished;
1305		}
1306	} else {
1307		/*
1308		 * Make sure the pointer we're copying from exists in the
1309		 * current list of entries.  Security precaution to prevent
1310		 * copying of random kernel data.
1311		 */
1312		for (n = nat_instances; n; n = n->nat_next)
1313			if (n == nat)
1314				break;
1315		if (n == NULL) {
1316			error = ESRCH;
1317			goto finished;
1318		}
1319	}
1320	ipn->ipn_next = nat->nat_next;
1321
1322	/*
1323	 * Copy the NAT structure.
1324	 */
1325	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1326
1327	/*
1328	 * If we have a pointer to the NAT rule it belongs to, save that too.
1329	 */
1330	if (nat->nat_ptr != NULL)
1331		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1332		      sizeof(ipn->ipn_ipnat));
1333
1334	/*
1335	 * If we also know the NAT entry has an associated filter rule,
1336	 * save that too.
1337	 */
1338	if (nat->nat_fr != NULL)
1339		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1340		      sizeof(ipn->ipn_fr));
1341
1342	/*
1343	 * Last but not least, if there is an application proxy session set
1344	 * up for this NAT entry, then copy that out too, including any
1345	 * private data saved along side it by the proxy.
1346	 */
1347	aps = nat->nat_aps;
1348	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1349	if (aps != NULL) {
1350		char *s;
1351
1352		if (outsize < sizeof(*aps)) {
1353			error = ENOBUFS;
1354			goto finished;
1355		}
1356
1357		s = ipn->ipn_data;
1358		bcopy((char *)aps, s, sizeof(*aps));
1359		s += sizeof(*aps);
1360		outsize -= sizeof(*aps);
1361		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1362			bcopy(aps->aps_data, s, aps->aps_psiz);
1363		else
1364			error = ENOBUFS;
1365	}
1366	if (error == 0) {
1367		if (getlock) {
1368			RWLOCK_EXIT(&ipf_nat);
1369			getlock = 0;
1370		}
1371		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1372	}
1373
1374finished:
1375	if (getlock) {
1376		RWLOCK_EXIT(&ipf_nat);
1377	}
1378	if (ipn != NULL) {
1379		KFREES(ipn, ipns.ipn_dsize);
1380	}
1381	return error;
1382}
1383
1384
1385/* ------------------------------------------------------------------------ */
1386/* Function:    fr_natputent                                                */
1387/* Returns:     int - 0 == success, != 0 is the error value.                */
1388/* Parameters:  data(I) -     pointer to natget structure with NAT          */
1389/*                            structure information to load into the kernel */
1390/*              getlock(I) - flag indicating whether or not a write lock    */
1391/*                           on ipf_nat is already held.                    */
1392/*                                                                          */
1393/* Handle SIOCSTPUT.                                                        */
1394/* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1395/* firewall rule data structures, if pointers to them indicate so.          */
1396/* ------------------------------------------------------------------------ */
1397static int fr_natputent(data, getlock)
1398caddr_t data;
1399int getlock;
1400{
1401	nat_save_t ipn, *ipnn;
1402	ap_session_t *aps;
1403	nat_t *n, *nat;
1404	frentry_t *fr;
1405	fr_info_t fin;
1406	ipnat_t *in;
1407	int error;
1408
1409	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1410	if (error != 0)
1411		return error;
1412
1413	/*
1414	 * Initialise early because of code at junkput label.
1415	 */
1416	in = NULL;
1417	aps = NULL;
1418	nat = NULL;
1419	ipnn = NULL;
1420	fr = NULL;
1421
1422	/*
1423	 * New entry, copy in the rest of the NAT entry if it's size is more
1424	 * than just the nat_t structure.
1425	 */
1426	if (ipn.ipn_dsize > sizeof(ipn)) {
1427		if (ipn.ipn_dsize > 81920) {
1428			error = ENOMEM;
1429			goto junkput;
1430		}
1431
1432		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1433		if (ipnn == NULL)
1434			return ENOMEM;
1435
1436		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1437		if (error != 0) {
1438			error = EFAULT;
1439			goto junkput;
1440		}
1441	} else
1442		ipnn = &ipn;
1443
1444	KMALLOC(nat, nat_t *);
1445	if (nat == NULL) {
1446		error = ENOMEM;
1447		goto junkput;
1448	}
1449
1450	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1451	/*
1452	 * Initialize all these so that nat_delete() doesn't cause a crash.
1453	 */
1454	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1455	nat->nat_tqe.tqe_pnext = NULL;
1456	nat->nat_tqe.tqe_next = NULL;
1457	nat->nat_tqe.tqe_ifq = NULL;
1458	nat->nat_tqe.tqe_parent = nat;
1459
1460	/*
1461	 * Restore the rule associated with this nat session
1462	 */
1463	in = ipnn->ipn_nat.nat_ptr;
1464	if (in != NULL) {
1465		KMALLOC(in, ipnat_t *);
1466		nat->nat_ptr = in;
1467		if (in == NULL) {
1468			error = ENOMEM;
1469			goto junkput;
1470		}
1471		bzero((char *)in, offsetof(struct ipnat, in_next6));
1472		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1473		in->in_use = 1;
1474		in->in_flags |= IPN_DELETE;
1475
1476		ATOMIC_INC(nat_stats.ns_rules);
1477
1478		if (nat_resolverule(in) != 0) {
1479			error = ESRCH;
1480			goto junkput;
1481		}
1482	}
1483
1484	/*
1485	 * Check that the NAT entry doesn't already exist in the kernel.
1486	 *
1487	 * For NAT_OUTBOUND, we're lookup for a duplicate MAP entry.  To do
1488	 * this, we check to see if the inbound combination of addresses and
1489	 * ports is already known.  Similar logic is applied for NAT_INBOUND.
1490	 *
1491	 */
1492	bzero((char *)&fin, sizeof(fin));
1493	fin.fin_p = nat->nat_p;
1494	if (nat->nat_dir == NAT_OUTBOUND) {
1495		fin.fin_ifp = nat->nat_ifps[0];
1496		fin.fin_data[0] = ntohs(nat->nat_oport);
1497		fin.fin_data[1] = ntohs(nat->nat_outport);
1498		if (getlock) {
1499			READ_ENTER(&ipf_nat);
1500		}
1501		n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1502				 nat->nat_oip, nat->nat_inip);
1503		if (getlock) {
1504			RWLOCK_EXIT(&ipf_nat);
1505		}
1506		if (n != NULL) {
1507			error = EEXIST;
1508			goto junkput;
1509		}
1510	} else if (nat->nat_dir == NAT_INBOUND) {
1511		fin.fin_ifp = nat->nat_ifps[0];
1512		fin.fin_data[0] = ntohs(nat->nat_outport);
1513		fin.fin_data[1] = ntohs(nat->nat_oport);
1514		if (getlock) {
1515			READ_ENTER(&ipf_nat);
1516		}
1517		n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1518				  nat->nat_outip, nat->nat_oip);
1519		if (getlock) {
1520			RWLOCK_EXIT(&ipf_nat);
1521		}
1522		if (n != NULL) {
1523			error = EEXIST;
1524			goto junkput;
1525		}
1526	} else {
1527		error = EINVAL;
1528		goto junkput;
1529	}
1530
1531	/*
1532	 * Restore ap_session_t structure.  Include the private data allocated
1533	 * if it was there.
1534	 */
1535	aps = nat->nat_aps;
1536	if (aps != NULL) {
1537		KMALLOC(aps, ap_session_t *);
1538		nat->nat_aps = aps;
1539		if (aps == NULL) {
1540			error = ENOMEM;
1541			goto junkput;
1542		}
1543		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1544		if (in != NULL)
1545			aps->aps_apr = in->in_apr;
1546		else
1547			aps->aps_apr = NULL;
1548		if (aps->aps_psiz != 0) {
1549			if (aps->aps_psiz > 81920) {
1550				error = ENOMEM;
1551				goto junkput;
1552			}
1553			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1554			if (aps->aps_data == NULL) {
1555				error = ENOMEM;
1556				goto junkput;
1557			}
1558			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1559			      aps->aps_psiz);
1560		} else {
1561			aps->aps_psiz = 0;
1562			aps->aps_data = NULL;
1563		}
1564	}
1565
1566	/*
1567	 * If there was a filtering rule associated with this entry then
1568	 * build up a new one.
1569	 */
1570	fr = nat->nat_fr;
1571	if (fr != NULL) {
1572		if ((nat->nat_flags & SI_NEWFR) != 0) {
1573			KMALLOC(fr, frentry_t *);
1574			nat->nat_fr = fr;
1575			if (fr == NULL) {
1576				error = ENOMEM;
1577				goto junkput;
1578			}
1579			ipnn->ipn_nat.nat_fr = fr;
1580			fr->fr_ref = 1;
1581			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1582			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1583
1584			fr->fr_ref = 1;
1585			fr->fr_dsize = 0;
1586			fr->fr_data = NULL;
1587			fr->fr_type = FR_T_NONE;
1588
1589			MUTEX_NUKE(&fr->fr_lock);
1590			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1591		} else {
1592			if (getlock) {
1593				READ_ENTER(&ipf_nat);
1594			}
1595			for (n = nat_instances; n; n = n->nat_next)
1596				if (n->nat_fr == fr)
1597					break;
1598
1599			if (n != NULL) {
1600				MUTEX_ENTER(&fr->fr_lock);
1601				fr->fr_ref++;
1602				MUTEX_EXIT(&fr->fr_lock);
1603			}
1604			if (getlock) {
1605				RWLOCK_EXIT(&ipf_nat);
1606			}
1607
1608			if (!n) {
1609				error = ESRCH;
1610				goto junkput;
1611			}
1612		}
1613	}
1614
1615	if (ipnn != &ipn) {
1616		KFREES(ipnn, ipn.ipn_dsize);
1617		ipnn = NULL;
1618	}
1619
1620	if (getlock) {
1621		WRITE_ENTER(&ipf_nat);
1622	}
1623	error = nat_insert(nat, nat->nat_rev);
1624	if ((error == 0) && (aps != NULL)) {
1625		aps->aps_next = ap_sess_list;
1626		ap_sess_list = aps;
1627	}
1628	if (getlock) {
1629		RWLOCK_EXIT(&ipf_nat);
1630	}
1631
1632	if (error == 0)
1633		return 0;
1634
1635	error = ENOMEM;
1636
1637junkput:
1638	if (fr != NULL)
1639		(void) fr_derefrule(&fr);
1640
1641	if ((ipnn != NULL) && (ipnn != &ipn)) {
1642		KFREES(ipnn, ipn.ipn_dsize);
1643	}
1644	if (nat != NULL) {
1645		if (aps != NULL) {
1646			if (aps->aps_data != NULL) {
1647				KFREES(aps->aps_data, aps->aps_psiz);
1648			}
1649			KFREE(aps);
1650		}
1651		if (in != NULL) {
1652			if (in->in_apr)
1653				appr_free(in->in_apr);
1654			KFREE(in);
1655		}
1656		KFREE(nat);
1657	}
1658	return error;
1659}
1660
1661
/* ------------------------------------------------------------------------ */
/* Function:    nat_delete                                                  */
/* Returns:     Nil                                                         */
/* Parameters:  nat(I)     - pointer to NAT structure to delete             */
/*              logtype(I) - type of LOG record to create before deleting   */
/* Write Lock:  ipf_nat                                                     */
/*                                                                          */
/* Delete a nat entry from the various lists and table.  If NAT logging is  */
/* enabled then generate a NAT log record for this event.                   */
/* The entry is always unlinked, but its memory is only released when the   */
/* reference count shows that nobody else holds it; otherwise the count is  */
/* reduced and the entry is left behind, counted in ns_orphans.             */
/* ------------------------------------------------------------------------ */
void nat_delete(nat, logtype)
struct nat *nat;
int logtype;
{
	struct ipnat *ipn;
	int removed = 0;	/* set when this call unlinked the entry */

	if (logtype != 0 && nat_logging != 0)
		nat_log(nat, logtype);
#if defined(NEED_LOCAL_RAND) && defined(_KERNEL)
	ipf_rand_push(nat, sizeof(*nat));
#endif

	/*
	 * Take it as a general indication that all the pointers are set if
	 * nat_pnext is set.
	 */
	if (nat->nat_pnext != NULL) {
		removed = 1;

		nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
		nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;

		/* Unlink from the list of all NAT entries. */
		*nat->nat_pnext = nat->nat_next;
		if (nat->nat_next != NULL) {
			nat->nat_next->nat_pnext = nat->nat_pnext;
			nat->nat_next = NULL;
		}
		nat->nat_pnext = NULL;

		/* Unlink from the first hash chain. */
		*nat->nat_phnext[0] = nat->nat_hnext[0];
		if (nat->nat_hnext[0] != NULL) {
			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
			nat->nat_hnext[0] = NULL;
		}
		nat->nat_phnext[0] = NULL;

		/* Unlink from the second hash chain. */
		*nat->nat_phnext[1] = nat->nat_hnext[1];
		if (nat->nat_hnext[1] != NULL) {
			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
			nat->nat_hnext[1] = NULL;
		}
		nat->nat_phnext[1] = NULL;

		if ((nat->nat_flags & SI_WILDP) != 0)
			nat_stats.ns_wilds--;
	}

	/*
	 * Clear the pointer that nat_me refers to, so that it no longer
	 * leads to this entry.
	 */
	if (nat->nat_me != NULL) {
		*nat->nat_me = NULL;
		nat->nat_me = NULL;
	}

	/* Take the entry off its timeout queue, if it is on one. */
	if (nat->nat_tqe.tqe_ifq != NULL)
		fr_deletequeueentry(&nat->nat_tqe);

	if (logtype == NL_EXPIRE)
		nat_stats.ns_expire++;

	MUTEX_ENTER(&nat->nat_lock);
	/*
	 * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
	 * This happens when a nat'd packet is blocked and we want to throw
	 * away the NAT session.
	 */
	if (logtype == NL_DESTROY) {
		if (nat->nat_ref > 2) {
			/*
			 * Others still hold the entry: give up two
			 * references and leave it allocated.
			 */
			nat->nat_ref -= 2;
			MUTEX_EXIT(&nat->nat_lock);
			if (removed)
				nat_stats.ns_orphans++;
			return;
		}
	} else if (nat->nat_ref > 1) {
		/*
		 * Others still hold the entry: give up one reference and
		 * leave it allocated.
		 */
		nat->nat_ref--;
		MUTEX_EXIT(&nat->nat_lock);
		if (removed)
			nat_stats.ns_orphans++;
		return;
	}
	MUTEX_EXIT(&nat->nat_lock);

	/*
	 * At this point, nat_ref is 1, doing "--" would make it 0..
	 */
	nat->nat_ref = 0;
	/*
	 * An entry that was not unlinked by this call is taken off the
	 * orphan count now that it is finally being released.
	 */
	if (!removed)
		nat_stats.ns_orphans--;

#ifdef	IPFILTER_SYNC
	if (nat->nat_sync)
		ipfsync_del(nat->nat_sync);
#endif

	if (nat->nat_fr != NULL)
		(void) fr_derefrule(&nat->nat_fr);

	if (nat->nat_hm != NULL)
		fr_hostmapdel(&nat->nat_hm);

	/*
	 * If there is an active reference from the nat entry to its parent
	 * rule, decrement the rule's reference count and free it too if no
	 * longer being used.
	 */
	ipn = nat->nat_ptr;
	if (ipn != NULL) {
		fr_ipnatderef(&ipn);
	}

	MUTEX_DESTROY(&nat->nat_lock);

	aps_free(nat->nat_aps);
	nat_stats.ns_inuse--;

	/*
	 * If there's a fragment table entry too for this nat entry, then
	 * dereference that as well.  This is after nat_lock is released
	 * because of Tru64.
	 */
	fr_forgetnat((void *)nat);

	KFREE(nat);
}
1796
1797
1798/* ------------------------------------------------------------------------ */
1799/* Function:    nat_flushtable                                              */
1800/* Returns:     int - number of NAT rules deleted                           */
1801/* Parameters:  Nil                                                         */
1802/*                                                                          */
1803/* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1804/* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1805/* ------------------------------------------------------------------------ */
1806/*
1807 * nat_flushtable - clear the NAT table of all mapping entries.
1808 */
1809static int nat_flushtable()
1810{
1811	nat_t *nat;
1812	int j = 0;
1813
1814	/*
1815	 * ALL NAT mappings deleted, so lets just make the deletions
1816	 * quicker.
1817	 */
1818	if (nat_table[0] != NULL)
1819		bzero((char *)nat_table[0],
1820		      sizeof(nat_table[0]) * ipf_nattable_sz);
1821	if (nat_table[1] != NULL)
1822		bzero((char *)nat_table[1],
1823		      sizeof(nat_table[1]) * ipf_nattable_sz);
1824
1825	while ((nat = nat_instances) != NULL) {
1826		nat_delete(nat, NL_FLUSH);
1827		j++;
1828	}
1829
1830	nat_stats.ns_inuse = 0;
1831	return j;
1832}
1833
1834
1835/* ------------------------------------------------------------------------ */
1836/* Function:    nat_clearlist                                               */
1837/* Returns:     int - number of NAT/RDR rules deleted                       */
1838/* Parameters:  Nil                                                         */
1839/*                                                                          */
1840/* Delete all rules in the current list of rules.  There is nothing elegant */
1841/* about this cleanup: simply free all entries on the list of rules and     */
1842/* clear out the tables used for hashed NAT rule lookups.                   */
1843/* ------------------------------------------------------------------------ */
1844static int nat_clearlist()
1845{
1846	ipnat_t *n, **np = &nat_list;
1847	int i = 0;
1848
1849	if (nat_rules != NULL)
1850		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1851	if (rdr_rules != NULL)
1852		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1853
1854	while ((n = *np) != NULL) {
1855		*np = n->in_next;
1856		if (n->in_use == 0) {
1857			if (n->in_apr != NULL)
1858				appr_free(n->in_apr);
1859			MUTEX_DESTROY(&n->in_lock);
1860			KFREE(n);
1861			nat_stats.ns_rules--;
1862		} else {
1863			n->in_flags |= IPN_DELETE;
1864			n->in_next = NULL;
1865		}
1866		i++;
1867	}
1868#if SOLARIS && !defined(_INET_IP_STACK_H)
1869	pfil_delayed_copy = 1;
1870#endif
1871	nat_masks = 0;
1872	rdr_masks = 0;
1873	return i;
1874}
1875
1876
1877/* ------------------------------------------------------------------------ */
1878/* Function:    nat_newmap                                                  */
1879/* Returns:     int - -1 == error, 0 == success                             */
1880/* Parameters:  fin(I) - pointer to packet information                      */
1881/*              nat(I) - pointer to NAT entry                               */
1882/*              ni(I)  - pointer to structure with misc. information needed */
1883/*                       to create new NAT entry.                           */
1884/*                                                                          */
1885/* Given an empty NAT structure, populate it with new information about a   */
1886/* new NAT session, as defined by the matching NAT rule.                    */
1887/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1888/* to the new IP address for the translation.                               */
1889/* ------------------------------------------------------------------------ */
1890static INLINE int nat_newmap(fin, nat, ni)
1891fr_info_t *fin;
1892nat_t *nat;
1893natinfo_t *ni;
1894{
1895	u_short st_port, dport, sport, port, sp, dp;
1896	struct in_addr in, inb;
1897	hostmap_t *hm;
1898	u_32_t flags;
1899	u_32_t st_ip;
1900	ipnat_t *np;
1901	nat_t *natl;
1902	int l;
1903
1904	/*
1905	 * If it's an outbound packet which doesn't match any existing
1906	 * record, then create a new port
1907	 */
1908	l = 0;
1909	hm = NULL;
1910	np = ni->nai_np;
1911	st_ip = np->in_nip;
1912	st_port = np->in_pnext;
1913	flags = ni->nai_flags;
1914	sport = ni->nai_sport;
1915	dport = ni->nai_dport;
1916
1917	/*
1918	 * Do a loop until we either run out of entries to try or we find
1919	 * a NAT mapping that isn't currently being used.  This is done
1920	 * because the change to the source is not (usually) being fixed.
1921	 */
1922	do {
1923		port = 0;
1924		in.s_addr = htonl(np->in_nip);
1925		if (l == 0) {
1926			/*
1927			 * Check to see if there is an existing NAT
1928			 * setup for this IP address pair.
1929			 */
1930			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1931					 in, 0);
1932			if (hm != NULL)
1933				in.s_addr = hm->hm_mapip.s_addr;
1934		} else if ((l == 1) && (hm != NULL)) {
1935			fr_hostmapdel(&hm);
1936		}
1937		in.s_addr = ntohl(in.s_addr);
1938
1939		nat->nat_hm = hm;
1940
1941		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1942			if (l > 0)
1943				return -1;
1944		}
1945
1946		if (np->in_redir == NAT_BIMAP &&
1947		    np->in_inmsk == np->in_outmsk) {
1948			/*
1949			 * map the address block in a 1:1 fashion
1950			 */
1951			in.s_addr = np->in_outip;
1952			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1953			in.s_addr = ntohl(in.s_addr);
1954
1955		} else if (np->in_redir & NAT_MAPBLK) {
1956			if ((l >= np->in_ppip) || ((l > 0) &&
1957			     !(flags & IPN_TCPUDP)))
1958				return -1;
1959			/*
1960			 * map-block - Calculate destination address.
1961			 */
1962			in.s_addr = ntohl(fin->fin_saddr);
1963			in.s_addr &= ntohl(~np->in_inmsk);
1964			inb.s_addr = in.s_addr;
1965			in.s_addr /= np->in_ippip;
1966			in.s_addr &= ntohl(~np->in_outmsk);
1967			in.s_addr += ntohl(np->in_outip);
1968			/*
1969			 * Calculate destination port.
1970			 */
1971			if ((flags & IPN_TCPUDP) &&
1972			    (np->in_ppip != 0)) {
1973				port = ntohs(sport) + l;
1974				port %= np->in_ppip;
1975				port += np->in_ppip *
1976					(inb.s_addr % np->in_ippip);
1977				port += MAPBLK_MINPORT;
1978				port = htons(port);
1979			}
1980
1981		} else if ((np->in_outip == 0) &&
1982			   (np->in_outmsk == 0xffffffff)) {
1983			/*
1984			 * 0/32 - use the interface's IP address.
1985			 */
1986			if ((l > 0) ||
1987			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1988				       &in, NULL) == -1)
1989				return -1;
1990			in.s_addr = ntohl(in.s_addr);
1991
1992		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1993			/*
1994			 * 0/0 - use the original source address/port.
1995			 */
1996			if (l > 0)
1997				return -1;
1998			in.s_addr = ntohl(fin->fin_saddr);
1999
2000		} else if ((np->in_outmsk != 0xffffffff) &&
2001			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2002			np->in_nip++;
2003
2004		natl = NULL;
2005
2006		if ((flags & IPN_TCPUDP) &&
2007		    ((np->in_redir & NAT_MAPBLK) == 0) &&
2008		    (np->in_flags & IPN_AUTOPORTMAP)) {
2009			/*
2010			 * "ports auto" (without map-block)
2011			 */
2012			if ((l > 0) && (l % np->in_ppip == 0)) {
2013				if (l > np->in_space) {
2014					return -1;
2015				} else if ((l > np->in_ppip) &&
2016					   np->in_outmsk != 0xffffffff)
2017					np->in_nip++;
2018			}
2019			if (np->in_ppip != 0) {
2020				port = ntohs(sport);
2021				port += (l % np->in_ppip);
2022				port %= np->in_ppip;
2023				port += np->in_ppip *
2024					(ntohl(fin->fin_saddr) %
2025					 np->in_ippip);
2026				port += MAPBLK_MINPORT;
2027				port = htons(port);
2028			}
2029
2030		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2031			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2032			/*
2033			 * Standard port translation.  Select next port.
2034			 */
2035			if (np->in_flags & IPN_SEQUENTIAL) {
2036				port = htons(np->in_pnext);
2037			} else {
2038				port = ipf_random() % (ntohs(np->in_pmax) -
2039						       ntohs(np->in_pmin));
2040			}
2041			np->in_pnext++;
2042
2043			if (np->in_pnext > ntohs(np->in_pmax)) {
2044				np->in_pnext = ntohs(np->in_pmin);
2045				if (np->in_outmsk != 0xffffffff)
2046					np->in_nip++;
2047			}
2048		}
2049
2050		if (np->in_flags & IPN_IPRANGE) {
2051			if (np->in_nip > ntohl(np->in_outmsk))
2052				np->in_nip = ntohl(np->in_outip);
2053		} else {
2054			if ((np->in_outmsk != 0xffffffff) &&
2055			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2056			    ntohl(np->in_outip))
2057				np->in_nip = ntohl(np->in_outip) + 1;
2058		}
2059
2060		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2061			port = sport;
2062
2063		/*
2064		 * Here we do a lookup of the connection as seen from
2065		 * the outside.  If an IP# pair already exists, try
2066		 * again.  So if you have A->B becomes C->B, you can
2067		 * also have D->E become C->E but not D->B causing
2068		 * another C->B.  Also take protocol and ports into
2069		 * account when determining whether a pre-existing
2070		 * NAT setup will cause an external conflict where
2071		 * this is appropriate.
2072		 */
2073		inb.s_addr = htonl(in.s_addr);
2074		sp = fin->fin_data[0];
2075		dp = fin->fin_data[1];
2076		fin->fin_data[0] = fin->fin_data[1];
2077		fin->fin_data[1] = htons(port);
2078		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2079				    (u_int)fin->fin_p, fin->fin_dst, inb);
2080		fin->fin_data[0] = sp;
2081		fin->fin_data[1] = dp;
2082
2083		/*
2084		 * Has the search wrapped around and come back to the
2085		 * start ?
2086		 */
2087		if ((natl != NULL) &&
2088		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2089		    (np->in_nip != 0) && (st_ip == np->in_nip))
2090			return -1;
2091		l++;
2092	} while (natl != NULL);
2093
2094	if (np->in_space > 0)
2095		np->in_space--;
2096
2097	/* Setup the NAT table */
2098	nat->nat_inip = fin->fin_src;
2099	nat->nat_outip.s_addr = htonl(in.s_addr);
2100	nat->nat_oip = fin->fin_dst;
2101	if (nat->nat_hm == NULL)
2102		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2103					  nat->nat_outip, 0);
2104
2105	/*
2106	 * The ICMP checksum does not have a pseudo header containing
2107	 * the IP addresses
2108	 */
2109	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2110	ni->nai_sum2 = LONG_SUM(in.s_addr);
2111	if ((flags & IPN_TCPUDP)) {
2112		ni->nai_sum1 += ntohs(sport);
2113		ni->nai_sum2 += ntohs(port);
2114	}
2115
2116	if (flags & IPN_TCPUDP) {
2117		nat->nat_inport = sport;
2118		nat->nat_outport = port;	/* sport */
2119		nat->nat_oport = dport;
2120		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2121	} else if (flags & IPN_ICMPQUERY) {
2122		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2123		nat->nat_inport = port;
2124		nat->nat_outport = port;
2125	} else if (fin->fin_p == IPPROTO_GRE) {
2126#if 0
2127		nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2128		if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2129			nat->nat_oport = 0;/*fin->fin_data[1];*/
2130			nat->nat_inport = 0;/*fin->fin_data[0];*/
2131			nat->nat_outport = 0;/*fin->fin_data[0];*/
2132			nat->nat_call[0] = fin->fin_data[0];
2133			nat->nat_call[1] = fin->fin_data[0];
2134		}
2135#endif
2136	}
2137	ni->nai_ip.s_addr = in.s_addr;
2138	ni->nai_port = port;
2139	ni->nai_nport = dport;
2140	return 0;
2141}
2142
2143
2144/* ------------------------------------------------------------------------ */
2145/* Function:    nat_newrdr                                                  */
2146/* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2147/*                    allow rule to be moved if IPN_ROUNDR is set.          */
2148/* Parameters:  fin(I) - pointer to packet information                      */
2149/*              nat(I) - pointer to NAT entry                               */
2150/*              ni(I)  - pointer to structure with misc. information needed */
2151/*                       to create new NAT entry.                           */
2152/*                                                                          */
2153/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2154/* to the new IP address for the translation.                               */
2155/* ------------------------------------------------------------------------ */
2156static INLINE int nat_newrdr(fin, nat, ni)
2157fr_info_t *fin;
2158nat_t *nat;
2159natinfo_t *ni;
2160{
2161	u_short nport, dport, sport;
2162	struct in_addr in, inb;
2163	u_short sp, dp;
2164	hostmap_t *hm;
2165	u_32_t flags;
2166	ipnat_t *np;
2167	nat_t *natl;
2168	int move;
2169
2170	move = 1;
2171	hm = NULL;
2172	in.s_addr = 0;
2173	np = ni->nai_np;
2174	flags = ni->nai_flags;
2175	sport = ni->nai_sport;
2176	dport = ni->nai_dport;
2177
2178	/*
2179	 * If the matching rule has IPN_STICKY set, then we want to have the
2180	 * same rule kick in as before.  Why would this happen?  If you have
2181	 * a collection of rdr rules with "round-robin sticky", the current
2182	 * packet might match a different one to the previous connection but
2183	 * we want the same destination to be used.
2184	 */
2185	if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2186	    ((np->in_flags & IPN_STICKY) != 0)) {
2187		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2188				 (u_32_t)dport);
2189		if (hm != NULL) {
2190			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2191			np = hm->hm_ipnat;
2192			ni->nai_np = np;
2193			move = 0;
2194		}
2195	}
2196
2197	/*
2198	 * Otherwise, it's an inbound packet. Most likely, we don't
2199	 * want to rewrite source ports and source addresses. Instead,
2200	 * we want to rewrite to a fixed internal address and fixed
2201	 * internal port.
2202	 */
2203	if (np->in_flags & IPN_SPLIT) {
2204		in.s_addr = np->in_nip;
2205
2206		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2207			hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2208					 in, (u_32_t)dport);
2209			if (hm != NULL) {
2210				in.s_addr = hm->hm_mapip.s_addr;
2211				move = 0;
2212			}
2213		}
2214
2215		if (hm == NULL || hm->hm_ref == 1) {
2216			if (np->in_inip == htonl(in.s_addr)) {
2217				np->in_nip = ntohl(np->in_inmsk);
2218				move = 0;
2219			} else {
2220				np->in_nip = ntohl(np->in_inip);
2221			}
2222		}
2223
2224	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2225		/*
2226		 * 0/32 - use the interface's IP address.
2227		 */
2228		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2229			return -1;
2230		in.s_addr = ntohl(in.s_addr);
2231
2232	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2233		/*
2234		 * 0/0 - use the original destination address/port.
2235		 */
2236		in.s_addr = ntohl(fin->fin_daddr);
2237
2238	} else if (np->in_redir == NAT_BIMAP &&
2239		   np->in_inmsk == np->in_outmsk) {
2240		/*
2241		 * map the address block in a 1:1 fashion
2242		 */
2243		in.s_addr = np->in_inip;
2244		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2245		in.s_addr = ntohl(in.s_addr);
2246	} else {
2247		in.s_addr = ntohl(np->in_inip);
2248	}
2249
2250	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2251		nport = dport;
2252	else {
2253		/*
2254		 * Whilst not optimized for the case where
2255		 * pmin == pmax, the gain is not significant.
2256		 */
2257		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2258		    (np->in_pmin != np->in_pmax)) {
2259			nport = ntohs(dport) - ntohs(np->in_pmin) +
2260				ntohs(np->in_pnext);
2261			nport = htons(nport);
2262		} else
2263			nport = np->in_pnext;
2264	}
2265
2266	/*
2267	 * When the redirect-to address is set to 0.0.0.0, just
2268	 * assume a blank `forwarding' of the packet.  We don't
2269	 * setup any translation for this either.
2270	 */
2271	if (in.s_addr == 0) {
2272		if (nport == dport)
2273			return -1;
2274		in.s_addr = ntohl(fin->fin_daddr);
2275	}
2276
2277	/*
2278	 * Check to see if this redirect mapping already exists and if
2279	 * it does, return "failure" (allowing it to be created will just
2280	 * cause one or both of these "connections" to stop working.)
2281	 */
2282	inb.s_addr = htonl(in.s_addr);
2283	sp = fin->fin_data[0];
2284	dp = fin->fin_data[1];
2285	fin->fin_data[1] = fin->fin_data[0];
2286	fin->fin_data[0] = ntohs(nport);
2287	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2288			     (u_int)fin->fin_p, inb, fin->fin_src);
2289	fin->fin_data[0] = sp;
2290	fin->fin_data[1] = dp;
2291	if (natl != NULL)
2292		return -1;
2293
2294	nat->nat_inip.s_addr = htonl(in.s_addr);
2295	nat->nat_outip = fin->fin_dst;
2296	nat->nat_oip = fin->fin_src;
2297	if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2298		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
2299					  (u_32_t)dport);
2300
2301	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2302	ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2303
2304	ni->nai_ip.s_addr = in.s_addr;
2305	ni->nai_nport = nport;
2306	ni->nai_port = sport;
2307
2308	if (flags & IPN_TCPUDP) {
2309		nat->nat_inport = nport;
2310		nat->nat_outport = dport;
2311		nat->nat_oport = sport;
2312		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2313	} else if (flags & IPN_ICMPQUERY) {
2314		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2315		nat->nat_inport = nport;
2316		nat->nat_outport = nport;
2317	} else if (fin->fin_p == IPPROTO_GRE) {
2318#if 0
2319		nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2320		if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2321			nat->nat_call[0] = fin->fin_data[0];
2322			nat->nat_call[1] = fin->fin_data[1];
2323			nat->nat_oport = 0; /*fin->fin_data[0];*/
2324			nat->nat_inport = 0; /*fin->fin_data[1];*/
2325			nat->nat_outport = 0; /*fin->fin_data[1];*/
2326		}
2327#endif
2328	}
2329
2330	return move;
2331}
2332
2333/* ------------------------------------------------------------------------ */
2334/* Function:    nat_new                                                     */
2335/* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2336/*                       else pointer to new NAT structure                  */
2337/* Parameters:  fin(I)       - pointer to packet information                */
2338/*              np(I)        - pointer to NAT rule                          */
2339/*              natsave(I)   - pointer to where to store NAT struct pointer */
2340/*              flags(I)     - flags describing the current packet          */
2341/*              direction(I) - direction of packet (in/out)                 */
2342/* Write Lock:  ipf_nat                                                     */
2343/*                                                                          */
2344/* Attempts to create a new NAT entry.  Does not actually change the packet */
2345/* in any way.                                                              */
2346/*                                                                          */
2347/* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2348/* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2349/* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2350/* and (3) building that structure and putting it into the NAT table(s).    */
2351/*                                                                          */
2352/* NOTE: natsave should NOT be used top point back to an ipstate_t struct   */
2353/*       as it can result in memory being corrupted.                        */
2354/* ------------------------------------------------------------------------ */
2355nat_t *nat_new(fin, np, natsave, flags, direction)
2356fr_info_t *fin;
2357ipnat_t *np;
2358nat_t **natsave;
2359u_int flags;
2360int direction;
2361{
2362	u_short port = 0, sport = 0, dport = 0, nport = 0;
2363	tcphdr_t *tcp = NULL;
2364	hostmap_t *hm = NULL;
2365	struct in_addr in;
2366	nat_t *nat, *natl;
2367	u_int nflags;
2368	natinfo_t ni;
2369	u_32_t sumd;
2370	int move;
2371#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2372	qpktinfo_t *qpi = fin->fin_qpi;
2373#endif
2374
2375	if (nat_stats.ns_inuse >= ipf_nattable_max) {
2376		nat_stats.ns_memfail++;
2377		fr_nat_doflush = 1;
2378		return NULL;
2379	}
2380
2381	move = 1;
2382	nflags = np->in_flags & flags;
2383	nflags &= NAT_FROMRULE;
2384
2385	ni.nai_np = np;
2386	ni.nai_nflags = nflags;
2387	ni.nai_flags = flags;
2388	ni.nai_dport = 0;
2389	ni.nai_sport = 0;
2390
2391	/* Give me a new nat */
2392	KMALLOC(nat, nat_t *);
2393	if (nat == NULL) {
2394		nat_stats.ns_memfail++;
2395		/*
2396		 * Try to automatically tune the max # of entries in the
2397		 * table allowed to be less than what will cause kmem_alloc()
2398		 * to fail and try to eliminate panics due to out of memory
2399		 * conditions arising.
2400		 */
2401		if (ipf_nattable_max > ipf_nattable_sz) {
2402			ipf_nattable_max = nat_stats.ns_inuse - 100;
2403			printf("ipf_nattable_max reduced to %d\n",
2404				ipf_nattable_max);
2405		}
2406		return NULL;
2407	}
2408
2409	if (flags & IPN_TCPUDP) {
2410		tcp = fin->fin_dp;
2411		ni.nai_sport = htons(fin->fin_sport);
2412		ni.nai_dport = htons(fin->fin_dport);
2413	} else if (flags & IPN_ICMPQUERY) {
2414		/*
2415		 * In the ICMP query NAT code, we translate the ICMP id fields
2416		 * to make them unique. This is indepedent of the ICMP type
2417		 * (e.g. in the unlikely event that a host sends an echo and
2418		 * an tstamp request with the same id, both packets will have
2419		 * their ip address/id field changed in the same way).
2420		 */
2421		/* The icmp_id field is used by the sender to identify the
2422		 * process making the icmp request. (the receiver justs
2423		 * copies it back in its response). So, it closely matches
2424		 * the concept of source port. We overlay sport, so we can
2425		 * maximally reuse the existing code.
2426		 */
2427		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2428		ni.nai_dport = ni.nai_sport;
2429	}
2430
2431	bzero((char *)nat, sizeof(*nat));
2432	nat->nat_flags = flags;
2433	nat->nat_redir = np->in_redir;
2434
2435	if ((flags & NAT_SLAVE) == 0) {
2436		MUTEX_ENTER(&ipf_nat_new);
2437	}
2438
2439	/*
2440	 * Search the current table for a match.
2441	 */
2442	if (direction == NAT_OUTBOUND) {
2443		/*
2444		 * We can now arrange to call this for the same connection
2445		 * because ipf_nat_new doesn't protect the code path into
2446		 * this function.
2447		 */
2448		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2449				     fin->fin_src, fin->fin_dst);
2450		if (natl != NULL) {
2451			KFREE(nat);
2452			nat = natl;
2453			goto done;
2454		}
2455
2456		move = nat_newmap(fin, nat, &ni);
2457		if (move == -1)
2458			goto badnat;
2459
2460		np = ni.nai_np;
2461		in = ni.nai_ip;
2462	} else {
2463		/*
2464		 * NAT_INBOUND is used only for redirects rules
2465		 */
2466		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2467				    fin->fin_src, fin->fin_dst);
2468		if (natl != NULL) {
2469			KFREE(nat);
2470			nat = natl;
2471			goto done;
2472		}
2473
2474		move = nat_newrdr(fin, nat, &ni);
2475		if (move == -1)
2476			goto badnat;
2477
2478		np = ni.nai_np;
2479		in = ni.nai_ip;
2480	}
2481	port = ni.nai_port;
2482	nport = ni.nai_nport;
2483
2484	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2485		if (np->in_redir == NAT_REDIRECT) {
2486			nat_delrdr(np);
2487			nat_addrdr(np);
2488		} else if (np->in_redir == NAT_MAP) {
2489			nat_delnat(np);
2490			nat_addnat(np);
2491		}
2492	}
2493
2494	if (flags & IPN_TCPUDP) {
2495		sport = ni.nai_sport;
2496		dport = ni.nai_dport;
2497	} else if (flags & IPN_ICMPQUERY) {
2498		sport = ni.nai_sport;
2499		dport = 0;
2500	}
2501
2502	CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2503	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2504#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2505	if ((flags & IPN_TCP) && dohwcksum &&
2506	    (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2507		if (direction == NAT_OUTBOUND)
2508			ni.nai_sum1 = LONG_SUM(in.s_addr);
2509		else
2510			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2511		ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2512		ni.nai_sum1 += 30;
2513		ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2514		nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2515	} else
2516#endif
2517		nat->nat_sumd[1] = nat->nat_sumd[0];
2518
2519	if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2520		if (direction == NAT_OUTBOUND)
2521			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2522		else
2523			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2524
2525		ni.nai_sum2 = LONG_SUM(in.s_addr);
2526
2527		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2528		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2529	} else {
2530		nat->nat_ipsumd = nat->nat_sumd[0];
2531		if (!(flags & IPN_TCPUDPICMP)) {
2532			nat->nat_sumd[0] = 0;
2533			nat->nat_sumd[1] = 0;
2534		}
2535	}
2536
2537	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2538		fr_nat_doflush = 1;
2539		goto badnat;
2540	}
2541	if (flags & SI_WILDP)
2542		nat_stats.ns_wilds++;
2543	fin->fin_flx |= FI_NEWNAT;
2544	goto done;
2545badnat:
2546	nat_stats.ns_badnat++;
2547	if ((hm = nat->nat_hm) != NULL)
2548		fr_hostmapdel(&hm);
2549	KFREE(nat);
2550	nat = NULL;
2551done:
2552	if ((flags & NAT_SLAVE) == 0) {
2553		MUTEX_EXIT(&ipf_nat_new);
2554	}
2555	return nat;
2556}
2557
2558
2559/* ------------------------------------------------------------------------ */
2560/* Function:    nat_finalise                                                */
2561/* Returns:     int - 0 == sucess, -1 == failure                            */
2562/* Parameters:  fin(I) - pointer to packet information                      */
2563/*              nat(I) - pointer to NAT entry                               */
2564/*              ni(I)  - pointer to structure with misc. information needed */
2565/*                       to create new NAT entry.                           */
2566/* Write Lock:  ipf_nat                                                     */
2567/*                                                                          */
2568/* This is the tail end of constructing a new NAT entry and is the same     */
2569/* for both IPv4 and IPv6.                                                  */
2570/* ------------------------------------------------------------------------ */
2571/*ARGSUSED*/
2572static int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2573fr_info_t *fin;
2574nat_t *nat;
2575natinfo_t *ni;
2576tcphdr_t *tcp;
2577nat_t **natsave;
2578int direction;
2579{
2580	frentry_t *fr;
2581	ipnat_t *np;
2582
2583	np = ni->nai_np;
2584
2585	if (np->in_ifps[0] != NULL) {
2586		COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2587	}
2588	if (np->in_ifps[1] != NULL) {
2589		COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
2590	}
2591#ifdef	IPFILTER_SYNC
2592	if ((nat->nat_flags & SI_CLONE) == 0)
2593		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2594#endif
2595
2596	nat->nat_me = natsave;
2597	nat->nat_dir = direction;
2598	nat->nat_ifps[0] = np->in_ifps[0];
2599	nat->nat_ifps[1] = np->in_ifps[1];
2600	nat->nat_ptr = np;
2601	nat->nat_p = fin->fin_p;
2602	nat->nat_mssclamp = np->in_mssclamp;
2603	if (nat->nat_p == IPPROTO_TCP)
2604		nat->nat_seqnext[0] = ntohl(tcp->th_seq);
2605
2606	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2607		if (appr_new(fin, nat) == -1)
2608			return -1;
2609
2610	if (nat_insert(nat, fin->fin_rev) == 0) {
2611		if (nat_logging)
2612			nat_log(nat, (u_int)np->in_redir);
2613		np->in_use++;
2614		fr = fin->fin_fr;
2615		nat->nat_fr = fr;
2616		if (fr != NULL) {
2617			MUTEX_ENTER(&fr->fr_lock);
2618			fr->fr_ref++;
2619			MUTEX_EXIT(&fr->fr_lock);
2620		}
2621		return 0;
2622	}
2623
2624	/*
2625	 * nat_insert failed, so cleanup time...
2626	 */
2627	return -1;
2628}
2629
2630
2631/* ------------------------------------------------------------------------ */
2632/* Function:   nat_insert                                                   */
2633/* Returns:    int - 0 == sucess, -1 == failure                             */
2634/* Parameters: nat(I) - pointer to NAT structure                            */
2635/*             rev(I) - flag indicating forward/reverse direction of packet */
2636/* Write Lock: ipf_nat                                                      */
2637/*                                                                          */
2638/* Insert a NAT entry into the hash tables for searching and add it to the  */
2639/* list of active NAT entries.  Adjust global counters when complete.       */
2640/* ------------------------------------------------------------------------ */
2641int	nat_insert(nat, rev)
2642nat_t	*nat;
2643int	rev;
2644{
2645	u_int hv1, hv2;
2646	nat_t **natp;
2647
2648	/*
2649	 * Try and return an error as early as possible, so calculate the hash
2650	 * entry numbers first and then proceed.
2651	 */
2652	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2653		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2654				  0xffffffff);
2655		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2656				  ipf_nattable_sz);
2657		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2658				  0xffffffff);
2659		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2660				  ipf_nattable_sz);
2661	} else {
2662		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2663		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2664		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2665		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
2666	}
2667
2668	if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2669	    nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2670		return -1;
2671	}
2672
2673	nat->nat_hv[0] = hv1;
2674	nat->nat_hv[1] = hv2;
2675
2676	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2677
2678	nat->nat_rev = rev;
2679	nat->nat_ref = 1;
2680	nat->nat_bytes[0] = 0;
2681	nat->nat_pkts[0] = 0;
2682	nat->nat_bytes[1] = 0;
2683	nat->nat_pkts[1] = 0;
2684
2685	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2686	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2687
2688	if (nat->nat_ifnames[1][0] != '\0') {
2689		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2690		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2691	} else {
2692		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2693			       LIFNAMSIZ);
2694		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2695		nat->nat_ifps[1] = nat->nat_ifps[0];
2696	}
2697
2698	nat->nat_next = nat_instances;
2699	nat->nat_pnext = &nat_instances;
2700	if (nat_instances)
2701		nat_instances->nat_pnext = &nat->nat_next;
2702	nat_instances = nat;
2703
2704	natp = &nat_table[0][hv1];
2705	if (*natp)
2706		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2707	nat->nat_phnext[0] = natp;
2708	nat->nat_hnext[0] = *natp;
2709	*natp = nat;
2710	nat_stats.ns_bucketlen[0][hv1]++;
2711
2712	natp = &nat_table[1][hv2];
2713	if (*natp)
2714		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2715	nat->nat_phnext[1] = natp;
2716	nat->nat_hnext[1] = *natp;
2717	*natp = nat;
2718	nat_stats.ns_bucketlen[1][hv2]++;
2719
2720	fr_setnatqueue(nat, rev);
2721
2722	nat_stats.ns_added++;
2723	nat_stats.ns_inuse++;
2724	return 0;
2725}
2726
2727
2728/* ------------------------------------------------------------------------ */
2729/* Function:    nat_icmperrorlookup                                         */
2730/* Returns:     nat_t* - point to matching NAT structure                    */
2731/* Parameters:  fin(I) - pointer to packet information                      */
2732/*              dir(I) - direction of packet (in/out)                       */
2733/*                                                                          */
2734/* Check if the ICMP error message is related to an existing TCP, UDP or    */
2735/* ICMP query nat entry.  It is assumed that the packet is already of the   */
2736/* the required length.                                                     */
2737/* ------------------------------------------------------------------------ */
2738nat_t *nat_icmperrorlookup(fin, dir)
2739fr_info_t *fin;
2740int dir;
2741{
2742	int flags = 0, type, minlen;
2743	icmphdr_t *icmp, *orgicmp;
2744	tcphdr_t *tcp = NULL;
2745	u_short data[2];
2746	nat_t *nat;
2747	ip_t *oip;
2748	u_int p;
2749
2750	icmp = fin->fin_dp;
2751	type = icmp->icmp_type;
2752	/*
2753	 * Does it at least have the return (basic) IP header ?
2754	 * Only a basic IP header (no options) should be with an ICMP error
2755	 * header.  Also, if it's not an error type, then return.
2756	 */
2757	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2758		return NULL;
2759
2760	/*
2761	 * Check packet size
2762	 */
2763	oip = (ip_t *)((char *)fin->fin_dp + 8);
2764	minlen = IP_HL(oip) << 2;
2765	if ((minlen < sizeof(ip_t)) ||
2766	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2767		return NULL;
2768	/*
2769	 * Is the buffer big enough for all of it ?  It's the size of the IP
2770	 * header claimed in the encapsulated part which is of concern.  It
2771	 * may be too big to be in this buffer but not so big that it's
2772	 * outside the ICMP packet, leading to TCP deref's causing problems.
2773	 * This is possible because we don't know how big oip_hl is when we
2774	 * do the pullup early in fr_check() and thus can't gaurantee it is
2775	 * all here now.
2776	 */
2777#ifdef  _KERNEL
2778	{
2779	mb_t *m;
2780
2781	m = fin->fin_m;
2782# if defined(MENTAT)
2783	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2784		return NULL;
2785# else
2786	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2787	    (char *)fin->fin_ip + M_LEN(m))
2788		return NULL;
2789# endif
2790	}
2791#endif
2792
2793	if (fin->fin_daddr != oip->ip_src.s_addr)
2794		return NULL;
2795
2796	p = oip->ip_p;
2797	if (p == IPPROTO_TCP)
2798		flags = IPN_TCP;
2799	else if (p == IPPROTO_UDP)
2800		flags = IPN_UDP;
2801	else if (p == IPPROTO_ICMP) {
2802		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2803
2804		/* see if this is related to an ICMP query */
2805		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2806			data[0] = fin->fin_data[0];
2807			data[1] = fin->fin_data[1];
2808			fin->fin_data[0] = 0;
2809			fin->fin_data[1] = orgicmp->icmp_id;
2810
2811			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2812			/*
2813			 * NOTE : dir refers to the direction of the original
2814			 *        ip packet. By definition the icmp error
2815			 *        message flows in the opposite direction.
2816			 */
2817			if (dir == NAT_INBOUND)
2818				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2819						   oip->ip_src);
2820			else
2821				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2822						    oip->ip_src);
2823			fin->fin_data[0] = data[0];
2824			fin->fin_data[1] = data[1];
2825			return nat;
2826		}
2827	}
2828
2829	if (flags & IPN_TCPUDP) {
2830		minlen += 8;		/* + 64bits of data to get ports */
2831		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2832			return NULL;
2833
2834		data[0] = fin->fin_data[0];
2835		data[1] = fin->fin_data[1];
2836		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2837		fin->fin_data[0] = ntohs(tcp->th_dport);
2838		fin->fin_data[1] = ntohs(tcp->th_sport);
2839
2840		if (dir == NAT_INBOUND) {
2841			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2842					   oip->ip_src);
2843		} else {
2844			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2845					    oip->ip_src);
2846		}
2847		fin->fin_data[0] = data[0];
2848		fin->fin_data[1] = data[1];
2849		return nat;
2850	}
2851	if (dir == NAT_INBOUND)
2852		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2853	else
2854		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2855}
2856
2857
2858/* ------------------------------------------------------------------------ */
2859/* Function:    nat_icmperror                                               */
2860/* Returns:     nat_t* - point to matching NAT structure                    */
2861/* Parameters:  fin(I)    - pointer to packet information                   */
2862/*              nflags(I) - NAT flags for this packet                       */
2863/*              dir(I)    - direction of packet (in/out)                    */
2864/*                                                                          */
2865/* Fix up an ICMP packet which is an error message for an existing NAT      */
2866/* session.  This will correct both packet header data and checksums.       */
2867/*                                                                          */
2868/* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2869/* a NAT'd ICMP packet gets correctly recognised.                           */
2870/* ------------------------------------------------------------------------ */
nat_t *nat_icmperror(fin, nflags, dir)
fr_info_t *fin;
u_int *nflags;
int dir;
{
	u_32_t sum1, sum2, sumd, sumd2;
	struct in_addr a1, a2;
	int flags, dlen, odst;
	icmphdr_t *icmp;
	u_short *csump;
	tcphdr_t *tcp;
	nat_t *nat;
	ip_t *oip;
	void *dp;

	/*
	 * Purpose: for an IPv4 ICMP error that matches a NAT entry, rewrite
	 * the addresses/ports of the "offending" packet quoted inside the
	 * ICMP payload and patch the affected checksums.  Returns the NAT
	 * entry, or NULL if the packet is short, a fragment body, not IPv4
	 * or has no matching entry.  On success *nflags is set to
	 * IPN_ICMPERR.
	 */
	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
		return NULL;
	/*
	 * nat_icmperrorlookup() will return NULL for `defective' packets.
	 */
	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
		return NULL;

	tcp = NULL;
	csump = NULL;
	flags = 0;
	sumd2 = 0;
	*nflags = IPN_ICMPERR;
	icmp = fin->fin_dp;
	/* oip: the IP header of the offending packet quoted in the ICMP body */
	oip = (ip_t *)&icmp->icmp_ip;
	/* dp: first byte after the quoted IP header (its transport header) */
	dp = (((char *)oip) + (IP_HL(oip) << 2));
	if (oip->ip_p == IPPROTO_TCP) {
		tcp = (tcphdr_t *)dp;
		csump = (u_short *)&tcp->th_sum;
		flags = IPN_TCP;
	} else if (oip->ip_p == IPPROTO_UDP) {
		udphdr_t *udp;

		udp = (udphdr_t *)dp;
		/*
		 * The port rewrite below goes through tcp->th_sport/th_dport
		 * for both TCP and UDP, so tcp is pointed at the UDP header.
		 */
		tcp = (tcphdr_t *)dp;
		csump = (u_short *)&udp->uh_sum;
		flags = IPN_UDP;
	} else if (oip->ip_p == IPPROTO_ICMP)
		flags = IPN_ICMPQUERY;
	/* dlen: number of packet bytes available from dp to the end */
	dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);

	/*
	 * Need to adjust ICMP header to include the real IP#'s and
	 * port #'s.  Only apply a checksum change relative to the
	 * IP address change as it will be modified again in fr_checknatout
	 * for both address and port.  Two checksum changes are
	 * necessary for the two header address changes.  Be careful
	 * to only modify the checksum once for the port # and twice
	 * for the IP#.
	 */

	/*
	 * Step 1
	 * Fix the IP addresses in the offending IP packet. You also need
	 * to adjust the IP header checksum of that offending IP packet.
	 *
	 * Normally, you would expect that the ICMP checksum of the
	 * ICMP error message needs to be adjusted as well for the
	 * IP address change in oip.
	 * However, this is a NOP, because the ICMP checksum is
	 * calculated over the complete ICMP packet, which includes the
	 * changed oip IP addresses and oip->ip_sum. However, these
	 * two changes cancel each other out (if the delta for
	 * the IP address is x, then the delta for ip_sum is minus x),
	 * so no change in the icmp_cksum is necessary.
	 *
	 * Inbound ICMP
	 * ------------
	 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
	 * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
	 * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
	 *
	 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
	 * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
	 * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
	 *
	 * Outbound ICMP
	 * -------------
	 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
	 * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
	 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
	 *
	 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
	 * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
	 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
	 *
	 */
	/*
	 * odst == 1: the quoted destination is the entry's nat_oip, so the
	 * quoted *source* is the side to rewrite (to nat_inip).  Otherwise
	 * the quoted *destination* is rewritten (to nat_outip).
	 * a1 is the new address and a2 the old one, both in host order.
	 */
	odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
	if (odst == 1) {
		a1.s_addr = ntohl(nat->nat_inip.s_addr);
		a2.s_addr = ntohl(oip->ip_src.s_addr);
		oip->ip_src.s_addr = htonl(a1.s_addr);
	} else {
		a1.s_addr = ntohl(nat->nat_outip.s_addr);
		a2.s_addr = ntohl(oip->ip_dst.s_addr);
		oip->ip_dst.s_addr = htonl(a1.s_addr);
	}

	/* Address delta (old - new), then applied to the quoted ip_sum. */
	sumd = a2.s_addr - a1.s_addr;
	if (sumd != 0) {
		if (a1.s_addr > a2.s_addr)
			sumd--;
		sumd = ~sumd;

		fix_datacksum(&oip->ip_sum, sumd);
	}

	/* Remember the address-only delta; sumd may pick up the port delta. */
	sumd2 = sumd;
	sum1 = 0;
	sum2 = 0;

	/*
	 * Fix UDP pseudo header checksum to compensate for the
	 * IP address change.
	 */
	/* dlen >= 4: both 16-bit port fields must be present to be rewritten */
	if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
		/*
		 * Step 2 :
		 * For offending TCP/UDP IP packets, translate the ports as
		 * well, based on the NAT specification. Of course such
		 * a change may be reflected in the ICMP checksum as well.
		 *
		 * Since the port fields are part of the TCP/UDP checksum
		 * of the offending IP packet, you need to adjust that checksum
		 * as well... except that the change in the port numbers should
		 * be offset by the checksum change.  However, the TCP/UDP
		 * checksum will also need to change if there has been an
		 * IP address change.
		 */
		/* sum1 is the new port and sum2 the old one, in host order. */
		if (odst == 1) {
			sum1 = ntohs(nat->nat_inport);
			sum2 = ntohs(tcp->th_sport);

			tcp->th_sport = htons(sum1);
		} else {
			sum1 = ntohs(nat->nat_outport);
			sum2 = ntohs(tcp->th_dport);

			tcp->th_dport = htons(sum1);
		}

		sumd += sum1 - sum2;
		if (sumd != 0 || sumd2 != 0) {
			/*
			 * At this point, sumd is the delta to apply to the
			 * TCP/UDP header, given the changes in both the IP
			 * address and the ports and sumd2 is the delta to
			 * apply to the ICMP header, given the IP address
			 * change delta that may need to be applied to the
			 * TCP/UDP checksum instead.
			 *
			 * If we fix both the IP and TCP/UDP checksums
			 * then the ICMP checksum changes by the address
			 * delta applied to the TCP/UDP checksum.  If we
			 * do not change the TCP/UDP checksum then we
			 * apply the delta in ports to the ICMP checksum.
			 */
			if (oip->ip_p == IPPROTO_UDP) {
				/*
				 * Only touch uh_sum when the quoted data is
				 * long enough to contain it (dlen >= 8) and
				 * the stored checksum is non-zero.
				 */
				if ((dlen >= 8) && (*csump != 0)) {
					fix_datacksum(csump, sumd);
				} else {
					sumd2 = sum1 - sum2;
					if (sum2 > sum1)
						sumd2--;
				}
			} else if (oip->ip_p == IPPROTO_TCP) {
				/*
				 * dlen >= 18: presumably the point at which
				 * the quoted data reaches th_sum (bytes 16-17
				 * of the TCP header).
				 * NOTE(review): the port-only delta here is
				 * (old - new) whereas the UDP branch above
				 * uses (new - old); confirm this asymmetry
				 * is intended.
				 */
				if (dlen >= 18) {
					fix_datacksum(csump, sumd);
				} else {
					sumd2 = sum2 - sum1;
					if (sum1 > sum2)
						sumd2--;
				}
			}

			if (sumd2 != 0) {
				ipnat_t *np;

				np = nat->nat_ptr;
				/* Fold the 32-bit delta down to 16 bits. */
				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);

				/*
				 * The outer ICMP checksum is adjusted with
				 * fix_outcksum() only for an outbound,
				 * non-reversed packet on a redirect rule
				 * whose quoted destination was rewritten;
				 * every other case uses fix_incksum().
				 */
				if ((odst == 0) && (dir == NAT_OUTBOUND) &&
				    (fin->fin_rev == 0) && (np != NULL) &&
				    (np->in_redir & NAT_REDIRECT)) {
					fix_outcksum(fin, &icmp->icmp_cksum,
						     sumd2);
				} else {
					fix_incksum(fin, &icmp->icmp_cksum,
						    sumd2);
				}
			}
		}
	} else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
		icmphdr_t *orgicmp;

		/*
		 * XXX - what if this is bogus hl and we go off the end ?
		 * In this case, nat_icmperrorlookup() will have returned NULL.
		 */
		orgicmp = (icmphdr_t *)dp;

		if (odst == 1) {
			if (orgicmp->icmp_id != nat->nat_inport) {

				/*
				 * Fix ICMP checksum (of the offending ICMP
				 * query packet) to compensate the change
				 * in the ICMP id of the offending ICMP
				 * packet.
				 *
				 * Since you modify orgicmp->icmp_id with
				 * a delta (say x) and you compensate that
				 * in origicmp->icmp_cksum with a delta
				 * minus x, you don't have to adjust the
				 * overall icmp->icmp_cksum
				 */
				sum1 = ntohs(orgicmp->icmp_id);
				sum2 = ntohs(nat->nat_inport);
				CALC_SUMD(sum1, sum2, sumd);
				orgicmp->icmp_id = nat->nat_inport;
				fix_datacksum(&orgicmp->icmp_cksum, sumd);
			}
		} /* nat_dir == NAT_INBOUND is impossible for icmp queries */
	}
	return nat;
}
3104
3105
/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done by the caller!
 */
3110
/* ------------------------------------------------------------------------ */
/* Function:    nat_inlookup                                                */
/* Returns:     nat_t* - NULL == no match,                                  */
/*                       else pointer to matching NAT entry                 */
/* Parameters:  fin(I)    - pointer to packet information                   */
/*              flags(I)  - NAT flags for this packet                       */
/*              p(I)      - protocol for this packet                        */
/*              src(I)    - source IP address                               */
/*              mapdst(I) - destination IP address                          */
/*                                                                          */
/* Lookup a nat entry based on the mapped destination ip address/port and   */
/* real source address/port.  We use this lookup when receiving a packet,   */
/* we're looking for a table entry, based on the destination address.       */
/*                                                                          */
/* The search has two stages: an exact lookup in nat_table[1] hashed on     */
/* addresses and ports (skipped when SI_WILDP is set in flags), then, for   */
/* TCP/UDP lookups with NAT_SEARCH set and wildcard entries present, a      */
/* second pass hashed on addresses only.  The second pass may fill in the   */
/* ports of a wildcard entry (or of a clone of it) and re-hash it.          */
/*                                                                          */
/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
/*                                                                          */
/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
/*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
/*       On the wildcard path the lock is released, re-taken for writing    */
/*       and passed through MUTEX_DOWNGRADE() before returning.             */
/*                                                                          */
/* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
/*            the packet is of said protocol                                */
/* ------------------------------------------------------------------------ */
nat_t *nat_inlookup(fin, flags, p, src, mapdst)
fr_info_t *fin;
u_int flags, p;
struct in_addr src , mapdst;
{
	u_short sport, dport;
	grehdr_t *gre;
	ipnat_t *ipn;
	u_int sflags;
	nat_t *nat;
	int nflags;
	u_32_t dst;
	void *ifp;
	u_int hv;

	ifp = fin->fin_ifp;
	sport = 0;
	dport = 0;
	/* NOTE(review): gre is never read in this function. */
	gre = NULL;
	dst = mapdst.s_addr;
	sflags = flags & NAT_TCPUDPICMP;

	/*
	 * Derive the "ports" used for hashing and matching.  TCP/UDP ports
	 * from fin_data[] are passed through htons(); for ICMP, fin_data[1]
	 * is used as-is (presumably already in the byte order stored in the
	 * NAT entry) and lands in sport for ICMP errors, dport otherwise.
	 */
	switch (p)
	{
	case IPPROTO_TCP :
	case IPPROTO_UDP :
		sport = htons(fin->fin_data[0]);
		dport = htons(fin->fin_data[1]);
		break;
	case IPPROTO_ICMP :
		if (flags & IPN_ICMPERR)
			sport = fin->fin_data[1];
		else
			dport = fin->fin_data[1];
		break;
	default :
		break;
	}


	/* Caller asked for wildcard-port matching only: skip exact pass. */
	if ((flags & SI_WILDP) != 0)
		goto find_in_wild_ports;

	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
	hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
	nat = nat_table[1][hv];
	for (; nat; nat = nat->nat_hnext[1]) {
		/*
		 * Entries bound to another interface are skipped; entries
		 * with no interface yet are bound to this packet's one.
		 * NOTE(review): the binding happens before the address/port
		 * comparison below, so a non-matching entry in the same
		 * bucket is bound too - confirm that this is intended.
		 */
		if (nat->nat_ifps[0] != NULL) {
			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
				continue;
		} else if (ifp != NULL)
			nat->nat_ifps[0] = ifp;

		nflags = nat->nat_flags;

		/*
		 * Addresses must match; the protocol matches either by
		 * number, or, when p is 0, by the TCP/UDP/ICMP flag bits.
		 */
		if (nat->nat_oip.s_addr == src.s_addr &&
		    nat->nat_outip.s_addr == dst &&
		    (((p == 0) &&
		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
		     || (p == nat->nat_p))) {
			/* "continue" below advances the enclosing for loop */
			switch (p)
			{
#if 0
			case IPPROTO_GRE :
				if (nat->nat_call[1] != fin->fin_data[0])
					continue;
				break;
#endif
			case IPPROTO_ICMP :
				if ((flags & IPN_ICMPERR) != 0) {
					if (nat->nat_outport != sport)
						continue;
				} else {
					if (nat->nat_outport != dport)
						continue;
				}
				break;
			case IPPROTO_TCP :
			case IPPROTO_UDP :
				if (nat->nat_oport != sport)
					continue;
				if (nat->nat_outport != dport)
					continue;
				break;
			default :
				break;
			}

			/*
			 * If the entry has a rule and a proxy session
			 * attached, the proxy must accept the packet too.
			 */
			ipn = nat->nat_ptr;
			if ((ipn != NULL) && (nat->nat_aps != NULL))
				if (appr_match(fin, nat) != 0)
					continue;
			return nat;
		}
	}

	/*
	 * So if we didn't find it but there are wildcard members in the hash
	 * table, go back and look for them.  We do this search and update here
	 * because it is modifying the NAT table and we want to do this only
	 * for the first packet that matches.  The exception, of course, is
	 * for "dummy" (FI_IGNORE) lookups.
	 */
find_in_wild_ports:
	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
		return NULL;
	if (nat_stats.ns_wilds == 0)
		return NULL;

	/* Swap the read lock for a write lock: the table may be modified. */
	RWLOCK_EXIT(&ipf_nat);

	/* Wildcard entries are hashed on addresses only (ports == 0). */
	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
	hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);

	WRITE_ENTER(&ipf_nat);

	nat = nat_table[1][hv];
	for (; nat; nat = nat->nat_hnext[1]) {
		if (nat->nat_ifps[0] != NULL) {
			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
				continue;
		} else if (ifp != NULL)
			nat->nat_ifps[0] = ifp;

		if (nat->nat_p != fin->fin_p)
			continue;
		if (nat->nat_oip.s_addr != src.s_addr ||
		    nat->nat_outip.s_addr != dst)
			continue;

		nflags = nat->nat_flags;
		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
			continue;

		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
			       NAT_INBOUND) == 1) {
			/* Dummy lookup: report the entry but leave it alone */
			if ((fin->fin_flx & FI_IGNORE) != 0)
				break;
			if ((nflags & SI_CLONE) != 0) {
				/* Work on a clone; nat is NULL on failure. */
				nat = fr_natclone(fin, nat);
				if (nat == NULL)
					break;
			} else {
				/* Entry stops being a wildcard: adjust count */
				MUTEX_ENTER(&ipf_nat_new);
				nat_stats.ns_wilds--;
				MUTEX_EXIT(&ipf_nat_new);
			}
			/* Pin the ports and re-hash with them included. */
			nat->nat_oport = sport;
			nat->nat_outport = dport;
			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
			nat_tabmove(nat);
			break;
		}
	}

	MUTEX_DOWNGRADE(&ipf_nat);

	/* nat is NULL here if the loop ran off the end of the chain. */
	return nat;
}
3292
3293
3294/* ------------------------------------------------------------------------ */
3295/* Function:    nat_tabmove                                                 */
3296/* Returns:     Nil                                                         */
3297/* Parameters:  nat(I) - pointer to NAT structure                           */
3298/* Write Lock:  ipf_nat                                                     */
3299/*                                                                          */
3300/* This function is only called for TCP/UDP NAT table entries where the     */
3301/* original was placed in the table without hashing on the ports and we now */
3302/* want to include hashing on port numbers.                                 */
3303/* ------------------------------------------------------------------------ */
3304static void nat_tabmove(nat)
3305nat_t *nat;
3306{
3307	nat_t **natp;
3308	u_int hv;
3309
3310	if (nat->nat_flags & SI_CLONE)
3311		return;
3312
3313	/*
3314	 * Remove the NAT entry from the old location
3315	 */
3316	if (nat->nat_hnext[0])
3317		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3318	*nat->nat_phnext[0] = nat->nat_hnext[0];
3319	nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3320
3321	if (nat->nat_hnext[1])
3322		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3323	*nat->nat_phnext[1] = nat->nat_hnext[1];
3324	nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3325
3326	/*
3327	 * Add into the NAT table in the new position
3328	 */
3329	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3330	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3331			 ipf_nattable_sz);
3332	nat->nat_hv[0] = hv;
3333	natp = &nat_table[0][hv];
3334	if (*natp)
3335		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3336	nat->nat_phnext[0] = natp;
3337	nat->nat_hnext[0] = *natp;
3338	*natp = nat;
3339	nat_stats.ns_bucketlen[0][hv]++;
3340
3341	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3342	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3343			 ipf_nattable_sz);
3344	nat->nat_hv[1] = hv;
3345	natp = &nat_table[1][hv];
3346	if (*natp)
3347		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3348	nat->nat_phnext[1] = natp;
3349	nat->nat_hnext[1] = *natp;
3350	*natp = nat;
3351	nat_stats.ns_bucketlen[1][hv]++;
3352}
3353
3354
/* ------------------------------------------------------------------------ */
/* Function:    nat_outlookup                                               */
/* Returns:     nat_t* - NULL == no match,                                  */
/*                       else pointer to matching NAT entry                 */
/* Parameters:  fin(I)   - pointer to packet information                    */
/*              flags(I) - NAT flags for this packet                        */
/*              p(I)     - protocol for this packet                         */
/*              src(I)   - source IP address                                */
/*              dst(I)   - destination IP address                           */
/*                                                                          */
/* Lookup a nat entry based on the source 'real' ip address/port and        */
/* destination address/port.  We use this lookup when sending a packet out, */
/* we're looking for a table entry, based on the source address.            */
/*                                                                          */
/* The search has two stages: an exact lookup in nat_table[0] hashed on     */
/* addresses and ports (skipped when SI_WILDP is set in flags), then, for   */
/* TCP/UDP lookups with NAT_SEARCH set and wildcard entries present, a      */
/* second pass hashed on addresses only.  The second pass may fill in the   */
/* ports of a wildcard entry (or of a clone of it) and re-hash it.          */
/*                                                                          */
/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
/*                                                                          */
/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
/*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
/*       On the wildcard path the lock is released, re-taken for writing    */
/*       and passed through MUTEX_DOWNGRADE() before returning.             */
/*                                                                          */
/* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
/*            the packet is of said protocol                                */
/* ------------------------------------------------------------------------ */
nat_t *nat_outlookup(fin, flags, p, src, dst)
fr_info_t *fin;
u_int flags, p;
struct in_addr src , dst;
{
	u_short sport, dport;
	u_int sflags;
	ipnat_t *ipn;
	u_32_t srcip;
	nat_t *nat;
	int nflags;
	void *ifp;
	u_int hv;

	ifp = fin->fin_ifp;
	srcip = src.s_addr;
	sflags = flags & IPN_TCPUDPICMP;
	sport = 0;
	dport = 0;

	/*
	 * Derive the "ports" used for hashing and matching.  TCP/UDP ports
	 * from fin_data[] are passed through htons(); for ICMP, fin_data[1]
	 * is used as-is (presumably already in the byte order stored in the
	 * NAT entry) and lands in sport for ICMP errors, dport otherwise.
	 */
	switch (p)
	{
	case IPPROTO_TCP :
	case IPPROTO_UDP :
		sport = htons(fin->fin_data[0]);
		dport = htons(fin->fin_data[1]);
		break;
	case IPPROTO_ICMP :
		if (flags & IPN_ICMPERR)
			sport = fin->fin_data[1];
		else
			dport = fin->fin_data[1];
		break;
	default :
		break;
	}

	/* Caller asked for wildcard-port matching only: skip exact pass. */
	if ((flags & SI_WILDP) != 0)
		goto find_out_wild_ports;

	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
	nat = nat_table[0][hv];
	for (; nat; nat = nat->nat_hnext[0]) {
		/*
		 * Entries bound to another interface are skipped; entries
		 * with no interface yet are bound to this packet's one.
		 * NOTE(review): the binding happens before the address/port
		 * comparison below, so a non-matching entry in the same
		 * bucket is bound too - confirm that this is intended.
		 */
		if (nat->nat_ifps[1] != NULL) {
			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
				continue;
		} else if (ifp != NULL)
			nat->nat_ifps[1] = ifp;

		nflags = nat->nat_flags;

		/*
		 * Addresses must match; the protocol matches either by
		 * number, or, when p is 0, by the TCP/UDP/ICMP flag bits.
		 */
		if (nat->nat_inip.s_addr == srcip &&
		    nat->nat_oip.s_addr == dst.s_addr &&
		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
		     || (p == nat->nat_p))) {
			/*
			 * "continue" below advances the enclosing for loop.
			 * Unlike nat_inlookup(), there is no ICMP case here:
			 * ICMP entries match on addresses/protocol alone.
			 */
			switch (p)
			{
#if 0
			case IPPROTO_GRE :
				if (nat->nat_call[1] != fin->fin_data[0])
					continue;
				break;
#endif
			case IPPROTO_TCP :
			case IPPROTO_UDP :
				if (nat->nat_oport != dport)
					continue;
				if (nat->nat_inport != sport)
					continue;
				break;
			default :
				break;
			}

			/*
			 * If the entry has a rule and a proxy session
			 * attached, the proxy must accept the packet too.
			 */
			ipn = nat->nat_ptr;
			if ((ipn != NULL) && (nat->nat_aps != NULL))
				if (appr_match(fin, nat) != 0)
					continue;
			return nat;
		}
	}

	/*
	 * So if we didn't find it but there are wildcard members in the hash
	 * table, go back and look for them.  We do this search and update here
	 * because it is modifying the NAT table and we want to do this only
	 * for the first packet that matches.  The exception, of course, is
	 * for "dummy" (FI_IGNORE) lookups.
	 */
find_out_wild_ports:
	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
		return NULL;
	if (nat_stats.ns_wilds == 0)
		return NULL;

	/* Swap the read lock for a write lock: the table may be modified. */
	RWLOCK_EXIT(&ipf_nat);

	/* Wildcard entries are hashed on addresses only (ports == 0). */
	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
	hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);

	WRITE_ENTER(&ipf_nat);

	nat = nat_table[0][hv];
	for (; nat; nat = nat->nat_hnext[0]) {
		if (nat->nat_ifps[1] != NULL) {
			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
				continue;
		} else if (ifp != NULL)
			nat->nat_ifps[1] = ifp;

		if (nat->nat_p != fin->fin_p)
			continue;
		if ((nat->nat_inip.s_addr != srcip) ||
		    (nat->nat_oip.s_addr != dst.s_addr))
			continue;

		nflags = nat->nat_flags;
		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
			continue;

		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
			       NAT_OUTBOUND) == 1) {
			/* Dummy lookup: report the entry but leave it alone */
			if ((fin->fin_flx & FI_IGNORE) != 0)
				break;
			if ((nflags & SI_CLONE) != 0) {
				/* Work on a clone; nat is NULL on failure. */
				nat = fr_natclone(fin, nat);
				if (nat == NULL)
					break;
			} else {
				/* Entry stops being a wildcard: adjust count */
				MUTEX_ENTER(&ipf_nat_new);
				nat_stats.ns_wilds--;
				MUTEX_EXIT(&ipf_nat_new);
			}
			/*
			 * Pin the ports and re-hash with them included.  The
			 * mapped port defaults to the source port if unset.
			 */
			nat->nat_inport = sport;
			nat->nat_oport = dport;
			if (nat->nat_outport == 0)
				nat->nat_outport = sport;
			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
			nat_tabmove(nat);
			break;
		}
	}

	MUTEX_DOWNGRADE(&ipf_nat);

	/* nat is NULL here if the loop ran off the end of the chain. */
	return nat;
}
3526
3527
3528/* ------------------------------------------------------------------------ */
3529/* Function:    nat_lookupredir                                             */
3530/* Returns:     nat_t* - NULL == no match,                                  */
3531/*                       else pointer to matching NAT entry                 */
3532/* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3533/*                      entry for.                                          */
3534/*                                                                          */
3535/* Lookup the NAT tables to search for a matching redirect                  */
3536/* The contents of natlookup_t should imitate those found in a packet that  */
3537/* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3538/* We can do the lookup in one of two ways, imitating an inbound or         */
3539/* outbound  packet.  By default we assume outbound, unless IPN_IN is set.  */
3540/* For IN, the fields are set as follows:                                   */
3541/*     nl_real* = source information                                        */
3542/*     nl_out* = destination information (translated)                       */
3543/* For an out packet, the fields are set like this:                         */
3544/*     nl_in* = source information (untranslated)                           */
3545/*     nl_out* = destination information (translated)                       */
3546/* ------------------------------------------------------------------------ */
3547nat_t *nat_lookupredir(np)
3548natlookup_t *np;
3549{
3550	fr_info_t fi;
3551	nat_t *nat;
3552
3553	bzero((char *)&fi, sizeof(fi));
3554	if (np->nl_flags & IPN_IN) {
3555		fi.fin_data[0] = ntohs(np->nl_realport);
3556		fi.fin_data[1] = ntohs(np->nl_outport);
3557	} else {
3558		fi.fin_data[0] = ntohs(np->nl_inport);
3559		fi.fin_data[1] = ntohs(np->nl_outport);
3560	}
3561	if (np->nl_flags & IPN_TCP)
3562		fi.fin_p = IPPROTO_TCP;
3563	else if (np->nl_flags & IPN_UDP)
3564		fi.fin_p = IPPROTO_UDP;
3565	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3566		fi.fin_p = IPPROTO_ICMP;
3567
3568	/*
3569	 * We can do two sorts of lookups:
3570	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3571	 * - default: we have the `in' and `out' address, look for `real'.
3572	 */
3573	if (np->nl_flags & IPN_IN) {
3574		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3575					np->nl_realip, np->nl_outip))) {
3576			np->nl_inip = nat->nat_inip;
3577			np->nl_inport = nat->nat_inport;
3578		}
3579	} else {
3580		/*
3581		 * If nl_inip is non null, this is a lookup based on the real
3582		 * ip address. Else, we use the fake.
3583		 */
3584		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3585					 np->nl_inip, np->nl_outip))) {
3586
3587			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3588				fr_info_t fin;
3589				bzero((char *)&fin, sizeof(fin));
3590				fin.fin_p = nat->nat_p;
3591				fin.fin_data[0] = ntohs(nat->nat_outport);
3592				fin.fin_data[1] = ntohs(nat->nat_oport);
3593				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3594						 nat->nat_outip,
3595						 nat->nat_oip) != NULL) {
3596					np->nl_flags &= ~IPN_FINDFORWARD;
3597				}
3598			}
3599
3600			np->nl_realip = nat->nat_outip;
3601			np->nl_realport = nat->nat_outport;
3602		}
3603 	}
3604
3605	return nat;
3606}
3607
3608
3609/* ------------------------------------------------------------------------ */
3610/* Function:    nat_match                                                   */
3611/* Returns:     int - 0 == no match, 1 == match                             */
3612/* Parameters:  fin(I)   - pointer to packet information                    */
3613/*              np(I)    - pointer to NAT rule                              */
3614/*                                                                          */
3615/* Pull the matching of a packet against a NAT rule out of that complex     */
3616/* loop inside fr_checknatin() and lay it out properly in its own function. */
3617/* ------------------------------------------------------------------------ */
3618static int nat_match(fin, np)
3619fr_info_t *fin;
3620ipnat_t *np;
3621{
3622	frtuc_t *ft;
3623
3624	if (fin->fin_v != 4)
3625		return 0;
3626
3627	if (np->in_p && fin->fin_p != np->in_p)
3628		return 0;
3629
3630	if (fin->fin_out) {
3631		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3632			return 0;
3633		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3634		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3635			return 0;
3636		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3637		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3638			return 0;
3639	} else {
3640		if (!(np->in_redir & NAT_REDIRECT))
3641			return 0;
3642		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3643		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3644			return 0;
3645		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3646		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3647			return 0;
3648	}
3649
3650	ft = &np->in_tuc;
3651	if (!(fin->fin_flx & FI_TCPUDP) ||
3652	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3653		if (ft->ftu_scmp || ft->ftu_dcmp)
3654			return 0;
3655		return 1;
3656	}
3657
3658	return fr_tcpudpchk(fin, ft);
3659}
3660
3661
3662/* ------------------------------------------------------------------------ */
3663/* Function:    nat_update                                                  */
3664/* Returns:     Nil                                                         */
3665/* Parameters:  nat(I)    - pointer to NAT structure                        */
3666/*              np(I)     - pointer to NAT rule                             */
3667/*                                                                          */
3668/* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3669/* called with fin_rev updated - i.e. after calling nat_proto().            */
3670/* ------------------------------------------------------------------------ */
3671void nat_update(fin, nat, np)
3672fr_info_t *fin;
3673nat_t *nat;
3674ipnat_t *np;
3675{
3676	ipftq_t *ifq, *ifq2;
3677	ipftqent_t *tqe;
3678
3679	MUTEX_ENTER(&nat->nat_lock);
3680	tqe = &nat->nat_tqe;
3681	ifq = tqe->tqe_ifq;
3682
3683	/*
3684	 * We allow over-riding of NAT timeouts from NAT rules, even for
3685	 * TCP, however, if it is TCP and there is no rule timeout set,
3686	 * then do not update the timeout here.
3687	 */
3688	if (np != NULL)
3689		ifq2 = np->in_tqehead[fin->fin_rev];
3690	else
3691		ifq2 = NULL;
3692
3693	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3694		u_32_t end, ack;
3695		u_char tcpflags;
3696		tcphdr_t *tcp;
3697		int dsize;
3698
3699		tcp = fin->fin_dp;
3700		tcpflags = tcp->th_flags;
3701		dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
3702			((tcpflags & TH_SYN) ? 1 : 0) +
3703			((tcpflags & TH_FIN) ? 1 : 0);
3704
3705		ack = ntohl(tcp->th_ack);
3706		end = ntohl(tcp->th_seq) + dsize;
3707
3708		if (SEQ_GT(ack, nat->nat_seqnext[1 - fin->fin_rev]))
3709			nat->nat_seqnext[1 - fin->fin_rev] = ack;
3710
3711		if (nat->nat_seqnext[fin->fin_rev] == 0)
3712			nat->nat_seqnext[fin->fin_rev] = end;
3713
3714		(void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3715	} else {
3716		if (ifq2 == NULL) {
3717			if (nat->nat_p == IPPROTO_UDP)
3718				ifq2 = &nat_udptq;
3719			else if (nat->nat_p == IPPROTO_ICMP)
3720				ifq2 = &nat_icmptq;
3721			else
3722				ifq2 = &nat_iptq;
3723		}
3724
3725		fr_movequeue(tqe, ifq, ifq2);
3726	}
3727	MUTEX_EXIT(&nat->nat_lock);
3728}
3729
3730
3731/* ------------------------------------------------------------------------ */
3732/* Function:    fr_checknatout                                              */
3733/* Returns:     int - -1 == packet failed NAT checks so block it,           */
3734/*                     0 == no packet translation occurred,                 */
3735/*                     1 == packet was successfully translated.             */
3736/* Parameters:  fin(I)   - pointer to packet information                    */
3737/*              passp(I) - pointer to filtering result flags                */
3738/*                                                                          */
3739/* Check to see if an outcoming packet should be changed.  ICMP packets are */
3740/* first checked to see if they match an existing entry (if an error),      */
3741/* otherwise a search of the current NAT table is made.  If neither results */
3742/* in a match then a search for a matching NAT rule is made.  Create a new  */
3743/* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3744/* packet header(s) as required.                                            */
3745/* ------------------------------------------------------------------------ */
3746int fr_checknatout(fin, passp)
3747fr_info_t *fin;
3748u_32_t *passp;
3749{
3750	struct ifnet *ifp, *sifp;
3751	icmphdr_t *icmp = NULL;
3752	tcphdr_t *tcp = NULL;
3753	int rval, natfailed;
3754	ipnat_t *np = NULL;
3755	u_int nflags = 0;
3756	u_32_t ipa, iph;
3757	int natadd = 1;
3758	frentry_t *fr;
3759	nat_t *nat;
3760
3761	if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
3762		return 0;
3763
3764	natfailed = 0;
3765	fr = fin->fin_fr;
3766	sifp = fin->fin_ifp;
3767	if (fr != NULL) {
3768		ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3769		if ((ifp != NULL) && (ifp != (void *)-1))
3770			fin->fin_ifp = ifp;
3771	}
3772	ifp = fin->fin_ifp;
3773
3774	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3775		switch (fin->fin_p)
3776		{
3777		case IPPROTO_TCP :
3778			nflags = IPN_TCP;
3779			break;
3780		case IPPROTO_UDP :
3781			nflags = IPN_UDP;
3782			break;
3783		case IPPROTO_ICMP :
3784			icmp = fin->fin_dp;
3785
3786			/*
3787			 * This is an incoming packet, so the destination is
3788			 * the icmp_id and the source port equals 0
3789			 */
3790			if (nat_icmpquerytype4(icmp->icmp_type))
3791				nflags = IPN_ICMPQUERY;
3792			break;
3793		default :
3794			break;
3795		}
3796
3797		if ((nflags & IPN_TCPUDP))
3798			tcp = fin->fin_dp;
3799	}
3800
3801	ipa = fin->fin_saddr;
3802
3803	READ_ENTER(&ipf_nat);
3804
3805	if (((fin->fin_flx & FI_ICMPERR) != 0) &&
3806	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3807		/*EMPTY*/;
3808	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3809		natadd = 0;
3810	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3811				      fin->fin_src, fin->fin_dst))) {
3812		nflags = nat->nat_flags;
3813	} else {
3814		u_32_t hv, msk, nmsk;
3815
3816		/*
3817		 * If there is no current entry in the nat table for this IP#,
3818		 * create one for it (if there is a matching rule).
3819		 */
3820		RWLOCK_EXIT(&ipf_nat);
3821		msk = 0xffffffff;
3822		nmsk = nat_masks;
3823		WRITE_ENTER(&ipf_nat);
3824maskloop:
3825		iph = ipa & htonl(msk);
3826		hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3827		for (np = nat_rules[hv]; np; np = np->in_mnext)
3828		{
3829			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3830				continue;
3831			if (np->in_v != fin->fin_v)
3832				continue;
3833			if (np->in_p && (np->in_p != fin->fin_p))
3834				continue;
3835			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3836				continue;
3837			if (np->in_flags & IPN_FILTER) {
3838				if (!nat_match(fin, np))
3839					continue;
3840			} else if ((ipa & np->in_inmsk) != np->in_inip)
3841				continue;
3842
3843			if ((fr != NULL) &&
3844			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3845				continue;
3846
3847			if (*np->in_plabel != '\0') {
3848				if (((np->in_flags & IPN_FILTER) == 0) &&
3849				    (np->in_dport != tcp->th_dport))
3850					continue;
3851				if (appr_ok(fin, tcp, np) == 0)
3852					continue;
3853			}
3854
3855			if ((nat = nat_new(fin, np, NULL, nflags,
3856					   NAT_OUTBOUND))) {
3857				np->in_hits++;
3858				break;
3859			} else
3860				natfailed = -1;
3861		}
3862		if ((np == NULL) && (nmsk != 0)) {
3863			while (nmsk) {
3864				msk <<= 1;
3865				if (nmsk & 0x80000000)
3866					break;
3867				nmsk <<= 1;
3868			}
3869			if (nmsk != 0) {
3870				nmsk <<= 1;
3871				goto maskloop;
3872			}
3873		}
3874		MUTEX_DOWNGRADE(&ipf_nat);
3875	}
3876
3877	if (nat != NULL) {
3878		rval = fr_natout(fin, nat, natadd, nflags);
3879		if (rval == 1) {
3880			MUTEX_ENTER(&nat->nat_lock);
3881			nat->nat_ref++;
3882			MUTEX_EXIT(&nat->nat_lock);
3883			nat->nat_touched = fr_ticks;
3884			fin->fin_nat = nat;
3885		}
3886	} else
3887		rval = natfailed;
3888	RWLOCK_EXIT(&ipf_nat);
3889
3890	if (rval == -1) {
3891		if (passp != NULL)
3892			*passp = FR_BLOCK;
3893		fin->fin_flx |= FI_BADNAT;
3894	}
3895	fin->fin_ifp = sifp;
3896	return rval;
3897}
3898
3899/* ------------------------------------------------------------------------ */
3900/* Function:    fr_natout                                                   */
3901/* Returns:     int - -1 == packet failed NAT checks so block it,           */
3902/*                     1 == packet was successfully translated.             */
3903/* Parameters:  fin(I)    - pointer to packet information                   */
3904/*              nat(I)    - pointer to NAT structure                        */
3905/*              natadd(I) - flag indicating if it is safe to add frag cache */
3906/*              nflags(I) - NAT flags set for this packet                   */
3907/*                                                                          */
3908/* Translate a packet coming "out" on an interface.                         */
3909/* ------------------------------------------------------------------------ */
3910int fr_natout(fin, nat, natadd, nflags)
3911fr_info_t *fin;
3912nat_t *nat;
3913int natadd;
3914u_32_t nflags;
3915{
3916	icmphdr_t *icmp;
3917	u_short *csump;
3918	tcphdr_t *tcp;
3919	ipnat_t *np;
3920	int i;
3921
3922	tcp = NULL;
3923	icmp = NULL;
3924	csump = NULL;
3925	np = nat->nat_ptr;
3926
3927	if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3928		(void) fr_nat_newfrag(fin, 0, nat);
3929
3930	MUTEX_ENTER(&nat->nat_lock);
3931	nat->nat_bytes[1] += fin->fin_plen;
3932	nat->nat_pkts[1]++;
3933	MUTEX_EXIT(&nat->nat_lock);
3934
3935	/*
3936	 * Fix up checksums, not by recalculating them, but
3937	 * simply computing adjustments.
3938	 * This is only done for STREAMS based IP implementations where the
3939	 * checksum has already been calculated by IP.  In all other cases,
3940	 * IPFilter is called before the checksum needs calculating so there
3941	 * is no call to modify whatever is in the header now.
3942	 */
3943	if (fin->fin_v == 4) {
3944		if (nflags == IPN_ICMPERR) {
3945			u_32_t s1, s2, sumd;
3946
3947			s1 = LONG_SUM(ntohl(fin->fin_saddr));
3948			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3949			CALC_SUMD(s1, s2, sumd);
3950			fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3951		}
3952#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3953    defined(linux) || defined(BRIDGE_IPF)
3954		else {
3955			/*
3956			 * Strictly speaking, this isn't necessary on BSD
3957			 * kernels because they do checksum calculation after
3958			 * this code has run BUT if ipfilter is being used
3959			 * to do NAT as a bridge, that code doesn't exist.
3960			 */
3961			if (nat->nat_dir == NAT_OUTBOUND)
3962				fix_outcksum(fin, &fin->fin_ip->ip_sum,
3963					     nat->nat_ipsumd);
3964			else
3965				fix_incksum(fin, &fin->fin_ip->ip_sum,
3966					    nat->nat_ipsumd);
3967		}
3968#endif
3969	}
3970
3971	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3972		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3973			tcp = fin->fin_dp;
3974
3975			tcp->th_sport = nat->nat_outport;
3976			fin->fin_data[0] = ntohs(nat->nat_outport);
3977		}
3978
3979		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3980			icmp = fin->fin_dp;
3981			icmp->icmp_id = nat->nat_outport;
3982		}
3983
3984		csump = nat_proto(fin, nat, nflags);
3985	}
3986
3987	fin->fin_ip->ip_src = nat->nat_outip;
3988
3989	nat_update(fin, nat, np);
3990
3991	/*
3992	 * The above comments do not hold for layer 4 (or higher) checksums...
3993	 */
3994	if (csump != NULL) {
3995		if (nat->nat_dir == NAT_OUTBOUND)
3996			fix_outcksum(fin, csump, nat->nat_sumd[1]);
3997		else
3998			fix_incksum(fin, csump, nat->nat_sumd[1]);
3999	}
4000#ifdef	IPFILTER_SYNC
4001	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4002#endif
4003	/* ------------------------------------------------------------- */
4004	/* A few quick notes:						 */
4005	/*	Following are test conditions prior to calling the 	 */
4006	/*	appr_check routine.					 */
4007	/*								 */
4008	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4009	/*	with a redirect rule, we attempt to match the packet's	 */
4010	/*	source port against in_dport, otherwise	we'd compare the */
4011	/*	packet's destination.			 		 */
4012	/* ------------------------------------------------------------- */
4013	if ((np != NULL) && (np->in_apr != NULL)) {
4014		i = appr_check(fin, nat);
4015		if (i == 0)
4016			i = 1;
4017	} else
4018		i = 1;
4019	ATOMIC_INCL(nat_stats.ns_mapped[1]);
4020	fin->fin_flx |= FI_NATED;
4021	return i;
4022}
4023
4024
4025/* ------------------------------------------------------------------------ */
4026/* Function:    fr_checknatin                                               */
4027/* Returns:     int - -1 == packet failed NAT checks so block it,           */
4028/*                     0 == no packet translation occurred,                 */
4029/*                     1 == packet was successfully translated.             */
4030/* Parameters:  fin(I)   - pointer to packet information                    */
4031/*              passp(I) - pointer to filtering result flags                */
4032/*                                                                          */
4033/* Check to see if an incoming packet should be changed.  ICMP packets are  */
4034/* first checked to see if they match an existing entry (if an error),      */
4035/* otherwise a search of the current NAT table is made.  If neither results */
4036/* in a match then a search for a matching NAT rule is made.  Create a new  */
4037/* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
4038/* packet header(s) as required.                                            */
4039/* ------------------------------------------------------------------------ */
4040int fr_checknatin(fin, passp)
4041fr_info_t *fin;
4042u_32_t *passp;
4043{
4044	u_int nflags, natadd;
4045	int rval, natfailed;
4046	struct ifnet *ifp;
4047	struct in_addr in;
4048	icmphdr_t *icmp;
4049	tcphdr_t *tcp;
4050	u_short dport;
4051	ipnat_t *np;
4052	nat_t *nat;
4053	u_32_t iph;
4054
4055	if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
4056		return 0;
4057
4058	tcp = NULL;
4059	icmp = NULL;
4060	dport = 0;
4061	natadd = 1;
4062	nflags = 0;
4063	natfailed = 0;
4064	ifp = fin->fin_ifp;
4065
4066	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4067		switch (fin->fin_p)
4068		{
4069		case IPPROTO_TCP :
4070			nflags = IPN_TCP;
4071			break;
4072		case IPPROTO_UDP :
4073			nflags = IPN_UDP;
4074			break;
4075		case IPPROTO_ICMP :
4076			icmp = fin->fin_dp;
4077
4078			/*
4079			 * This is an incoming packet, so the destination is
4080			 * the icmp_id and the source port equals 0
4081			 */
4082			if (nat_icmpquerytype4(icmp->icmp_type)) {
4083				nflags = IPN_ICMPQUERY;
4084				dport = icmp->icmp_id;
4085			} break;
4086		default :
4087			break;
4088		}
4089
4090		if ((nflags & IPN_TCPUDP)) {
4091			tcp = fin->fin_dp;
4092			dport = tcp->th_dport;
4093		}
4094	}
4095
4096	in = fin->fin_dst;
4097
4098	READ_ENTER(&ipf_nat);
4099
4100	if (((fin->fin_flx & FI_ICMPERR) != 0) &&
4101	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4102		/*EMPTY*/;
4103	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4104		natadd = 0;
4105	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4106				     fin->fin_src, in))) {
4107		nflags = nat->nat_flags;
4108	} else {
4109		u_32_t hv, msk, rmsk;
4110
4111		RWLOCK_EXIT(&ipf_nat);
4112		rmsk = rdr_masks;
4113		msk = 0xffffffff;
4114		WRITE_ENTER(&ipf_nat);
4115		/*
4116		 * If there is no current entry in the nat table for this IP#,
4117		 * create one for it (if there is a matching rule).
4118		 */
4119maskloop:
4120		iph = in.s_addr & htonl(msk);
4121		hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
4122		for (np = rdr_rules[hv]; np; np = np->in_rnext) {
4123			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4124				continue;
4125			if (np->in_v != fin->fin_v)
4126				continue;
4127			if (np->in_p && (np->in_p != fin->fin_p))
4128				continue;
4129			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4130				continue;
4131			if (np->in_flags & IPN_FILTER) {
4132				if (!nat_match(fin, np))
4133					continue;
4134			} else {
4135				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4136					continue;
4137				if (np->in_pmin &&
4138				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4139				     (ntohs(dport) < ntohs(np->in_pmin))))
4140					continue;
4141			}
4142
4143			if (*np->in_plabel != '\0') {
4144				if (!appr_ok(fin, tcp, np)) {
4145					continue;
4146				}
4147			}
4148
4149			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4150			if (nat != NULL) {
4151				np->in_hits++;
4152				break;
4153			} else
4154				natfailed = -1;
4155		}
4156
4157		if ((np == NULL) && (rmsk != 0)) {
4158			while (rmsk) {
4159				msk <<= 1;
4160				if (rmsk & 0x80000000)
4161					break;
4162				rmsk <<= 1;
4163			}
4164			if (rmsk != 0) {
4165				rmsk <<= 1;
4166				goto maskloop;
4167			}
4168		}
4169		MUTEX_DOWNGRADE(&ipf_nat);
4170	}
4171	if (nat != NULL) {
4172		rval = fr_natin(fin, nat, natadd, nflags);
4173		if (rval == 1) {
4174			MUTEX_ENTER(&nat->nat_lock);
4175			nat->nat_ref++;
4176			MUTEX_EXIT(&nat->nat_lock);
4177			nat->nat_touched = fr_ticks;
4178			fin->fin_nat = nat;
4179		}
4180	} else
4181		rval = natfailed;
4182	RWLOCK_EXIT(&ipf_nat);
4183
4184	if (rval == -1) {
4185		if (passp != NULL)
4186			*passp = FR_BLOCK;
4187		fin->fin_flx |= FI_BADNAT;
4188	}
4189	return rval;
4190}
4191
4192
4193/* ------------------------------------------------------------------------ */
4194/* Function:    fr_natin                                                    */
4195/* Returns:     int - -1 == packet failed NAT checks so block it,           */
4196/*                     1 == packet was successfully translated.             */
4197/* Parameters:  fin(I)    - pointer to packet information                   */
4198/*              nat(I)    - pointer to NAT structure                        */
4199/*              natadd(I) - flag indicating if it is safe to add frag cache */
4200/*              nflags(I) - NAT flags set for this packet                   */
4201/* Locks Held:  ipf_nat (READ)                                              */
4202/*                                                                          */
4203/* Translate a packet coming "in" on an interface.                          */
4204/* ------------------------------------------------------------------------ */
4205int fr_natin(fin, nat, natadd, nflags)
4206fr_info_t *fin;
4207nat_t *nat;
4208int natadd;
4209u_32_t nflags;
4210{
4211	icmphdr_t *icmp;
4212	u_short *csump;
4213	tcphdr_t *tcp;
4214	ipnat_t *np;
4215	int i;
4216
4217	tcp = NULL;
4218	csump = NULL;
4219	np = nat->nat_ptr;
4220	fin->fin_fr = nat->nat_fr;
4221
4222	if (np != NULL) {
4223		if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4224			(void) fr_nat_newfrag(fin, 0, nat);
4225
4226	/* ------------------------------------------------------------- */
4227	/* A few quick notes:						 */
4228	/*	Following are test conditions prior to calling the 	 */
4229	/*	appr_check routine.					 */
4230	/*								 */
4231	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4232	/*	with a map rule, we attempt to match the packet's	 */
4233	/*	source port against in_dport, otherwise	we'd compare the */
4234	/*	packet's destination.			 		 */
4235	/* ------------------------------------------------------------- */
4236		if (np->in_apr != NULL) {
4237			i = appr_check(fin, nat);
4238			if (i == -1) {
4239				return -1;
4240			}
4241		}
4242	}
4243
4244#ifdef	IPFILTER_SYNC
4245	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4246#endif
4247
4248	MUTEX_ENTER(&nat->nat_lock);
4249	nat->nat_bytes[0] += fin->fin_plen;
4250	nat->nat_pkts[0]++;
4251	MUTEX_EXIT(&nat->nat_lock);
4252
4253	fin->fin_ip->ip_dst = nat->nat_inip;
4254	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4255	if (nflags & IPN_TCPUDP)
4256		tcp = fin->fin_dp;
4257
4258	/*
4259	 * Fix up checksums, not by recalculating them, but
4260	 * simply computing adjustments.
4261	 * Why only do this for some platforms on inbound packets ?
4262	 * Because for those that it is done, IP processing is yet to happen
4263	 * and so the IPv4 header checksum has not yet been evaluated.
4264	 * Perhaps it should always be done for the benefit of things like
4265	 * fast forwarding (so that it doesn't need to be recomputed) but with
4266	 * header checksum offloading, perhaps it is a moot point.
4267	 */
4268#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4269     defined(__osf__) || defined(linux)
4270	if (nat->nat_dir == NAT_OUTBOUND)
4271		fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4272	else
4273		fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4274#endif
4275
4276	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4277		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4278			tcp->th_dport = nat->nat_inport;
4279			fin->fin_data[1] = ntohs(nat->nat_inport);
4280		}
4281
4282
4283		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4284			icmp = fin->fin_dp;
4285
4286			icmp->icmp_id = nat->nat_inport;
4287		}
4288
4289		csump = nat_proto(fin, nat, nflags);
4290	}
4291
4292	nat_update(fin, nat, np);
4293
4294	/*
4295	 * The above comments do not hold for layer 4 (or higher) checksums...
4296	 */
4297	if (csump != NULL) {
4298		if (nat->nat_dir == NAT_OUTBOUND)
4299			fix_incksum(fin, csump, nat->nat_sumd[0]);
4300		else
4301			fix_outcksum(fin, csump, nat->nat_sumd[0]);
4302	}
4303	ATOMIC_INCL(nat_stats.ns_mapped[0]);
4304	fin->fin_flx |= FI_NATED;
4305	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4306		fin->fin_nattag = &np->in_tag;
4307	return 1;
4308}
4309
4310
4311/* ------------------------------------------------------------------------ */
4312/* Function:    nat_proto                                                   */
4313/* Returns:     u_short* - pointer to transport header checksum to update,  */
4314/*                         NULL if the transport protocol is not recognised */
4315/*                         as needing a checksum update.                    */
4316/* Parameters:  fin(I)    - pointer to packet information                   */
4317/*              nat(I)    - pointer to NAT structure                        */
4318/*              nflags(I) - NAT flags set for this packet                   */
4319/*                                                                          */
4320/* Return the pointer to the checksum field for each protocol so understood.*/
4321/* If support for making other changes to a protocol header is required,    */
4322/* that is not strictly 'address' translation, such as clamping the MSS in  */
4323/* TCP down to a specific value, then do it from here.                      */
4324/* ------------------------------------------------------------------------ */
4325u_short *nat_proto(fin, nat, nflags)
4326fr_info_t *fin;
4327nat_t *nat;
4328u_int nflags;
4329{
4330	icmphdr_t *icmp;
4331	u_short *csump;
4332	tcphdr_t *tcp;
4333	udphdr_t *udp;
4334
4335	csump = NULL;
4336	if (fin->fin_out == 0) {
4337		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4338	} else {
4339		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4340	}
4341
4342	switch (fin->fin_p)
4343	{
4344	case IPPROTO_TCP :
4345		tcp = fin->fin_dp;
4346
4347		csump = &tcp->th_sum;
4348
4349		/*
4350		 * Do a MSS CLAMPING on a SYN packet,
4351		 * only deal IPv4 for now.
4352		 */
4353		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4354			nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
4355
4356		break;
4357
4358	case IPPROTO_UDP :
4359		udp = fin->fin_dp;
4360
4361		if (udp->uh_sum)
4362			csump = &udp->uh_sum;
4363		break;
4364
4365	case IPPROTO_ICMP :
4366		icmp = fin->fin_dp;
4367
4368		if ((nflags & IPN_ICMPQUERY) != 0) {
4369			if (icmp->icmp_cksum != 0)
4370				csump = &icmp->icmp_cksum;
4371		}
4372		break;
4373	}
4374	return csump;
4375}
4376
4377
4378/* ------------------------------------------------------------------------ */
4379/* Function:    fr_natunload                                                */
4380/* Returns:     Nil                                                         */
4381/* Parameters:  Nil                                                         */
4382/*                                                                          */
4383/* Free all memory used by NAT structures allocated at runtime.             */
4384/* ------------------------------------------------------------------------ */
4385void fr_natunload()
4386{
4387	ipftq_t *ifq, *ifqnext;
4388
4389	(void) nat_clearlist();
4390	(void) nat_flushtable();
4391
4392	/*
4393	 * Proxy timeout queues are not cleaned here because although they
4394	 * exist on the NAT list, appr_unload is called after fr_natunload
4395	 * and the proxies actually are responsible for them being created.
4396	 * Should the proxy timeouts have their own list?  There's no real
4397	 * justification as this is the only complication.
4398	 */
4399	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4400		ifqnext = ifq->ifq_next;
4401		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4402		    (fr_deletetimeoutqueue(ifq) == 0))
4403			fr_freetimeoutqueue(ifq);
4404	}
4405
4406	if (nat_table[0] != NULL) {
4407		KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4408		nat_table[0] = NULL;
4409	}
4410	if (nat_table[1] != NULL) {
4411		KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4412		nat_table[1] = NULL;
4413	}
4414	if (nat_rules != NULL) {
4415		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4416		nat_rules = NULL;
4417	}
4418	if (rdr_rules != NULL) {
4419		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4420		rdr_rules = NULL;
4421	}
4422	if (ipf_hm_maptable != NULL) {
4423		KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4424		ipf_hm_maptable = NULL;
4425	}
4426	if (nat_stats.ns_bucketlen[0] != NULL) {
4427		KFREES(nat_stats.ns_bucketlen[0],
4428		       sizeof(u_long *) * ipf_nattable_sz);
4429		nat_stats.ns_bucketlen[0] = NULL;
4430	}
4431	if (nat_stats.ns_bucketlen[1] != NULL) {
4432		KFREES(nat_stats.ns_bucketlen[1],
4433		       sizeof(u_long *) * ipf_nattable_sz);
4434		nat_stats.ns_bucketlen[1] = NULL;
4435	}
4436
4437	if (fr_nat_maxbucket_reset == 1)
4438		fr_nat_maxbucket = 0;
4439
4440	if (fr_nat_init == 1) {
4441		fr_nat_init = 0;
4442		fr_sttab_destroy(nat_tqb);
4443
4444		RW_DESTROY(&ipf_natfrag);
4445		RW_DESTROY(&ipf_nat);
4446
4447		MUTEX_DESTROY(&ipf_nat_new);
4448		MUTEX_DESTROY(&ipf_natio);
4449
4450		MUTEX_DESTROY(&nat_udptq.ifq_lock);
4451		MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4452		MUTEX_DESTROY(&nat_iptq.ifq_lock);
4453	}
4454}
4455
4456
4457/* ------------------------------------------------------------------------ */
4458/* Function:    fr_natexpire                                                */
4459/* Returns:     Nil                                                         */
4460/* Parameters:  Nil                                                         */
4461/*                                                                          */
4462/* Check all of the timeout queues for entries at the top which need to be  */
4463/* expired.                                                                 */
4464/* ------------------------------------------------------------------------ */
4465void fr_natexpire()
4466{
4467	ipftq_t *ifq, *ifqnext;
4468	ipftqent_t *tqe, *tqn;
4469	int i;
4470	SPL_INT(s);
4471
4472	SPL_NET(s);
4473	WRITE_ENTER(&ipf_nat);
4474	for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4475		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4476			if (tqe->tqe_die > fr_ticks)
4477				break;
4478			tqn = tqe->tqe_next;
4479			nat_delete(tqe->tqe_parent, NL_EXPIRE);
4480		}
4481	}
4482
4483	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4484		ifqnext = ifq->ifq_next;
4485
4486		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4487			if (tqe->tqe_die > fr_ticks)
4488				break;
4489			tqn = tqe->tqe_next;
4490			nat_delete(tqe->tqe_parent, NL_EXPIRE);
4491		}
4492	}
4493
4494	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4495		ifqnext = ifq->ifq_next;
4496
4497		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4498		    (ifq->ifq_ref == 0)) {
4499			fr_freetimeoutqueue(ifq);
4500		}
4501	}
4502
4503	if (fr_nat_doflush != 0) {
4504		nat_extraflush(2);
4505		fr_nat_doflush = 0;
4506	}
4507
4508	RWLOCK_EXIT(&ipf_nat);
4509	SPL_X(s);
4510}
4511
4512
4513/* ------------------------------------------------------------------------ */
4514/* Function:    fr_natsync                                                  */
4515/* Returns:     Nil                                                         */
4516/* Parameters:  ifp(I) - pointer to network interface                       */
4517/*                                                                          */
4518/* Walk through all of the currently active NAT sessions, looking for those */
4519/* which need to have their translated address updated.                     */
4520/* ------------------------------------------------------------------------ */
4521void fr_natsync(ifp)
4522void *ifp;
4523{
4524	u_32_t sum1, sum2, sumd;
4525	struct in_addr in;
4526	ipnat_t *n;
4527	nat_t *nat;
4528	void *ifp2;
4529	SPL_INT(s);
4530
4531	if (fr_running <= 0)
4532		return;
4533
4534	/*
4535	 * Change IP addresses for NAT sessions for any protocol except TCP
4536	 * since it will break the TCP connection anyway.  The only rules
4537	 * which will get changed are those which are "map ... -> 0/32",
4538	 * where the rule specifies the address is taken from the interface.
4539	 */
4540	SPL_NET(s);
4541	WRITE_ENTER(&ipf_nat);
4542
4543	if (fr_running <= 0) {
4544		RWLOCK_EXIT(&ipf_nat);
4545		return;
4546	}
4547
4548	for (nat = nat_instances; nat; nat = nat->nat_next) {
4549		if ((nat->nat_flags & IPN_TCP) != 0)
4550			continue;
4551		n = nat->nat_ptr;
4552		if ((n == NULL) ||
4553		    (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4554			continue;
4555		if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4556		     (ifp == nat->nat_ifps[1]))) {
4557			nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4558			if (nat->nat_ifnames[1][0] != '\0') {
4559				nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4560							  4);
4561			} else
4562				nat->nat_ifps[1] = nat->nat_ifps[0];
4563			ifp2 = nat->nat_ifps[0];
4564			if (ifp2 == NULL)
4565				continue;
4566
4567			/*
4568			 * Change the map-to address to be the same as the
4569			 * new one.
4570			 */
4571			sum1 = nat->nat_outip.s_addr;
4572			if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4573				nat->nat_outip = in;
4574			sum2 = nat->nat_outip.s_addr;
4575
4576			if (sum1 == sum2)
4577				continue;
4578			/*
4579			 * Readjust the checksum adjustment to take into
4580			 * account the new IP#.
4581			 */
4582			CALC_SUMD(sum1, sum2, sumd);
4583			/* XXX - dont change for TCP when solaris does
4584			 * hardware checksumming.
4585			 */
4586			sumd += nat->nat_sumd[0];
4587			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4588			nat->nat_sumd[1] = nat->nat_sumd[0];
4589		}
4590	}
4591
4592	for (n = nat_list; (n != NULL); n = n->in_next) {
4593		if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4594			n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4595		if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4596			n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4597	}
4598	RWLOCK_EXIT(&ipf_nat);
4599	SPL_X(s);
4600}
4601
4602
/* ------------------------------------------------------------------------ */
/* Function:    nat_icmpquerytype4                                          */
/* Returns:     int - 1 == query/response type, 0 == any other type         */
/* Parameters:  icmptype(I) - ICMP type number                              */
/*                                                                          */
/* Tests to see if the ICMP type number passed is a query/response type or  */
/* not.                                                                     */
/* ------------------------------------------------------------------------ */
static int nat_icmpquerytype4(icmptype)
int icmptype;
{
	int isquery;

	/*
	 * For ICMP query NAT both the query and its reply have to match the
	 * NAT rule.  The NAT structure does not record the ICMP type, and a
	 * single structure serves every ICMP type with the same source,
	 * destination and id, so the replies are simply treated as queries
	 * too.  Calling a reply a "query" may look odd, but it is how the
	 * IPv4 specification defines them.
	 *
	 * Router advertisement/solicitation is currently unsupported: it
	 * would require rewriting the ICMP data section.
	 */
	switch (icmptype)
	{
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
	case ICMP_TSTAMP:
	case ICMP_TSTAMPREPLY:
	case ICMP_IREQ:
	case ICMP_IREQREPLY:
	case ICMP_MASKREQ:
	case ICMP_MASKREPLY:
		isquery = 1;
		break;
	default:
		isquery = 0;
		break;
	}

	return isquery;
}
4643
4644
4645/* ------------------------------------------------------------------------ */
4646/* Function:    nat_log                                                     */
4647/* Returns:     Nil                                                         */
4648/* Parameters:  nat(I)  - pointer to NAT structure                          */
4649/*              type(I) - type of log entry to create                       */
4650/*                                                                          */
4651/* Creates a NAT log entry.                                                 */
4652/* ------------------------------------------------------------------------ */
4653void nat_log(nat, type)
4654struct nat *nat;
4655u_int type;
4656{
4657#ifdef	IPFILTER_LOG
4658# ifndef LARGE_NAT
4659	struct ipnat *np;
4660	int rulen;
4661# endif
4662	struct natlog natl;
4663	void *items[1];
4664	size_t sizes[1];
4665	int types[1];
4666
4667	natl.nl_inip = nat->nat_inip;
4668	natl.nl_outip = nat->nat_outip;
4669	natl.nl_origip = nat->nat_oip;
4670	natl.nl_bytes[0] = nat->nat_bytes[0];
4671	natl.nl_bytes[1] = nat->nat_bytes[1];
4672	natl.nl_pkts[0] = nat->nat_pkts[0];
4673	natl.nl_pkts[1] = nat->nat_pkts[1];
4674	natl.nl_origport = nat->nat_oport;
4675	natl.nl_inport = nat->nat_inport;
4676	natl.nl_outport = nat->nat_outport;
4677	natl.nl_p = nat->nat_p;
4678	natl.nl_type = type;
4679	natl.nl_rule = -1;
4680# ifndef LARGE_NAT
4681	if (nat->nat_ptr != NULL) {
4682		for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4683			if (np == nat->nat_ptr) {
4684				natl.nl_rule = rulen;
4685				break;
4686			}
4687	}
4688# endif
4689	items[0] = &natl;
4690	sizes[0] = sizeof(natl);
4691	types[0] = 0;
4692
4693	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4694#endif
4695}
4696
4697
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*                                                                          */
/* Compatibility interface for OpenBSD to trigger the correct updating of   */
/* interface references within IPFilter.                                    */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp)
void *ifp;
{
	/* All of the work is done by frsync(). */
	frsync(ifp);
}
#endif
4714
4715
4716/* ------------------------------------------------------------------------ */
4717/* Function:    fr_ipnatderef                                               */
4718/* Returns:     Nil                                                         */
4719/* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4720/* Write Locks: ipf_nat                                                     */
4721/*                                                                          */
4722/* ------------------------------------------------------------------------ */
4723void fr_ipnatderef(inp)
4724ipnat_t **inp;
4725{
4726	ipnat_t *in;
4727
4728	in = *inp;
4729	*inp = NULL;
4730	in->in_space++;
4731	in->in_use--;
4732	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4733		if (in->in_apr)
4734			appr_free(in->in_apr);
4735		MUTEX_DESTROY(&in->in_lock);
4736		KFREE(in);
4737		nat_stats.ns_rules--;
4738#if SOLARIS && !defined(_INET_IP_STACK_H)
4739		if (nat_stats.ns_rules == 0)
4740			pfil_delayed_copy = 1;
4741#endif
4742	}
4743}
4744
4745
4746/* ------------------------------------------------------------------------ */
4747/* Function:    fr_natderef                                                 */
4748/* Returns:     Nil                                                         */
4749/* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4750/*                                                                          */
4751/* Decrement the reference counter for this NAT table entry and free it if  */
4752/* there are no more things using it.                                       */
4753/*                                                                          */
4754/* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4755/* structure *because* it only gets called on paths _after_ nat_ref has been*/
4756/* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4757/* because nat_delete() will do that and send nat_ref to -1.                */
4758/*                                                                          */
4759/* Holding the lock on nat_lock is required to serialise nat_delete() being */
4760/* called from a NAT flush ioctl with a deref happening because of a packet.*/
4761/* ------------------------------------------------------------------------ */
4762void fr_natderef(natp)
4763nat_t **natp;
4764{
4765	nat_t *nat;
4766
4767	nat = *natp;
4768	*natp = NULL;
4769
4770	MUTEX_ENTER(&nat->nat_lock);
4771	if (nat->nat_ref > 1) {
4772		nat->nat_ref--;
4773		MUTEX_EXIT(&nat->nat_lock);
4774		return;
4775	}
4776	MUTEX_EXIT(&nat->nat_lock);
4777
4778	WRITE_ENTER(&ipf_nat);
4779	nat_delete(nat, NL_EXPIRE);
4780	RWLOCK_EXIT(&ipf_nat);
4781}
4782
4783
4784/* ------------------------------------------------------------------------ */
4785/* Function:    fr_natclone                                                 */
4786/* Returns:     ipstate_t* - NULL == cloning failed,                        */
4787/*                           else pointer to new state structure            */
4788/* Parameters:  fin(I) - pointer to packet information                      */
4789/*              is(I)  - pointer to master state structure                  */
4790/* Write Lock:  ipf_nat                                                     */
4791/*                                                                          */
4792/* Create a "duplcate" state table entry from the master.                   */
4793/* ------------------------------------------------------------------------ */
4794static nat_t *fr_natclone(fin, nat)
4795fr_info_t *fin;
4796nat_t *nat;
4797{
4798	frentry_t *fr;
4799	nat_t *clone;
4800	ipnat_t *np;
4801
4802	KMALLOC(clone, nat_t *);
4803	if (clone == NULL)
4804		return NULL;
4805	bcopy((char *)nat, (char *)clone, sizeof(*clone));
4806
4807	MUTEX_NUKE(&clone->nat_lock);
4808
4809	clone->nat_aps = NULL;
4810	/*
4811	 * Initialize all these so that nat_delete() doesn't cause a crash.
4812	 */
4813	clone->nat_tqe.tqe_pnext = NULL;
4814	clone->nat_tqe.tqe_next = NULL;
4815	clone->nat_tqe.tqe_ifq = NULL;
4816	clone->nat_tqe.tqe_parent = clone;
4817
4818	clone->nat_flags &= ~SI_CLONE;
4819	clone->nat_flags |= SI_CLONED;
4820
4821	if (clone->nat_hm)
4822		clone->nat_hm->hm_ref++;
4823
4824	if (nat_insert(clone, fin->fin_rev) == -1) {
4825		KFREE(clone);
4826		return NULL;
4827	}
4828	np = clone->nat_ptr;
4829	if (np != NULL) {
4830		if (nat_logging)
4831			nat_log(clone, (u_int)np->in_redir);
4832		np->in_use++;
4833	}
4834	fr = clone->nat_fr;
4835	if (fr != NULL) {
4836		MUTEX_ENTER(&fr->fr_lock);
4837		fr->fr_ref++;
4838		MUTEX_EXIT(&fr->fr_lock);
4839	}
4840
4841	/*
4842	 * Because the clone is created outside the normal loop of things and
4843	 * TCP has special needs in terms of state, initialise the timeout
4844	 * state of the new NAT from here.
4845	 */
4846	if (clone->nat_p == IPPROTO_TCP) {
4847		(void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4848				  clone->nat_flags);
4849	}
4850#ifdef	IPFILTER_SYNC
4851	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4852#endif
4853	if (nat_logging)
4854		nat_log(clone, NL_CLONE);
4855	return clone;
4856}
4857
4858
4859/* ------------------------------------------------------------------------ */
4860/* Function:   nat_wildok                                                   */
4861/* Returns:    int - 1 == packet's ports match wildcards                    */
4862/*                   0 == packet's ports don't match wildcards              */
4863/* Parameters: nat(I)   - NAT entry                                         */
4864/*             sport(I) - source port                                       */
4865/*             dport(I) - destination port                                  */
4866/*             flags(I) - wildcard flags                                    */
4867/*             dir(I)   - packet direction                                  */
4868/*                                                                          */
4869/* Use NAT entry and packet direction to determine which combination of     */
4870/* wildcard flags should be used.                                           */
4871/* ------------------------------------------------------------------------ */
4872static int nat_wildok(nat, sport, dport, flags, dir)
4873nat_t *nat;
4874int sport;
4875int dport;
4876int flags;
4877int dir;
4878{
4879	/*
4880	 * When called by       dir is set to
4881	 * nat_inlookup         NAT_INBOUND (0)
4882	 * nat_outlookup        NAT_OUTBOUND (1)
4883	 *
4884	 * We simply combine the packet's direction in dir with the original
4885	 * "intended" direction of that NAT entry in nat->nat_dir to decide
4886	 * which combination of wildcard flags to allow.
4887	 */
4888
4889	switch ((dir << 1) | nat->nat_dir)
4890	{
4891	case 3: /* outbound packet / outbound entry */
4892		if (((nat->nat_inport == sport) ||
4893		    (flags & SI_W_SPORT)) &&
4894		    ((nat->nat_oport == dport) ||
4895		    (flags & SI_W_DPORT)))
4896			return 1;
4897		break;
4898	case 2: /* outbound packet / inbound entry */
4899		if (((nat->nat_outport == sport) ||
4900		    (flags & SI_W_DPORT)) &&
4901		    ((nat->nat_oport == dport) ||
4902		    (flags & SI_W_SPORT)))
4903			return 1;
4904		break;
4905	case 1: /* inbound packet / outbound entry */
4906		if (((nat->nat_oport == sport) ||
4907		    (flags & SI_W_DPORT)) &&
4908		    ((nat->nat_outport == dport) ||
4909		    (flags & SI_W_SPORT)))
4910			return 1;
4911		break;
4912	case 0: /* inbound packet / inbound entry */
4913		if (((nat->nat_oport == sport) ||
4914		    (flags & SI_W_SPORT)) &&
4915		    ((nat->nat_outport == dport) ||
4916		    (flags & SI_W_DPORT)))
4917			return 1;
4918		break;
4919	default:
4920		break;
4921	}
4922
4923	return(0);
4924}
4925
4926
4927/* ------------------------------------------------------------------------ */
4928/* Function:    nat_mssclamp                                                */
4929/* Returns:     Nil                                                         */
4930/* Parameters:  tcp(I)    - pointer to TCP header                           */
4931/*              maxmss(I) - value to clamp the TCP MSS to                   */
4932/*              fin(I)    - pointer to packet information                   */
4933/*              csump(I)  - pointer to TCP checksum                         */
4934/*                                                                          */
4935/* Check for MSS option and clamp it if necessary.  If found and changed,   */
4936/* then the TCP header checksum will be updated to reflect the change in    */
4937/* the MSS.                                                                 */
4938/* ------------------------------------------------------------------------ */
4939static void nat_mssclamp(tcp, maxmss, fin, csump)
4940tcphdr_t *tcp;
4941u_32_t maxmss;
4942fr_info_t *fin;
4943u_short *csump;
4944{
4945	u_char *cp, *ep, opt;
4946	int hlen, advance;
4947	u_32_t mss, sumd;
4948
4949	hlen = TCP_OFF(tcp) << 2;
4950	if (hlen > sizeof(*tcp)) {
4951		cp = (u_char *)tcp + sizeof(*tcp);
4952		ep = (u_char *)tcp + hlen;
4953
4954		while (cp < ep) {
4955			opt = cp[0];
4956			if (opt == TCPOPT_EOL)
4957				break;
4958			else if (opt == TCPOPT_NOP) {
4959				cp++;
4960				continue;
4961			}
4962
4963			if (cp + 1 >= ep)
4964				break;
4965			advance = cp[1];
4966			if ((cp + advance > ep) || (advance <= 0))
4967				break;
4968			switch (opt)
4969			{
4970			case TCPOPT_MAXSEG:
4971				if (advance != 4)
4972					break;
4973				mss = cp[2] * 256 + cp[3];
4974				if (mss > maxmss) {
4975					cp[2] = maxmss / 256;
4976					cp[3] = maxmss & 0xff;
4977					CALC_SUMD(mss, maxmss, sumd);
4978					fix_outcksum(fin, csump, sumd);
4979				}
4980				break;
4981			default:
4982				/* ignore unknown options */
4983				break;
4984			}
4985
4986			cp += advance;
4987		}
4988	}
4989}
4990
4991
4992/* ------------------------------------------------------------------------ */
4993/* Function:    fr_setnatqueue                                              */
4994/* Returns:     Nil                                                         */
4995/* Parameters:  nat(I)- pointer to NAT structure                            */
4996/*              rev(I) - forward(0) or reverse(1) direction                 */
4997/* Locks:       ipf_nat (read or write)                                     */
4998/*                                                                          */
4999/* Put the NAT entry on its default queue entry, using rev as a helped in   */
5000/* determining which queue it should be placed on.                          */
5001/* ------------------------------------------------------------------------ */
5002void fr_setnatqueue(nat, rev)
5003nat_t *nat;
5004int rev;
5005{
5006	ipftq_t *oifq, *nifq;
5007
5008	if (nat->nat_ptr != NULL)
5009		nifq = nat->nat_ptr->in_tqehead[rev];
5010	else
5011		nifq = NULL;
5012
5013	if (nifq == NULL) {
5014		switch (nat->nat_p)
5015		{
5016		case IPPROTO_UDP :
5017			nifq = &nat_udptq;
5018			break;
5019		case IPPROTO_ICMP :
5020			nifq = &nat_icmptq;
5021			break;
5022		case IPPROTO_TCP :
5023			nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
5024			break;
5025		default :
5026			nifq = &nat_iptq;
5027			break;
5028		}
5029	}
5030
5031	oifq = nat->nat_tqe.tqe_ifq;
5032	/*
5033	 * If it's currently on a timeout queue, move it from one queue to
5034	 * another, else put it on the end of the newly determined queue.
5035	 */
5036	if (oifq != NULL)
5037		fr_movequeue(&nat->nat_tqe, oifq, nifq);
5038	else
5039		fr_queueappend(&nat->nat_tqe, nifq, nat);
5040	return;
5041}
5042
5043
5044/* ------------------------------------------------------------------------ */
5045/* Function:    nat_getnext                                                 */
5046/* Returns:     int - 0 == ok, else error                                   */
5047/* Parameters:  t(I)   - pointer to ipftoken structure                      */
5048/*              itp(I) - pointer to ipfgeniter_t structure                  */
5049/*                                                                          */
5050/* Fetch the next nat/ipnat structure pointer from the linked list and      */
5051/* copy it out to the storage space pointed to by itp_data.  The next item  */
5052/* in the list to look at is put back in the ipftoken struture.             */
5053/* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
5054/* ipf_freetoken will call a deref function for us and we dont want to call */
5055/* that twice (second time would be in the second switch statement below.   */
5056/* ------------------------------------------------------------------------ */
5057static int nat_getnext(t, itp)
5058ipftoken_t *t;
5059ipfgeniter_t *itp;
5060{
5061	hostmap_t *hm, *nexthm = NULL, zerohm;
5062	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5063	nat_t *nat, *nextnat = NULL, zeronat;
5064	int error = 0, count;
5065	char *dst;
5066
5067	count = itp->igi_nitems;
5068	if (count < 1)
5069		return ENOSPC;
5070
5071	READ_ENTER(&ipf_nat);
5072
5073	switch (itp->igi_type)
5074	{
5075	case IPFGENITER_HOSTMAP :
5076		hm = t->ipt_data;
5077		if (hm == NULL) {
5078			nexthm = ipf_hm_maplist;
5079		} else {
5080			nexthm = hm->hm_next;
5081		}
5082		break;
5083
5084	case IPFGENITER_IPNAT :
5085		ipn = t->ipt_data;
5086		if (ipn == NULL) {
5087			nextipnat = nat_list;
5088		} else {
5089			nextipnat = ipn->in_next;
5090		}
5091		break;
5092
5093	case IPFGENITER_NAT :
5094		nat = t->ipt_data;
5095		if (nat == NULL) {
5096			nextnat = nat_instances;
5097		} else {
5098			nextnat = nat->nat_next;
5099		}
5100		break;
5101	default :
5102		RWLOCK_EXIT(&ipf_nat);
5103		return EINVAL;
5104	}
5105
5106	dst = itp->igi_data;
5107	for (;;) {
5108		switch (itp->igi_type)
5109		{
5110		case IPFGENITER_HOSTMAP :
5111			if (nexthm != NULL) {
5112				if (count == 1) {
5113					ATOMIC_INC32(nexthm->hm_ref);
5114					t->ipt_data = nexthm;
5115				}
5116			} else {
5117				bzero(&zerohm, sizeof(zerohm));
5118				nexthm = &zerohm;
5119				count = 1;
5120				t->ipt_data = NULL;
5121			}
5122			break;
5123
5124		case IPFGENITER_IPNAT :
5125			if (nextipnat != NULL) {
5126				if (count == 1) {
5127					MUTEX_ENTER(&nextipnat->in_lock);
5128					nextipnat->in_use++;
5129					MUTEX_EXIT(&nextipnat->in_lock);
5130					t->ipt_data = nextipnat;
5131				}
5132			} else {
5133				bzero(&zeroipn, sizeof(zeroipn));
5134				nextipnat = &zeroipn;
5135				count = 1;
5136				t->ipt_data = NULL;
5137			}
5138			break;
5139
5140		case IPFGENITER_NAT :
5141			if (nextnat != NULL) {
5142				if (count == 1) {
5143					MUTEX_ENTER(&nextnat->nat_lock);
5144					nextnat->nat_ref++;
5145					MUTEX_EXIT(&nextnat->nat_lock);
5146					t->ipt_data = nextnat;
5147				}
5148			} else {
5149				bzero(&zeronat, sizeof(zeronat));
5150				nextnat = &zeronat;
5151				count = 1;
5152				t->ipt_data = NULL;
5153			}
5154			break;
5155		default :
5156			break;
5157		}
5158		RWLOCK_EXIT(&ipf_nat);
5159
5160		/*
5161		 * Copying out to user space needs to be done without the lock.
5162		 */
5163		switch (itp->igi_type)
5164		{
5165		case IPFGENITER_HOSTMAP :
5166			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5167			if (error != 0)
5168				error = EFAULT;
5169			else
5170				dst += sizeof(*nexthm);
5171			break;
5172
5173		case IPFGENITER_IPNAT :
5174			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5175			if (error != 0)
5176				error = EFAULT;
5177			else
5178				dst += sizeof(*nextipnat);
5179			break;
5180
5181		case IPFGENITER_NAT :
5182			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5183			if (error != 0)
5184				error = EFAULT;
5185			else
5186				dst += sizeof(*nextnat);
5187			break;
5188		}
5189
5190		if ((count == 1) || (error != 0))
5191			break;
5192
5193		count--;
5194
5195		READ_ENTER(&ipf_nat);
5196
5197		/*
5198		 * We need to have the lock again here to make sure that
5199		 * using _next is consistent.
5200		 */
5201		switch (itp->igi_type)
5202		{
5203		case IPFGENITER_HOSTMAP :
5204			nexthm = nexthm->hm_next;
5205			break;
5206		case IPFGENITER_IPNAT :
5207			nextipnat = nextipnat->in_next;
5208			break;
5209		case IPFGENITER_NAT :
5210			nextnat = nextnat->nat_next;
5211			break;
5212		}
5213	}
5214
5215
5216	switch (itp->igi_type)
5217	{
5218	case IPFGENITER_HOSTMAP :
5219		if (hm != NULL) {
5220			WRITE_ENTER(&ipf_nat);
5221			fr_hostmapdel(&hm);
5222			RWLOCK_EXIT(&ipf_nat);
5223		}
5224		break;
5225	case IPFGENITER_IPNAT :
5226		if (ipn != NULL) {
5227			fr_ipnatderef(&ipn);
5228		}
5229		break;
5230	case IPFGENITER_NAT :
5231		if (nat != NULL) {
5232			fr_natderef(&nat);
5233		}
5234		break;
5235	default :
5236		break;
5237	}
5238
5239	return error;
5240}
5241
5242
5243/* ------------------------------------------------------------------------ */
5244/* Function:    nat_iterator                                                */
5245/* Returns:     int - 0 == ok, else error                                   */
5246/* Parameters:  token(I) - pointer to ipftoken structure                    */
5247/*              itp(I) - pointer to ipfgeniter_t structure                  */
5248/*                                                                          */
5249/* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5250/* generic structure to iterate through a list.  There are three different  */
5251/* linked lists of NAT related information to go through: NAT rules, active */
5252/* NAT mappings and the NAT fragment cache.                                 */
5253/* ------------------------------------------------------------------------ */
5254static int nat_iterator(token, itp)
5255ipftoken_t *token;
5256ipfgeniter_t *itp;
5257{
5258	int error;
5259
5260	if (itp->igi_data == NULL)
5261		return EFAULT;
5262
5263	token->ipt_subtype = itp->igi_type;
5264
5265	switch (itp->igi_type)
5266	{
5267	case IPFGENITER_HOSTMAP :
5268	case IPFGENITER_IPNAT :
5269	case IPFGENITER_NAT :
5270		error = nat_getnext(token, itp);
5271		break;
5272
5273	case IPFGENITER_NATFRAG :
5274#ifdef USE_MUTEXES
5275		error = fr_nextfrag(token, itp, &ipfr_natlist,
5276				    &ipfr_nattail, &ipf_natfrag);
5277#else
5278		error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5279#endif
5280		break;
5281	default :
5282		error = EINVAL;
5283		break;
5284	}
5285
5286	return error;
5287}
5288
5289
5290/* ------------------------------------------------------------------------ */
5291/* Function:    nat_extraflush                                              */
5292/* Returns:     int - 0 == success, -1 == failure                           */
5293/* Parameters:  which(I) - how to flush the active NAT table                */
5294/* Write Locks: ipf_nat                                                     */
5295/*                                                                          */
5296/* Flush nat tables.  Three actions currently defined:                      */
5297/* which == 0 : flush all nat table entries                                 */
5298/* which == 1 : flush TCP connections which have started to close but are   */
5299/*	      stuck for some reason.                                        */
5300/* which == 2 : flush TCP connections which have been idle for a long time, */
5301/*	      starting at > 4 days idle and working back in successive half-*/
5302/*	      days to at most 12 hours old.  If this fails to free enough   */
5303/*            slots then work backwards in half hour slots to 30 minutes.   */
5304/*            If that too fails, then work backwards in 30 second intervals */
5305/*            for the last 30 minutes to at worst 30 seconds idle.          */
5306/* ------------------------------------------------------------------------ */
5307static int nat_extraflush(which)
5308int which;
5309{
5310	ipftq_t *ifq, *ifqnext;
5311	nat_t *nat, **natp;
5312	ipftqent_t *tqn;
5313	int removed;
5314	SPL_INT(s);
5315
5316	removed = 0;
5317
5318	SPL_NET(s);
5319
5320	switch (which)
5321	{
5322	case 0 :
5323		/*
5324		 * Style 0 flush removes everything...
5325		 */
5326		for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5327			nat_delete(nat, NL_FLUSH);
5328			removed++;
5329		}
5330		break;
5331
5332	case 1 :
5333		/*
5334		 * Since we're only interested in things that are closing,
5335		 * we can start with the appropriate timeout queue.
5336		 */
5337		for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5338		     ifq = ifq->ifq_next) {
5339
5340			for (tqn = ifq->ifq_head; tqn != NULL; ) {
5341				nat = tqn->tqe_parent;
5342				tqn = tqn->tqe_next;
5343				if (nat->nat_p != IPPROTO_TCP)
5344					break;
5345				nat_delete(nat, NL_EXPIRE);
5346				removed++;
5347			}
5348		}
5349
5350		/*
5351		 * Also need to look through the user defined queues.
5352		 */
5353		for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5354			ifqnext = ifq->ifq_next;
5355			for (tqn = ifq->ifq_head; tqn != NULL; ) {
5356				nat = tqn->tqe_parent;
5357				tqn = tqn->tqe_next;
5358				if (nat->nat_p != IPPROTO_TCP)
5359					continue;
5360
5361				if ((nat->nat_tcpstate[0] >
5362				     IPF_TCPS_ESTABLISHED) &&
5363				    (nat->nat_tcpstate[1] >
5364				     IPF_TCPS_ESTABLISHED)) {
5365					nat_delete(nat, NL_EXPIRE);
5366					removed++;
5367				}
5368			}
5369		}
5370		break;
5371
5372		/*
5373		 * Args 5-11 correspond to flushing those particular states
5374		 * for TCP connections.
5375		 */
5376	case IPF_TCPS_CLOSE_WAIT :
5377	case IPF_TCPS_FIN_WAIT_1 :
5378	case IPF_TCPS_CLOSING :
5379	case IPF_TCPS_LAST_ACK :
5380	case IPF_TCPS_FIN_WAIT_2 :
5381	case IPF_TCPS_TIME_WAIT :
5382	case IPF_TCPS_CLOSED :
5383		tqn = nat_tqb[which].ifq_head;
5384		while (tqn != NULL) {
5385			nat = tqn->tqe_parent;
5386			tqn = tqn->tqe_next;
5387			nat_delete(nat, NL_FLUSH);
5388			removed++;
5389		}
5390		break;
5391
5392	default :
5393		if (which < 30)
5394			break;
5395
5396		/*
5397		 * Take a large arbitrary number to mean the number of seconds
5398		 * for which which consider to be the maximum value we'll allow
5399		 * the expiration to be.
5400		 */
5401		which = IPF_TTLVAL(which);
5402		for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5403			if (fr_ticks - nat->nat_touched > which) {
5404				nat_delete(nat, NL_FLUSH);
5405				removed++;
5406			} else
5407				natp = &nat->nat_next;
5408		}
5409		break;
5410	}
5411
5412	if (which != 2) {
5413		SPL_X(s);
5414		return removed;
5415	}
5416
5417	/*
5418	 * Asked to remove inactive entries because the table is full.
5419	 */
5420	if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
5421		nat_last_force_flush = fr_ticks;
5422		removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5423	}
5424
5425	SPL_X(s);
5426	return removed;
5427}
5428
5429
5430/* ------------------------------------------------------------------------ */
5431/* Function:    nat_flush_entry                                             */
5432/* Returns:     0 - always succeeds                                         */
5433/* Parameters:  entry(I) - pointer to NAT entry                             */
5434/* Write Locks: ipf_nat                                                     */
5435/*                                                                          */
5436/* This function is a stepping stone between ipf_queueflush() and           */
5437/* nat_dlete().  It is used so we can provide a uniform interface via the   */
5438/* ipf_queueflush() function.  Since the nat_delete() function returns void */
5439/* we translate that to mean it always succeeds in deleting something.      */
5440/* ------------------------------------------------------------------------ */
5441static int nat_flush_entry(entry)
5442void *entry;
5443{
5444	nat_delete(entry, NL_FLUSH);
5445	return 0;
5446}
5447
5448
5449/* ------------------------------------------------------------------------ */
5450/* Function:    nat_gettable                                                */
5451/* Returns:     int     - 0 = success, else error                           */
5452/* Parameters:  data(I) - pointer to ioctl data                             */
5453/*                                                                          */
5454/* This function handles ioctl requests for tables of nat information.      */
5455/* At present the only table it deals with is the hash bucket statistics.   */
5456/* ------------------------------------------------------------------------ */
5457static int nat_gettable(data)
5458char *data;
5459{
5460	ipftable_t table;
5461	int error;
5462
5463	error = fr_inobj(data, &table, IPFOBJ_GTABLE);
5464	if (error != 0)
5465		return error;
5466
5467	switch (table.ita_type)
5468	{
5469	case IPFTABLE_BUCKETS_NATIN :
5470		error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table,
5471				ipf_nattable_sz * sizeof(u_long));
5472		break;
5473
5474	case IPFTABLE_BUCKETS_NATOUT :
5475		error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table,
5476				ipf_nattable_sz * sizeof(u_long));
5477		break;
5478
5479	default :
5480		return EINVAL;
5481	}
5482
5483	if (error != 0) {
5484		error = EFAULT;
5485	}
5486	return error;
5487}
5488