ip_nat.c revision 145522
1/*	$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 145522 2005-04-25 18:43:14Z darrenr $	*/
2
3/*
4 * Copyright (C) 1995-2003 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 */
8#if defined(KERNEL) || defined(_KERNEL)
9# undef KERNEL
10# undef _KERNEL
11# define        KERNEL	1
12# define        _KERNEL	1
13#endif
14#include <sys/errno.h>
15#include <sys/types.h>
16#include <sys/param.h>
17#include <sys/time.h>
18#include <sys/file.h>
19#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
20    defined(_KERNEL)
21# include "opt_ipfilter_log.h"
22#endif
23#if !defined(_KERNEL)
24# include <stdio.h>
25# include <string.h>
26# include <stdlib.h>
27# define _KERNEL
28# ifdef __OpenBSD__
29struct file;
30# endif
31# include <sys/uio.h>
32# undef _KERNEL
33#endif
34#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
35# include <sys/filio.h>
36# include <sys/fcntl.h>
37#else
38# include <sys/ioctl.h>
39#endif
40#include <sys/fcntl.h>
41#if !defined(linux)
42# include <sys/protosw.h>
43#endif
44#include <sys/socket.h>
45#if defined(_KERNEL)
46# include <sys/systm.h>
47# if !defined(__SVR4) && !defined(__svr4__)
48#  include <sys/mbuf.h>
49# endif
50#endif
51#if defined(__SVR4) || defined(__svr4__)
52# include <sys/filio.h>
53# include <sys/byteorder.h>
54# ifdef _KERNEL
55#  include <sys/dditypes.h>
56# endif
57# include <sys/stream.h>
58# include <sys/kmem.h>
59#endif
60#if __FreeBSD_version >= 300000
61# include <sys/queue.h>
62#endif
63#include <net/if.h>
64#if __FreeBSD_version >= 300000
65# include <net/if_var.h>
66# if defined(_KERNEL) && !defined(IPFILTER_LKM)
67#  include "opt_ipfilter.h"
68# endif
69#endif
70#ifdef sun
71# include <net/af.h>
72#endif
73#include <net/route.h>
74#include <netinet/in.h>
75#include <netinet/in_systm.h>
76#include <netinet/ip.h>
77
78#ifdef RFC1825
79# include <vpn/md5.h>
80# include <vpn/ipsec.h>
81extern struct ifnet vpnif;
82#endif
83
84#if !defined(linux)
85# include <netinet/ip_var.h>
86#endif
87#include <netinet/tcp.h>
88#include <netinet/udp.h>
89#include <netinet/ip_icmp.h>
90#include "netinet/ip_compat.h"
91#include <netinet/tcpip.h>
92#include "netinet/ip_fil.h"
93#include "netinet/ip_nat.h"
94#include "netinet/ip_frag.h"
95#include "netinet/ip_state.h"
96#include "netinet/ip_proxy.h"
97#ifdef	IPFILTER_SYNC
98#include "netinet/ip_sync.h"
99#endif
100#if (__FreeBSD_version >= 300000)
101# include <sys/malloc.h>
102#endif
103/* END OF INCLUDES */
104
105#undef	SOCKADDR_IN
106#define	SOCKADDR_IN	struct sockaddr_in
107
108#if !defined(lint)
109static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
110static const char rcsid[] = "@(#)$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_nat.c 145522 2005-04-25 18:43:14Z darrenr $";
111static const char rcsid[] = "@(#)Id: ip_nat.c,v 2.195.2.38 2005/03/28 11:09:54 darrenr Exp";
112#endif
113
114
115/* ======================================================================== */
116/* How the NAT is organised and works.                                      */
117/*                                                                          */
118/* Inside (interface y) NAT       Outside (interface x)                     */
119/* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
121/* ------------>         |   ------------>                                  */
122/* src=10.1.1.1          |   src=192.1.1.1                                  */
123/*                       |                                                  */
124/*                       |   in, processed by fr_checknatin() for x         */
125/* <------------         |   <------------                                  */
126/* dst=10.1.1.1          |   dst=192.1.1.1                                  */
127/* -------------------- -+- -------------------------------------           */
128/* fr_checknatout() - changes ip_src and if required, sport                 */
129/*             - creates a new mapping, if required.                        */
130/* fr_checknatin()  - changes ip_dst and if required, dport                 */
131/*                                                                          */
132/* In the NAT table, internal source is recorded as "in" and externally     */
133/* seen as "out".                                                           */
134/* ======================================================================== */
135
136
137nat_t	**nat_table[2] = { NULL, NULL },
138	*nat_instances = NULL;
139ipnat_t	*nat_list = NULL;
140u_int	ipf_nattable_max = NAT_TABLE_MAX;
141u_int	ipf_nattable_sz = NAT_TABLE_SZ;
142u_int	ipf_natrules_sz = NAT_SIZE;
143u_int	ipf_rdrrules_sz = RDR_SIZE;
144u_int	ipf_hostmap_sz = HOSTMAP_SIZE;
145u_int	fr_nat_maxbucket = 0,
146	fr_nat_maxbucket_reset = 1;
147u_32_t	nat_masks = 0;
148u_32_t	rdr_masks = 0;
149ipnat_t	**nat_rules = NULL;
150ipnat_t	**rdr_rules = NULL;
151hostmap_t	**maptable  = NULL;
152ipftq_t	nat_tqb[IPF_TCP_NSTATES];
153ipftq_t	nat_udptq;
154ipftq_t	nat_icmptq;
155ipftq_t	nat_iptq;
156ipftq_t	*nat_utqe = NULL;
157#ifdef  IPFILTER_LOG
158int	nat_logging = 1;
159#else
160int	nat_logging = 0;
161#endif
162
163u_long	fr_defnatage = DEF_NAT_AGE,
164	fr_defnatipage = 120,		/* 60 seconds */
165	fr_defnaticmpage = 6;		/* 3 seconds */
166natstat_t nat_stats;
167int	fr_nat_lock = 0;
168int	fr_nat_init = 0;
169#if SOLARIS
170extern	int		pfil_delayed_copy;
171#endif
172
173static	int	nat_flushtable __P((void));
174static	int	nat_clearlist __P((void));
175static	void	nat_addnat __P((struct ipnat *));
176static	void	nat_addrdr __P((struct ipnat *));
177static	void	nat_delete __P((struct nat *, int));
178static	void	nat_delrdr __P((struct ipnat *));
179static	void	nat_delnat __P((struct ipnat *));
180static	int	fr_natgetent __P((caddr_t));
181static	int	fr_natgetsz __P((caddr_t));
182static	int	fr_natputent __P((caddr_t, int));
183static	void	nat_tabmove __P((nat_t *));
184static	int	nat_match __P((fr_info_t *, ipnat_t *));
185static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
186static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
187static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
188				    struct in_addr, struct in_addr, u_32_t));
189static	void	nat_hostmapdel __P((struct hostmap *));
190static	INLINE	int nat_icmpquerytype4 __P((int));
191static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
192static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
193static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
194				      tcphdr_t *, nat_t **, int));
195static	void	nat_resolverule __P((ipnat_t *));
196static	nat_t	*fr_natclone __P((fr_info_t *, nat_t *));
197static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
198static	INLINE	int nat_wildok __P((nat_t *, int, int, int, int));
199
200
201/* ------------------------------------------------------------------------ */
202/* Function:    fr_natinit                                                  */
203/* Returns:     int - 0 == success, -1 == failure                           */
204/* Parameters:  Nil                                                         */
205/*                                                                          */
206/* Initialise all of the NAT locks, tables and other structures.            */
207/* ------------------------------------------------------------------------ */
208int fr_natinit()
209{
210	int i;
211
212	KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
213	if (nat_table[0] != NULL)
214		bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
215	else
216		return -1;
217
218	KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
219	if (nat_table[1] != NULL)
220		bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
221	else
222		return -2;
223
224	KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
225	if (nat_rules != NULL)
226		bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
227	else
228		return -3;
229
230	KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
231	if (rdr_rules != NULL)
232		bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
233	else
234		return -4;
235
236	KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
237	if (maptable != NULL)
238		bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
239	else
240		return -5;
241
242	KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
243		 ipf_nattable_sz * sizeof(u_long));
244	if (nat_stats.ns_bucketlen[0] == NULL)
245		return -6;
246	bzero((char *)nat_stats.ns_bucketlen[0],
247	      ipf_nattable_sz * sizeof(u_long));
248
249	KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
250		 ipf_nattable_sz * sizeof(u_long));
251	if (nat_stats.ns_bucketlen[1] == NULL)
252		return -7;
253
254	bzero((char *)nat_stats.ns_bucketlen[1],
255	      ipf_nattable_sz * sizeof(u_long));
256
257	if (fr_nat_maxbucket == 0) {
258		for (i = ipf_nattable_sz; i > 0; i >>= 1)
259			fr_nat_maxbucket++;
260		fr_nat_maxbucket *= 2;
261	}
262
263	fr_sttab_init(nat_tqb);
264	/*
265	 * Increase this because we may have "keep state" following this too
266	 * and packet storms can occur if this is removed too quickly.
267	 */
268	nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
269	nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
270	nat_udptq.ifq_ttl = fr_defnatage;
271	nat_udptq.ifq_ref = 1;
272	nat_udptq.ifq_head = NULL;
273	nat_udptq.ifq_tail = &nat_udptq.ifq_head;
274	MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
275	nat_udptq.ifq_next = &nat_icmptq;
276	nat_icmptq.ifq_ttl = fr_defnaticmpage;
277	nat_icmptq.ifq_ref = 1;
278	nat_icmptq.ifq_head = NULL;
279	nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
280	MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
281	nat_icmptq.ifq_next = &nat_iptq;
282	nat_iptq.ifq_ttl = fr_defnatipage;
283	nat_iptq.ifq_ref = 1;
284	nat_iptq.ifq_head = NULL;
285	nat_iptq.ifq_tail = &nat_iptq.ifq_head;
286	MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
287	nat_iptq.ifq_next = NULL;
288
289	for (i = 0; i < IPF_TCP_NSTATES; i++) {
290		if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
291			nat_tqb[i].ifq_ttl = fr_defnaticmpage;
292#ifdef LARGE_NAT
293		else if (nat_tqb[i].ifq_ttl > fr_defnatage)
294			nat_tqb[i].ifq_ttl = fr_defnatage;
295#endif
296	}
297
298	/*
299	 * Increase this because we may have "keep state" following
300	 * this too and packet storms can occur if this is removed
301	 * too quickly.
302	 */
303	nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
304
305	RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
306	RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
307	MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
308	MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
309
310	fr_nat_init = 1;
311
312	return 0;
313}
314
315
316/* ------------------------------------------------------------------------ */
317/* Function:    nat_addrdr                                                  */
318/* Returns:     Nil                                                         */
319/* Parameters:  n(I) - pointer to NAT rule to add                           */
320/*                                                                          */
321/* Adds a redirect rule to the hash table of redirect rules and the list of */
322/* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
323/* use by redirect rules.                                                   */
324/* ------------------------------------------------------------------------ */
325static void nat_addrdr(n)
326ipnat_t *n;
327{
328	ipnat_t **np;
329	u_32_t j;
330	u_int hv;
331	int k;
332
333	k = count4bits(n->in_outmsk);
334	if ((k >= 0) && (k != 32))
335		rdr_masks |= 1 << k;
336	j = (n->in_outip & n->in_outmsk);
337	hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
338	np = rdr_rules + hv;
339	while (*np != NULL)
340		np = &(*np)->in_rnext;
341	n->in_rnext = NULL;
342	n->in_prnext = np;
343	n->in_hv = hv;
344	*np = n;
345}
346
347
348/* ------------------------------------------------------------------------ */
349/* Function:    nat_addnat                                                  */
350/* Returns:     Nil                                                         */
351/* Parameters:  n(I) - pointer to NAT rule to add                           */
352/*                                                                          */
353/* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
354/* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
355/* redirect rules.                                                          */
356/* ------------------------------------------------------------------------ */
357static void nat_addnat(n)
358ipnat_t *n;
359{
360	ipnat_t **np;
361	u_32_t j;
362	u_int hv;
363	int k;
364
365	k = count4bits(n->in_inmsk);
366	if ((k >= 0) && (k != 32))
367		nat_masks |= 1 << k;
368	j = (n->in_inip & n->in_inmsk);
369	hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
370	np = nat_rules + hv;
371	while (*np != NULL)
372		np = &(*np)->in_mnext;
373	n->in_mnext = NULL;
374	n->in_pmnext = np;
375	n->in_hv = hv;
376	*np = n;
377}
378
379
380/* ------------------------------------------------------------------------ */
381/* Function:    nat_delrdr                                                  */
382/* Returns:     Nil                                                         */
383/* Parameters:  n(I) - pointer to NAT rule to delete                        */
384/*                                                                          */
385/* Removes a redirect rule from the hash table of redirect rules.           */
386/* ------------------------------------------------------------------------ */
387static void nat_delrdr(n)
388ipnat_t *n;
389{
390	if (n->in_rnext)
391		n->in_rnext->in_prnext = n->in_prnext;
392	*n->in_prnext = n->in_rnext;
393}
394
395
396/* ------------------------------------------------------------------------ */
397/* Function:    nat_delnat                                                  */
398/* Returns:     Nil                                                         */
399/* Parameters:  n(I) - pointer to NAT rule to delete                        */
400/*                                                                          */
401/* Removes a NAT map rule from the hash table of NAT map rules.             */
402/* ------------------------------------------------------------------------ */
403static void nat_delnat(n)
404ipnat_t *n;
405{
406	if (n->in_mnext != NULL)
407		n->in_mnext->in_pmnext = n->in_pmnext;
408	*n->in_pmnext = n->in_mnext;
409}
410
411
412/* ------------------------------------------------------------------------ */
413/* Function:    nat_hostmap                                                 */
414/* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
415/*                                else a pointer to the hostmapping to use  */
416/* Parameters:  np(I)   - pointer to NAT rule                               */
417/*              real(I) - real IP address                                   */
418/*              map(I)  - mapped IP address                                 */
419/*              port(I) - destination port number                           */
420/* Write Locks: ipf_nat                                                     */
421/*                                                                          */
422/* Check if an ip address has already been allocated for a given mapping    */
423/* that is not doing port based translation.  If is not yet allocated, then */
424/* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
425/* ------------------------------------------------------------------------ */
426static struct hostmap *nat_hostmap(np, src, dst, map, port)
427ipnat_t *np;
428struct in_addr src;
429struct in_addr dst;
430struct in_addr map;
431u_32_t port;
432{
433	hostmap_t *hm;
434	u_int hv;
435
436	hv = (src.s_addr ^ dst.s_addr);
437	hv += src.s_addr;
438	hv += dst.s_addr;
439	hv %= HOSTMAP_SIZE;
440	for (hm = maptable[hv]; hm; hm = hm->hm_next)
441		if ((hm->hm_srcip.s_addr == src.s_addr) &&
442		    (hm->hm_dstip.s_addr == dst.s_addr) &&
443		    ((np == NULL) || (np == hm->hm_ipnat)) &&
444		    ((port == 0) || (port == hm->hm_port))) {
445			hm->hm_ref++;
446			return hm;
447		}
448
449	if (np == NULL)
450		return NULL;
451
452	KMALLOC(hm, hostmap_t *);
453	if (hm) {
454		hm->hm_next = maptable[hv];
455		hm->hm_pnext = maptable + hv;
456		if (maptable[hv] != NULL)
457			maptable[hv]->hm_pnext = &hm->hm_next;
458		maptable[hv] = hm;
459		hm->hm_ipnat = np;
460		hm->hm_srcip = src;
461		hm->hm_dstip = dst;
462		hm->hm_mapip = map;
463		hm->hm_ref = 1;
464		hm->hm_port = port;
465	}
466	return hm;
467}
468
469
470/* ------------------------------------------------------------------------ */
471/* Function:    nat_hostmapdel                                              */
472/* Returns:     Nil                                                         */
473/* Parameters:  hm(I) - pointer to hostmap structure                        */
474/* Write Locks: ipf_nat                                                     */
475/*                                                                          */
476/* Decrement the references to this hostmap structure by one.  If this      */
477/* reaches zero then remove it and free it.                                 */
478/* ------------------------------------------------------------------------ */
479static void nat_hostmapdel(hm)
480struct hostmap *hm;
481{
482	hm->hm_ref--;
483	if (hm->hm_ref == 0) {
484		if (hm->hm_next)
485			hm->hm_next->hm_pnext = hm->hm_pnext;
486		*hm->hm_pnext = hm->hm_next;
487		KFREE(hm);
488	}
489}
490
491
492/* ------------------------------------------------------------------------ */
493/* Function:    fix_outcksum                                                */
494/* Returns:     Nil                                                         */
495/* Parameters:  fin(I) - pointer to packet information                      */
496/*              sp(I)  - location of 16bit checksum to update               */
497/*              n((I)  - amount to adjust checksum by                       */
498/*                                                                          */
499/* Adjusts the 16bit checksum by "n" for packets going out.                 */
500/* ------------------------------------------------------------------------ */
501void fix_outcksum(fin, sp, n)
502fr_info_t *fin;
503u_short *sp;
504u_32_t n;
505{
506	u_short sumshort;
507	u_32_t sum1;
508
509	if (n == 0)
510		return;
511
512	if (n & NAT_HW_CKSUM) {
513		n &= 0xffff;
514		n += fin->fin_dlen;
515		n = (n & 0xffff) + (n >> 16);
516		*sp = n & 0xffff;
517		return;
518	}
519	sum1 = (~ntohs(*sp)) & 0xffff;
520	sum1 += (n);
521	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
522	/* Again */
523	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
524	sumshort = ~(u_short)sum1;
525	*(sp) = htons(sumshort);
526}
527
528
529/* ------------------------------------------------------------------------ */
530/* Function:    fix_incksum                                                 */
531/* Returns:     Nil                                                         */
532/* Parameters:  fin(I) - pointer to packet information                      */
533/*              sp(I)  - location of 16bit checksum to update               */
534/*              n((I)  - amount to adjust checksum by                       */
535/*                                                                          */
536/* Adjusts the 16bit checksum by "n" for packets going in.                  */
537/* ------------------------------------------------------------------------ */
538void fix_incksum(fin, sp, n)
539fr_info_t *fin;
540u_short *sp;
541u_32_t n;
542{
543	u_short sumshort;
544	u_32_t sum1;
545
546	if (n == 0)
547		return;
548
549	if (n & NAT_HW_CKSUM) {
550		n &= 0xffff;
551		n += fin->fin_dlen;
552		n = (n & 0xffff) + (n >> 16);
553		*sp = n & 0xffff;
554		return;
555	}
556	sum1 = (~ntohs(*sp)) & 0xffff;
557	sum1 += ~(n) & 0xffff;
558	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
559	/* Again */
560	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
561	sumshort = ~(u_short)sum1;
562	*(sp) = htons(sumshort);
563}
564
565
566/* ------------------------------------------------------------------------ */
567/* Function:    fix_datacksum                                               */
568/* Returns:     Nil                                                         */
569/* Parameters:  sp(I)  - location of 16bit checksum to update               */
570/*              n((I)  - amount to adjust checksum by                       */
571/*                                                                          */
572/* Fix_datacksum is used *only* for the adjustments of checksums in the     */
573/* data section of an IP packet.                                            */
574/*                                                                          */
575/* The only situation in which you need to do this is when NAT'ing an       */
576/* ICMP error message. Such a message, contains in its body the IP header   */
577/* of the original IP packet, that causes the error.                        */
578/*                                                                          */
579/* You can't use fix_incksum or fix_outcksum in that case, because for the  */
580/* kernel the data section of the ICMP error is just data, and no special   */
581/* processing like hardware cksum or ntohs processing have been done by the */
582/* kernel on the data section.                                              */
583/* ------------------------------------------------------------------------ */
584void fix_datacksum(sp, n)
585u_short *sp;
586u_32_t n;
587{
588	u_short sumshort;
589	u_32_t sum1;
590
591	if (n == 0)
592		return;
593
594	sum1 = (~ntohs(*sp)) & 0xffff;
595	sum1 += (n);
596	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
597	/* Again */
598	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
599	sumshort = ~(u_short)sum1;
600	*(sp) = htons(sumshort);
601}
602
603
604/* ------------------------------------------------------------------------ */
605/* Function:    fr_nat_ioctl                                                */
606/* Returns:     int - 0 == success, != 0 == failure                         */
607/* Parameters:  data(I) - pointer to ioctl data                             */
608/*              cmd(I)  - ioctl command integer                             */
609/*              mode(I) - file mode bits used with open                     */
610/*                                                                          */
611/* Processes an ioctl call made to operate on the IP Filter NAT device.     */
612/* ------------------------------------------------------------------------ */
613int fr_nat_ioctl(data, cmd, mode)
614ioctlcmd_t cmd;
615caddr_t data;
616int mode;
617{
618	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
619	int error = 0, ret, arg, getlock;
620	ipnat_t natd;
621
622#if (BSD >= 199306) && defined(_KERNEL)
623	if ((securelevel >= 3) && (mode & FWRITE))
624		return EPERM;
625#endif
626
627#if defined(__osf__) && defined(_KERNEL)
628	getlock = 0;
629#else
630	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
631#endif
632
633	nat = NULL;     /* XXX gcc -Wuninitialized */
634	if (cmd == (ioctlcmd_t)SIOCADNAT) {
635		KMALLOC(nt, ipnat_t *);
636	} else {
637		nt = NULL;
638	}
639
640	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
641		if (mode & NAT_SYSSPACE) {
642			bcopy(data, (char *)&natd, sizeof(natd));
643			error = 0;
644		} else {
645			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
646		}
647
648	} else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */
649		BCOPYIN(data, &arg, sizeof(arg));
650	}
651
652	if (error != 0)
653		goto done;
654
655	/*
656	 * For add/delete, look to see if the NAT entry is already present
657	 */
658	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
659		nat = &natd;
660		if (nat->in_v == 0)	/* For backward compat. */
661			nat->in_v = 4;
662		nat->in_flags &= IPN_USERFLAGS;
663		if ((nat->in_redir & NAT_MAPBLK) == 0) {
664			if ((nat->in_flags & IPN_SPLIT) == 0)
665				nat->in_inip &= nat->in_inmsk;
666			if ((nat->in_flags & IPN_IPRANGE) == 0)
667				nat->in_outip &= nat->in_outmsk;
668		}
669		MUTEX_ENTER(&ipf_natio);
670		for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
671			if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
672					IPN_CMPSIZ))
673				break;
674	}
675
676	switch (cmd)
677	{
678#ifdef  IPFILTER_LOG
679	case SIOCIPFFB :
680	{
681		int tmp;
682
683		if (!(mode & FWRITE))
684			error = EPERM;
685		else {
686			tmp = ipflog_clear(IPL_LOGNAT);
687			BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp));
688		}
689		break;
690	}
691	case SIOCSETLG :
692		if (!(mode & FWRITE))
693			error = EPERM;
694		else {
695			BCOPYIN((char *)data, (char *)&nat_logging,
696				sizeof(nat_logging));
697		}
698		break;
699	case SIOCGETLG :
700		BCOPYOUT((char *)&nat_logging, (char *)data,
701			 sizeof(nat_logging));
702		break;
703	case FIONREAD :
704		arg = iplused[IPL_LOGNAT];
705		BCOPYOUT(&arg, data, sizeof(arg));
706		break;
707#endif
708	case SIOCADNAT :
709		if (!(mode & FWRITE)) {
710			error = EPERM;
711		} else if (n != NULL) {
712			error = EEXIST;
713		} else if (nt == NULL) {
714			error = ENOMEM;
715		}
716		if (error != 0) {
717			MUTEX_EXIT(&ipf_natio);
718			break;
719		}
720		bcopy((char *)nat, (char *)nt, sizeof(*n));
721		error = nat_siocaddnat(nt, np, getlock);
722		MUTEX_EXIT(&ipf_natio);
723		if (error == 0)
724			nt = NULL;
725		break;
726	case SIOCRMNAT :
727		if (!(mode & FWRITE)) {
728			error = EPERM;
729			n = NULL;
730		} else if (n == NULL) {
731			error = ESRCH;
732		}
733
734		if (error != 0) {
735			MUTEX_EXIT(&ipf_natio);
736			break;
737		}
738		nat_siocdelnat(n, np, getlock);
739
740		MUTEX_EXIT(&ipf_natio);
741		n = NULL;
742		break;
743	case SIOCGNATS :
744		nat_stats.ns_table[0] = nat_table[0];
745		nat_stats.ns_table[1] = nat_table[1];
746		nat_stats.ns_list = nat_list;
747		nat_stats.ns_maptable = maptable;
748		nat_stats.ns_nattab_sz = ipf_nattable_sz;
749		nat_stats.ns_nattab_max = ipf_nattable_max;
750		nat_stats.ns_rultab_sz = ipf_natrules_sz;
751		nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
752		nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
753		nat_stats.ns_instances = nat_instances;
754		nat_stats.ns_apslist = ap_sess_list;
755		error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
756		break;
757	case SIOCGNATL :
758	    {
759		natlookup_t nl;
760
761		if (getlock) {
762			READ_ENTER(&ipf_nat);
763		}
764		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
765		if (error == 0) {
766			if (nat_lookupredir(&nl) != NULL) {
767				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
768			} else {
769				error = ESRCH;
770			}
771		}
772		if (getlock) {
773			RWLOCK_EXIT(&ipf_nat);
774		}
775		break;
776	    }
777	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
778		if (!(mode & FWRITE)) {
779			error = EPERM;
780			break;
781		}
782		if (getlock) {
783			WRITE_ENTER(&ipf_nat);
784		}
785		error = 0;
786		if (arg == 0)
787			ret = nat_flushtable();
788		else if (arg == 1)
789			ret = nat_clearlist();
790		else
791			error = EINVAL;
792		if (getlock) {
793			RWLOCK_EXIT(&ipf_nat);
794		}
795		if (error == 0) {
796			BCOPYOUT(&ret, data, sizeof(ret));
797		}
798		break;
799	case SIOCPROXY :
800		error = appr_ioctl(data, cmd, mode);
801		break;
802	case SIOCSTLCK :
803		fr_lock(data, &fr_nat_lock);
804		break;
805	case SIOCSTPUT :
806		if (fr_nat_lock) {
807			error = fr_natputent(data, getlock);
808		} else {
809			error = EACCES;
810		}
811		break;
812	case SIOCSTGSZ :
813		if (fr_nat_lock) {
814			if (getlock) {
815				READ_ENTER(&ipf_nat);
816			}
817			error = fr_natgetsz(data);
818			if (getlock) {
819				RWLOCK_EXIT(&ipf_nat);
820			}
821		} else
822			error = EACCES;
823		break;
824	case SIOCSTGET :
825		if (fr_nat_lock) {
826			if (getlock) {
827				READ_ENTER(&ipf_nat);
828			}
829			error = fr_natgetent(data);
830			if (getlock) {
831				RWLOCK_EXIT(&ipf_nat);
832			}
833		} else
834			error = EACCES;
835		break;
836	default :
837		error = EINVAL;
838		break;
839	}
840done:
841	if (nt)
842		KFREE(nt);
843	return error;
844}
845
846
847/* ------------------------------------------------------------------------ */
848/* Function:    nat_siocaddnat                                              */
849/* Returns:     int - 0 == success, != 0 == failure                         */
850/* Parameters:  n(I)       - pointer to new NAT rule                        */
851/*              np(I)      - pointer to where to insert new NAT rule        */
852/*              getlock(I) - flag indicating if lock on ipf_nat is held     */
853/* Mutex Locks: ipf_natio                                                   */
854/*                                                                          */
855/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
856/* from information passed to the kernel, then add it  to the appropriate   */
857/* NAT rule table(s).                                                       */
858/* ------------------------------------------------------------------------ */
859static int nat_siocaddnat(n, np, getlock)
860ipnat_t *n, **np;
861int getlock;
862{
863	int error = 0, i, j;
864
865	nat_resolverule(n);
866	if (n->in_plabel[0] != '\0') {
867		if (n->in_apr == NULL)
868			return ENOENT;
869	}
870
871	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
872		return EINVAL;
873
874	n->in_use = 0;
875	if (n->in_redir & NAT_MAPBLK)
876		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
877	else if (n->in_flags & IPN_AUTOPORTMAP)
878		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
879	else if (n->in_flags & IPN_IPRANGE)
880		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
881	else if (n->in_flags & IPN_SPLIT)
882		n->in_space = 2;
883	else if (n->in_outmsk != 0)
884		n->in_space = ~ntohl(n->in_outmsk);
885	else
886		n->in_space = 1;
887
888	/*
889	 * Calculate the number of valid IP addresses in the output
890	 * mapping range.  In all cases, the range is inclusive of
891	 * the start and ending IP addresses.
892	 * If to a CIDR address, lose 2: broadcast + network address
893	 *                               (so subtract 1)
894	 * If to a range, add one.
895	 * If to a single IP address, set to 1.
896	 */
897	if (n->in_space) {
898		if ((n->in_flags & IPN_IPRANGE) != 0)
899			n->in_space += 1;
900		else
901			n->in_space -= 1;
902	} else
903		n->in_space = 1;
904
905	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
906	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
907		n->in_nip = ntohl(n->in_outip) + 1;
908	else if ((n->in_flags & IPN_SPLIT) &&
909		 (n->in_redir & NAT_REDIRECT))
910		n->in_nip = ntohl(n->in_inip);
911	else
912		n->in_nip = ntohl(n->in_outip);
913	if (n->in_redir & NAT_MAP) {
914		n->in_pnext = ntohs(n->in_pmin);
915		/*
916		 * Multiply by the number of ports made available.
917		 */
918		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
919			n->in_space *= (ntohs(n->in_pmax) -
920					ntohs(n->in_pmin) + 1);
921			/*
922			 * Because two different sources can map to
923			 * different destinations but use the same
924			 * local IP#/port #.
925			 * If the result is smaller than in_space, then
926			 * we may have wrapped around 32bits.
927			 */
928			i = n->in_inmsk;
929			if ((i != 0) && (i != 0xffffffff)) {
930				j = n->in_space * (~ntohl(i) + 1);
931				if (j >= n->in_space)
932					n->in_space = j;
933				else
934					n->in_space = 0xffffffff;
935			}
936		}
937		/*
938		 * If no protocol is specified, multiple by 256 to allow for
939		 * at least one IP:IP mapping per protocol.
940		 */
941		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
942				j = n->in_space * 256;
943				if (j >= n->in_space)
944					n->in_space = j;
945				else
946					n->in_space = 0xffffffff;
947		}
948	}
949
950	/* Otherwise, these fields are preset */
951
952	if (getlock) {
953		WRITE_ENTER(&ipf_nat);
954	}
955	n->in_next = NULL;
956	*np = n;
957
958	if (n->in_age[0] != 0)
959		n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
960
961	if (n->in_age[1] != 0)
962		n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
963
964	if (n->in_redir & NAT_REDIRECT) {
965		n->in_flags &= ~IPN_NOTDST;
966		nat_addrdr(n);
967	}
968	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
969		n->in_flags &= ~IPN_NOTSRC;
970		nat_addnat(n);
971	}
972	n = NULL;
973	nat_stats.ns_rules++;
974#if SOLARIS
975	pfil_delayed_copy = 0;
976#endif
977	if (getlock) {
978		RWLOCK_EXIT(&ipf_nat);			/* WRITE */
979	}
980
981	return error;
982}
983
984
985/* ------------------------------------------------------------------------ */
986/* Function:    nat_resolvrule                                              */
987/* Returns:     Nil                                                         */
988/* Parameters:  n(I)  - pointer to NAT rule                                 */
989/*                                                                          */
990/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
991/* from information passed to the kernel, then add it  to the appropriate   */
992/* NAT rule table(s).                                                       */
993/* ------------------------------------------------------------------------ */
994static void nat_resolverule(n)
995ipnat_t *n;
996{
997	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
998	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
999
1000	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1001	if (n->in_ifnames[1][0] == '\0') {
1002		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1003		n->in_ifps[1] = n->in_ifps[0];
1004	} else {
1005		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[0], 4);
1006	}
1007
1008	if (n->in_plabel[0] != '\0') {
1009		n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1010	}
1011}
1012
1013
1014/* ------------------------------------------------------------------------ */
1015/* Function:    nat_siocdelnat                                              */
1016/* Returns:     int - 0 == success, != 0 == failure                         */
1017/* Parameters:  n(I)       - pointer to new NAT rule                        */
1018/*              np(I)      - pointer to where to insert new NAT rule        */
1019/*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1020/* Mutex Locks: ipf_natio                                                   */
1021/*                                                                          */
1022/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1023/* from information passed to the kernel, then add it  to the appropriate   */
1024/* NAT rule table(s).                                                       */
1025/* ------------------------------------------------------------------------ */
1026static void nat_siocdelnat(n, np, getlock)
1027ipnat_t *n, **np;
1028int getlock;
1029{
1030	if (getlock) {
1031		WRITE_ENTER(&ipf_nat);
1032	}
1033	if (n->in_redir & NAT_REDIRECT)
1034		nat_delrdr(n);
1035	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1036		nat_delnat(n);
1037	if (nat_list == NULL) {
1038		nat_masks = 0;
1039		rdr_masks = 0;
1040	}
1041
1042	if (n->in_tqehead[0] != NULL) {
1043		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1044			fr_freetimeoutqueue(n->in_tqehead[1]);
1045		}
1046	}
1047
1048	if (n->in_tqehead[1] != NULL) {
1049		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1050			fr_freetimeoutqueue(n->in_tqehead[1]);
1051		}
1052	}
1053
1054	*np = n->in_next;
1055
1056	if (n->in_use == 0) {
1057		if (n->in_apr)
1058			appr_free(n->in_apr);
1059		KFREE(n);
1060		nat_stats.ns_rules--;
1061#if SOLARIS
1062		if (nat_stats.ns_rules == 0)
1063			pfil_delayed_copy = 1;
1064#endif
1065	} else {
1066		n->in_flags |= IPN_DELETE;
1067		n->in_next = NULL;
1068	}
1069	if (getlock) {
1070		RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
1071	}
1072}
1073
1074
1075/* ------------------------------------------------------------------------ */
1076/* Function:    fr_natgetsz                                                 */
1077/* Returns:     int - 0 == success, != 0 is the error value.                */
1078/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1079/*                        get the size of.                                  */
1080/*                                                                          */
1081/* Handle SIOCSTGSZ.                                                        */
1082/* Return the size of the nat list entry to be copied back to user space.   */
1083/* The size of the entry is stored in the ng_sz field and the enture natget */
1084/* structure is copied back to the user.                                    */
1085/* ------------------------------------------------------------------------ */
1086static int fr_natgetsz(data)
1087caddr_t data;
1088{
1089	ap_session_t *aps;
1090	nat_t *nat, *n;
1091	natget_t ng;
1092
1093	BCOPYIN(data, &ng, sizeof(ng));
1094
1095	nat = ng.ng_ptr;
1096	if (!nat) {
1097		nat = nat_instances;
1098		ng.ng_sz = 0;
1099		/*
1100		 * Empty list so the size returned is 0.  Simple.
1101		 */
1102		if (nat == NULL) {
1103			BCOPYOUT(&ng, data, sizeof(ng));
1104			return 0;
1105		}
1106	} else {
1107		/*
1108		 * Make sure the pointer we're copying from exists in the
1109		 * current list of entries.  Security precaution to prevent
1110		 * copying of random kernel data.
1111		 */
1112		for (n = nat_instances; n; n = n->nat_next)
1113			if (n == nat)
1114				break;
1115		if (!n)
1116			return ESRCH;
1117	}
1118
1119	/*
1120	 * Incluse any space required for proxy data structures.
1121	 */
1122	ng.ng_sz = sizeof(nat_save_t);
1123	aps = nat->nat_aps;
1124	if (aps != NULL) {
1125		ng.ng_sz += sizeof(ap_session_t) - 4;
1126		if (aps->aps_data != 0)
1127			ng.ng_sz += aps->aps_psiz;
1128	}
1129
1130	BCOPYOUT(&ng, data, sizeof(ng));
1131	return 0;
1132}
1133
1134
1135/* ------------------------------------------------------------------------ */
1136/* Function:    fr_natgetent                                                */
1137/* Returns:     int - 0 == success, != 0 is the error value.                */
1138/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1139/*                        to NAT structure to copy out.                     */
1140/*                                                                          */
1141/* Handle SIOCSTGET.                                                        */
1142/* Copies out NAT entry to user space.  Any additional data held for a      */
1143/* proxy is also copied, as to is the NAT rule which was responsible for it */
1144/* ------------------------------------------------------------------------ */
1145static int fr_natgetent(data)
1146caddr_t data;
1147{
1148	int error, outsize;
1149	ap_session_t *aps;
1150	nat_save_t *ipn, ipns;
1151	nat_t *n, *nat;
1152
1153	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1154	if (error != 0)
1155		return error;
1156
1157	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1158		return EINVAL;
1159
1160	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1161	if (ipn == NULL)
1162		return ENOMEM;
1163
1164	ipn->ipn_dsize = ipns.ipn_dsize;
1165	nat = ipns.ipn_next;
1166	if (nat == NULL) {
1167		nat = nat_instances;
1168		if (nat == NULL) {
1169			if (nat_instances == NULL)
1170				error = ENOENT;
1171			goto finished;
1172		}
1173	} else {
1174		/*
1175		 * Make sure the pointer we're copying from exists in the
1176		 * current list of entries.  Security precaution to prevent
1177		 * copying of random kernel data.
1178		 */
1179		for (n = nat_instances; n; n = n->nat_next)
1180			if (n == nat)
1181				break;
1182		if (n == NULL) {
1183			error = ESRCH;
1184			goto finished;
1185		}
1186	}
1187	ipn->ipn_next = nat->nat_next;
1188
1189	/*
1190	 * Copy the NAT structure.
1191	 */
1192	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1193
1194	/*
1195	 * If we have a pointer to the NAT rule it belongs to, save that too.
1196	 */
1197	if (nat->nat_ptr != NULL)
1198		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1199		      sizeof(ipn->ipn_ipnat));
1200
1201	/*
1202	 * If we also know the NAT entry has an associated filter rule,
1203	 * save that too.
1204	 */
1205	if (nat->nat_fr != NULL)
1206		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1207		      sizeof(ipn->ipn_fr));
1208
1209	/*
1210	 * Last but not least, if there is an application proxy session set
1211	 * up for this NAT entry, then copy that out too, including any
1212	 * private data saved along side it by the proxy.
1213	 */
1214	aps = nat->nat_aps;
1215	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1216	if (aps != NULL) {
1217		char *s;
1218
1219		if (outsize < sizeof(*aps)) {
1220			error = ENOBUFS;
1221			goto finished;
1222		}
1223
1224		s = ipn->ipn_data;
1225		bcopy((char *)aps, s, sizeof(*aps));
1226		s += sizeof(*aps);
1227		outsize -= sizeof(*aps);
1228		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1229			bcopy(aps->aps_data, s, aps->aps_psiz);
1230		else
1231			error = ENOBUFS;
1232	}
1233	if (error == 0) {
1234		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1235	}
1236
1237finished:
1238	if (ipn != NULL) {
1239		KFREES(ipn, ipns.ipn_dsize);
1240	}
1241	return error;
1242}
1243
1244
1245/* ------------------------------------------------------------------------ */
1246/* Function:    fr_natputent                                                */
1247/* Returns:     int - 0 == success, != 0 is the error value.                */
1248/* Parameters:  data(I) -     pointer to natget structure with NAT          */
1249/*                            structure information to load into the kernel */
1250/*              getlock(I) - flag indicating whether or not a write lock    */
1251/*                           on ipf_nat is already held.                    */
1252/*                                                                          */
1253/* Handle SIOCSTPUT.                                                        */
1254/* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1255/* firewall rule data structures, if pointers to them indicate so.          */
1256/* ------------------------------------------------------------------------ */
1257static int fr_natputent(data, getlock)
1258caddr_t data;
1259int getlock;
1260{
1261	nat_save_t ipn, *ipnn;
1262	ap_session_t *aps;
1263	nat_t *n, *nat;
1264	frentry_t *fr;
1265	fr_info_t fin;
1266	ipnat_t *in;
1267	int error;
1268
1269	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1270	if (error != 0)
1271		return error;
1272
1273	/*
1274	 * Initialise early because of code at junkput label.
1275	 */
1276	in = NULL;
1277	aps = NULL;
1278	nat = NULL;
1279	ipnn = NULL;
1280
1281	/*
1282	 * New entry, copy in the rest of the NAT entry if it's size is more
1283	 * than just the nat_t structure.
1284	 */
1285	fr = NULL;
1286	if (ipn.ipn_dsize > sizeof(ipn)) {
1287		if (ipn.ipn_dsize > 81920) {
1288			error = ENOMEM;
1289			goto junkput;
1290		}
1291
1292		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1293		if (ipnn == NULL)
1294			return ENOMEM;
1295
1296		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1297		if (error != 0) {
1298			error = EFAULT;
1299			goto junkput;
1300		}
1301	} else
1302		ipnn = &ipn;
1303
1304	KMALLOC(nat, nat_t *);
1305	if (nat == NULL) {
1306		error = ENOMEM;
1307		goto junkput;
1308	}
1309
1310	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1311	/*
1312	 * Initialize all these so that nat_delete() doesn't cause a crash.
1313	 */
1314	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1315	nat->nat_tqe.tqe_pnext = NULL;
1316	nat->nat_tqe.tqe_next = NULL;
1317	nat->nat_tqe.tqe_ifq = NULL;
1318	nat->nat_tqe.tqe_parent = nat;
1319
1320	/*
1321	 * Restore the rule associated with this nat session
1322	 */
1323	in = ipnn->ipn_nat.nat_ptr;
1324	if (in != NULL) {
1325		KMALLOC(in, ipnat_t *);
1326		nat->nat_ptr = in;
1327		if (in == NULL) {
1328			error = ENOMEM;
1329			goto junkput;
1330		}
1331		bzero((char *)in, offsetof(struct ipnat, in_next6));
1332		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1333		in->in_use = 1;
1334		in->in_flags |= IPN_DELETE;
1335
1336		ATOMIC_INC(nat_stats.ns_rules);
1337
1338		nat_resolverule(in);
1339	}
1340
1341	/*
1342	 * Check that the NAT entry doesn't already exist in the kernel.
1343	 */
1344	bzero((char *)&fin, sizeof(fin));
1345	fin.fin_p = nat->nat_p;
1346	if (nat->nat_dir == NAT_OUTBOUND) {
1347		fin.fin_data[0] = ntohs(nat->nat_oport);
1348		fin.fin_data[1] = ntohs(nat->nat_outport);
1349		fin.fin_ifp = nat->nat_ifps[1];
1350		if (nat_inlookup(&fin, 0, fin.fin_p, nat->nat_oip,
1351				  nat->nat_inip) != NULL) {
1352			error = EEXIST;
1353			goto junkput;
1354		}
1355	} else if (nat->nat_dir == NAT_INBOUND) {
1356		fin.fin_data[0] = ntohs(nat->nat_outport);
1357		fin.fin_data[1] = ntohs(nat->nat_oport);
1358		fin.fin_ifp = nat->nat_ifps[0];
1359		if (nat_outlookup(&fin, 0, fin.fin_p, nat->nat_outip,
1360				 nat->nat_oip) != NULL) {
1361			error = EEXIST;
1362			goto junkput;
1363		}
1364	} else {
1365		error = EINVAL;
1366		goto junkput;
1367	}
1368
1369	/*
1370	 * Restore ap_session_t structure.  Include the private data allocated
1371	 * if it was there.
1372	 */
1373	aps = nat->nat_aps;
1374	if (aps != NULL) {
1375		KMALLOC(aps, ap_session_t *);
1376		nat->nat_aps = aps;
1377		if (aps == NULL) {
1378			error = ENOMEM;
1379			goto junkput;
1380		}
1381		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1382		if (in != NULL)
1383			aps->aps_apr = in->in_apr;
1384		else
1385			aps->aps_apr = NULL;
1386		if (aps->aps_psiz != 0) {
1387			if (aps->aps_psiz > 81920) {
1388				error = ENOMEM;
1389				goto junkput;
1390			}
1391			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1392			if (aps->aps_data == NULL) {
1393				error = ENOMEM;
1394				goto junkput;
1395			}
1396			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1397			      aps->aps_psiz);
1398		} else {
1399			aps->aps_psiz = 0;
1400			aps->aps_data = NULL;
1401		}
1402	}
1403
1404	/*
1405	 * If there was a filtering rule associated with this entry then
1406	 * build up a new one.
1407	 */
1408	fr = nat->nat_fr;
1409	if (fr != NULL) {
1410		if ((nat->nat_flags & SI_NEWFR) != 0) {
1411			KMALLOC(fr, frentry_t *);
1412			nat->nat_fr = fr;
1413			if (fr == NULL) {
1414				error = ENOMEM;
1415				goto junkput;
1416			}
1417			ipnn->ipn_nat.nat_fr = fr;
1418			fr->fr_ref = 1;
1419			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1420			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1421			MUTEX_NUKE(&fr->fr_lock);
1422			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1423		} else {
1424			READ_ENTER(&ipf_nat);
1425			for (n = nat_instances; n; n = n->nat_next)
1426				if (n->nat_fr == fr)
1427					break;
1428
1429			if (n != NULL) {
1430				MUTEX_ENTER(&fr->fr_lock);
1431				fr->fr_ref++;
1432				MUTEX_EXIT(&fr->fr_lock);
1433			}
1434			RWLOCK_EXIT(&ipf_nat);
1435
1436			if (!n) {
1437				error = ESRCH;
1438				goto junkput;
1439			}
1440		}
1441	}
1442
1443	if (ipnn != &ipn) {
1444		KFREES(ipnn, ipn.ipn_dsize);
1445		ipnn = NULL;
1446	}
1447
1448	if (getlock) {
1449		WRITE_ENTER(&ipf_nat);
1450	}
1451	error = nat_insert(nat, nat->nat_rev);
1452	if ((error == 0) && (aps != NULL)) {
1453		aps->aps_next = ap_sess_list;
1454		ap_sess_list = aps;
1455	}
1456	if (getlock) {
1457		RWLOCK_EXIT(&ipf_nat);
1458	}
1459
1460	if (error == 0)
1461		return 0;
1462
1463	error = ENOMEM;
1464
1465junkput:
1466	if (fr != NULL)
1467		fr_derefrule(&fr);
1468
1469	if ((ipnn != NULL) && (ipnn != &ipn)) {
1470		KFREES(ipnn, ipn.ipn_dsize);
1471	}
1472	if (nat != NULL) {
1473		if (aps != NULL) {
1474			if (aps->aps_data != NULL) {
1475				KFREES(aps->aps_data, aps->aps_psiz);
1476			}
1477			KFREE(aps);
1478		}
1479		if (in != NULL) {
1480			if (in->in_apr)
1481				appr_free(in->in_apr);
1482			KFREE(in);
1483		}
1484		KFREE(nat);
1485	}
1486	return error;
1487}
1488
1489
1490/* ------------------------------------------------------------------------ */
1491/* Function:    nat_delete                                                  */
1492/* Returns:     Nil                                                         */
1493/* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1494/*              logtype(I) - type of LOG record to create before deleting   */
1495/* Write Lock:  ipf_nat                                                     */
1496/*                                                                          */
1497/* Delete a nat entry from the various lists and table.  If NAT logging is  */
1498/* enabled then generate a NAT log record for this event.                   */
1499/* ------------------------------------------------------------------------ */
1500static void nat_delete(nat, logtype)
1501struct nat *nat;
1502int logtype;
1503{
1504	struct ipnat *ipn;
1505
1506	if (logtype != 0 && nat_logging != 0)
1507		nat_log(nat, logtype);
1508
1509	MUTEX_ENTER(&ipf_nat_new);
1510
1511	/*
1512	 * Take it as a general indication that all the pointers are set if
1513	 * nat_pnext is set.
1514	 */
1515	if (nat->nat_pnext != NULL) {
1516		nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1517		nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1518
1519		*nat->nat_pnext = nat->nat_next;
1520		if (nat->nat_next != NULL) {
1521			nat->nat_next->nat_pnext = nat->nat_pnext;
1522			nat->nat_next = NULL;
1523		}
1524		nat->nat_pnext = NULL;
1525
1526		*nat->nat_phnext[0] = nat->nat_hnext[0];
1527		if (nat->nat_hnext[0] != NULL) {
1528			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1529			nat->nat_hnext[0] = NULL;
1530		}
1531		nat->nat_phnext[0] = NULL;
1532
1533		*nat->nat_phnext[1] = nat->nat_hnext[1];
1534		if (nat->nat_hnext[1] != NULL) {
1535			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1536			nat->nat_hnext[1] = NULL;
1537		}
1538		nat->nat_phnext[1] = NULL;
1539
1540		if ((nat->nat_flags & SI_WILDP) != 0)
1541			nat_stats.ns_wilds--;
1542	}
1543
1544	if (nat->nat_me != NULL) {
1545		*nat->nat_me = NULL;
1546		nat->nat_me = NULL;
1547	}
1548
1549	fr_deletequeueentry(&nat->nat_tqe);
1550
1551	nat->nat_ref--;
1552	if (nat->nat_ref > 0) {
1553		MUTEX_EXIT(&ipf_nat_new);
1554		return;
1555	}
1556
1557#ifdef	IPFILTER_SYNC
1558	if (nat->nat_sync)
1559		ipfsync_del(nat->nat_sync);
1560#endif
1561
1562	if (nat->nat_fr != NULL)
1563		(void)fr_derefrule(&nat->nat_fr);
1564
1565	if (nat->nat_hm != NULL)
1566		nat_hostmapdel(nat->nat_hm);
1567
1568	/*
1569	 * If there is an active reference from the nat entry to its parent
1570	 * rule, decrement the rule's reference count and free it too if no
1571	 * longer being used.
1572	 */
1573	ipn = nat->nat_ptr;
1574	if (ipn != NULL) {
1575		ipn->in_space++;
1576		ipn->in_use--;
1577		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
1578			if (ipn->in_apr)
1579				appr_free(ipn->in_apr);
1580			KFREE(ipn);
1581			nat_stats.ns_rules--;
1582#if SOLARIS
1583			if (nat_stats.ns_rules == 0)
1584				pfil_delayed_copy = 1;
1585#endif
1586		}
1587	}
1588
1589	MUTEX_DESTROY(&nat->nat_lock);
1590
1591	aps_free(nat->nat_aps);
1592	nat_stats.ns_inuse--;
1593	MUTEX_EXIT(&ipf_nat_new);
1594
1595	/*
1596	 * If there's a fragment table entry too for this nat entry, then
1597	 * dereference that as well.  This is after nat_lock is released
1598	 * because of Tru64.
1599	 */
1600	fr_forgetnat((void *)nat);
1601
1602	KFREE(nat);
1603}
1604
1605
1606/* ------------------------------------------------------------------------ */
1607/* Function:    nat_flushtable                                              */
1608/* Returns:     int - number of NAT rules deleted                           */
1609/* Parameters:  Nil                                                         */
1610/*                                                                          */
1611/* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1612/* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1613/* ------------------------------------------------------------------------ */
1614/*
1615 * nat_flushtable - clear the NAT table of all mapping entries.
1616 */
1617static int nat_flushtable()
1618{
1619	nat_t *nat;
1620	int j = 0;
1621
1622	/*
1623	 * ALL NAT mappings deleted, so lets just make the deletions
1624	 * quicker.
1625	 */
1626	if (nat_table[0] != NULL)
1627		bzero((char *)nat_table[0],
1628		      sizeof(nat_table[0]) * ipf_nattable_sz);
1629	if (nat_table[1] != NULL)
1630		bzero((char *)nat_table[1],
1631		      sizeof(nat_table[1]) * ipf_nattable_sz);
1632
1633	while ((nat = nat_instances) != NULL) {
1634		nat_delete(nat, NL_FLUSH);
1635		j++;
1636	}
1637
1638	nat_stats.ns_inuse = 0;
1639	return j;
1640}
1641
1642
1643/* ------------------------------------------------------------------------ */
1644/* Function:    nat_clearlist                                               */
1645/* Returns:     int - number of NAT/RDR rules deleted                       */
1646/* Parameters:  Nil                                                         */
1647/*                                                                          */
1648/* Delete all rules in the current list of rules.  There is nothing elegant */
1649/* about this cleanup: simply free all entries on the list of rules and     */
1650/* clear out the tables used for hashed NAT rule lookups.                   */
1651/* ------------------------------------------------------------------------ */
1652static int nat_clearlist()
1653{
1654	ipnat_t *n, **np = &nat_list;
1655	int i = 0;
1656
1657	if (nat_rules != NULL)
1658		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1659	if (rdr_rules != NULL)
1660		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1661
1662	while ((n = *np) != NULL) {
1663		*np = n->in_next;
1664		if (n->in_use == 0) {
1665			if (n->in_apr != NULL)
1666				appr_free(n->in_apr);
1667			KFREE(n);
1668			nat_stats.ns_rules--;
1669		} else {
1670			n->in_flags |= IPN_DELETE;
1671			n->in_next = NULL;
1672		}
1673		i++;
1674	}
1675#if SOLARIS
1676	pfil_delayed_copy = 1;
1677#endif
1678	nat_masks = 0;
1679	rdr_masks = 0;
1680	return i;
1681}
1682
1683
1684/* ------------------------------------------------------------------------ */
1685/* Function:    nat_newmap                                                  */
1686/* Returns:     int - -1 == error, 0 == success                             */
1687/* Parameters:  fin(I) - pointer to packet information                      */
1688/*              nat(I) - pointer to NAT entry                               */
1689/*              ni(I)  - pointer to structure with misc. information needed */
1690/*                       to create new NAT entry.                           */
1691/*                                                                          */
1692/* Given an empty NAT structure, populate it with new information about a   */
1693/* new NAT session, as defined by the matching NAT rule.                    */
1694/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1695/* to the new IP address for the translation.                               */
1696/* ------------------------------------------------------------------------ */
1697static INLINE int nat_newmap(fin, nat, ni)
1698fr_info_t *fin;
1699nat_t *nat;
1700natinfo_t *ni;
1701{
1702	u_short st_port, dport, sport, port, sp, dp;
1703	struct in_addr in, inb;
1704	hostmap_t *hm;
1705	u_32_t flags;
1706	u_32_t st_ip;
1707	ipnat_t *np;
1708	nat_t *natl;
1709	int l;
1710
1711	/*
1712	 * If it's an outbound packet which doesn't match any existing
1713	 * record, then create a new port
1714	 */
1715	l = 0;
1716	hm = NULL;
1717	np = ni->nai_np;
1718	st_ip = np->in_nip;
1719	st_port = np->in_pnext;
1720	flags = ni->nai_flags;
1721	sport = ni->nai_sport;
1722	dport = ni->nai_dport;
1723
1724	/*
1725	 * Do a loop until we either run out of entries to try or we find
1726	 * a NAT mapping that isn't currently being used.  This is done
1727	 * because the change to the source is not (usually) being fixed.
1728	 */
1729	do {
1730		port = 0;
1731		in.s_addr = htonl(np->in_nip);
1732		if (l == 0) {
1733			/*
1734			 * Check to see if there is an existing NAT
1735			 * setup for this IP address pair.
1736			 */
1737			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1738					 in, 0);
1739			if (hm != NULL)
1740				in.s_addr = hm->hm_mapip.s_addr;
1741		} else if ((l == 1) && (hm != NULL)) {
1742			nat_hostmapdel(hm);
1743			hm = NULL;
1744		}
1745		in.s_addr = ntohl(in.s_addr);
1746
1747		nat->nat_hm = hm;
1748
1749		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1750			if (l > 0)
1751				return -1;
1752		}
1753
1754		if (np->in_redir == NAT_BIMAP &&
1755		    np->in_inmsk == np->in_outmsk) {
1756			/*
1757			 * map the address block in a 1:1 fashion
1758			 */
1759			in.s_addr = np->in_outip;
1760			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1761			in.s_addr = ntohl(in.s_addr);
1762
1763		} else if (np->in_redir & NAT_MAPBLK) {
1764			if ((l >= np->in_ppip) || ((l > 0) &&
1765			     !(flags & IPN_TCPUDP)))
1766				return -1;
1767			/*
1768			 * map-block - Calculate destination address.
1769			 */
1770			in.s_addr = ntohl(fin->fin_saddr);
1771			in.s_addr &= ntohl(~np->in_inmsk);
1772			inb.s_addr = in.s_addr;
1773			in.s_addr /= np->in_ippip;
1774			in.s_addr &= ntohl(~np->in_outmsk);
1775			in.s_addr += ntohl(np->in_outip);
1776			/*
1777			 * Calculate destination port.
1778			 */
1779			if ((flags & IPN_TCPUDP) &&
1780			    (np->in_ppip != 0)) {
1781				port = ntohs(sport) + l;
1782				port %= np->in_ppip;
1783				port += np->in_ppip *
1784					(inb.s_addr % np->in_ippip);
1785				port += MAPBLK_MINPORT;
1786				port = htons(port);
1787			}
1788
1789		} else if ((np->in_outip == 0) &&
1790			   (np->in_outmsk == 0xffffffff)) {
1791			/*
1792			 * 0/32 - use the interface's IP address.
1793			 */
1794			if ((l > 0) ||
1795			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1796				       &in, NULL) == -1)
1797				return -1;
1798			in.s_addr = ntohl(in.s_addr);
1799
1800		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1801			/*
1802			 * 0/0 - use the original source address/port.
1803			 */
1804			if (l > 0)
1805				return -1;
1806			in.s_addr = ntohl(fin->fin_saddr);
1807
1808		} else if ((np->in_outmsk != 0xffffffff) &&
1809			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
1810			np->in_nip++;
1811
1812		natl = NULL;
1813
1814		if ((flags & IPN_TCPUDP) &&
1815		    ((np->in_redir & NAT_MAPBLK) == 0) &&
1816		    (np->in_flags & IPN_AUTOPORTMAP)) {
1817			/*
1818			 * "ports auto" (without map-block)
1819			 */
1820			if ((l > 0) && (l % np->in_ppip == 0)) {
1821				if (l > np->in_space) {
1822					return -1;
1823				} else if ((l > np->in_ppip) &&
1824					   np->in_outmsk != 0xffffffff)
1825					np->in_nip++;
1826			}
1827			if (np->in_ppip != 0) {
1828				port = ntohs(sport);
1829				port += (l % np->in_ppip);
1830				port %= np->in_ppip;
1831				port += np->in_ppip *
1832					(ntohl(fin->fin_saddr) %
1833					 np->in_ippip);
1834				port += MAPBLK_MINPORT;
1835				port = htons(port);
1836			}
1837
1838		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1839			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
1840			/*
1841			 * Standard port translation.  Select next port.
1842			 */
1843			port = htons(np->in_pnext++);
1844
1845			if (np->in_pnext > ntohs(np->in_pmax)) {
1846				np->in_pnext = ntohs(np->in_pmin);
1847				if (np->in_outmsk != 0xffffffff)
1848					np->in_nip++;
1849			}
1850		}
1851
1852		if (np->in_flags & IPN_IPRANGE) {
1853			if (np->in_nip > ntohl(np->in_outmsk))
1854				np->in_nip = ntohl(np->in_outip);
1855		} else {
1856			if ((np->in_outmsk != 0xffffffff) &&
1857			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1858			    ntohl(np->in_outip))
1859				np->in_nip = ntohl(np->in_outip) + 1;
1860		}
1861
1862		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
1863			port = sport;
1864
1865		/*
1866		 * Here we do a lookup of the connection as seen from
1867		 * the outside.  If an IP# pair already exists, try
1868		 * again.  So if you have A->B becomes C->B, you can
1869		 * also have D->E become C->E but not D->B causing
1870		 * another C->B.  Also take protocol and ports into
1871		 * account when determining whether a pre-existing
1872		 * NAT setup will cause an external conflict where
1873		 * this is appropriate.
1874		 */
1875		inb.s_addr = htonl(in.s_addr);
1876		sp = fin->fin_data[0];
1877		dp = fin->fin_data[1];
1878		fin->fin_data[0] = fin->fin_data[1];
1879		fin->fin_data[1] = htons(port);
1880		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
1881				    (u_int)fin->fin_p, fin->fin_dst, inb);
1882		fin->fin_data[0] = sp;
1883		fin->fin_data[1] = dp;
1884
1885		/*
1886		 * Has the search wrapped around and come back to the
1887		 * start ?
1888		 */
1889		if ((natl != NULL) &&
1890		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1891		    (np->in_nip != 0) && (st_ip == np->in_nip))
1892			return -1;
1893		l++;
1894	} while (natl != NULL);
1895
1896	if (np->in_space > 0)
1897		np->in_space--;
1898
1899	/* Setup the NAT table */
1900	nat->nat_inip = fin->fin_src;
1901	nat->nat_outip.s_addr = htonl(in.s_addr);
1902	nat->nat_oip = fin->fin_dst;
1903	if (nat->nat_hm == NULL)
1904		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1905					  nat->nat_outip, 0);
1906
1907	/*
1908	 * The ICMP checksum does not have a pseudo header containing
1909	 * the IP addresses
1910	 */
1911	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1912	ni->nai_sum2 = LONG_SUM(in.s_addr);
1913	if ((flags & IPN_TCPUDP)) {
1914		ni->nai_sum1 += ntohs(sport);
1915		ni->nai_sum2 += ntohs(port);
1916	}
1917
1918	if (flags & IPN_TCPUDP) {
1919		nat->nat_inport = sport;
1920		nat->nat_outport = port;	/* sport */
1921		nat->nat_oport = dport;
1922		((tcphdr_t *)fin->fin_dp)->th_sport = port;
1923	} else if (flags & IPN_ICMPQUERY) {
1924		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
1925		nat->nat_inport = port;
1926		nat->nat_outport = port;
1927	} else if (fin->fin_p == IPPROTO_GRE) {
1928#if 0
1929		nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
1930		if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
1931			nat->nat_oport = 0;/*fin->fin_data[1];*/
1932			nat->nat_inport = 0;/*fin->fin_data[0];*/
1933			nat->nat_outport = 0;/*fin->fin_data[0];*/
1934			nat->nat_call[0] = fin->fin_data[0];
1935			nat->nat_call[1] = fin->fin_data[0];
1936		}
1937#endif
1938	}
1939	ni->nai_ip.s_addr = in.s_addr;
1940	ni->nai_port = port;
1941	ni->nai_nport = dport;
1942	return 0;
1943}
1944
1945
1946/* ------------------------------------------------------------------------ */
1947/* Function:    nat_newrdr                                                  */
1948/* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
1949/*                    allow rule to be moved if IPN_ROUNDR is set.          */
1950/* Parameters:  fin(I) - pointer to packet information                      */
1951/*              nat(I) - pointer to NAT entry                               */
1952/*              ni(I)  - pointer to structure with misc. information needed */
1953/*                       to create new NAT entry.                           */
1954/*                                                                          */
1955/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1956/* to the new IP address for the translation.                               */
/*                                                                          */
/* Besides filling in nat and ni, this writes the new destination port (or  */
/* ICMP id) straight into the transport header at fin->fin_dp, may replace  */
/* ni->nai_np with the rule remembered by a sticky host map, and updates    */
/* np->in_nip for IPN_SPLIT rules.                                          */
1957/* ------------------------------------------------------------------------ */
1958static INLINE int nat_newrdr(fin, nat, ni)
1959fr_info_t *fin;
1960nat_t *nat;
1961natinfo_t *ni;
1962{
1963	u_short nport, dport, sport;
1964	struct in_addr in;
1965	hostmap_t *hm;
1966	u_32_t flags;
1967	ipnat_t *np;
1968	int move;
1969
	/* Default result: success, and a round-robin rule may be moved. */
1970	move = 1;
1971	hm = NULL;
1972	in.s_addr = 0;
1973	np = ni->nai_np;
1974	flags = ni->nai_flags;
	/* Ports are taken exactly as the caller stored them in ni. */
1975	sport = ni->nai_sport;
1976	dport = ni->nai_dport;
1977
1978	/*
1979	 * If the matching rule has IPN_STICKY set, then we want to have the
1980	 * same rule kick in as before.  Why would this happen?  If you have
1981	 * a collection of rdr rules with "round-robin sticky", the current
1982	 * packet might match a different one to the previous connection but
1983	 * we want the same destination to be used.
1984	 */
1985	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
1986	    (IPN_ROUNDR|IPN_STICKY)) {
		/*
		 * NOTE(review): passing a NULL rule presumably limits
		 * nat_hostmap() to finding an existing mapping - confirm.
		 */
1987		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
1988				 (u_32_t)dport);
1989		if (hm != NULL) {
			/* Reuse the remembered address and switch to its rule. */
1990			in.s_addr = ntohl(hm->hm_mapip.s_addr);
1991			np = hm->hm_ipnat;
1992			ni->nai_np = np;
1993			move = 0;
1994		}
1995	}
1996
1997	/*
1998	 * Otherwise, it's an inbound packet. Most likely, we don't
1999	 * want to rewrite source ports and source addresses. Instead,
2000	 * we want to rewrite to a fixed internal address and fixed
2001	 * internal port.
2002	 */
2003	if (np->in_flags & IPN_SPLIT) {
		/* in_nip holds the address to hand out for this connection. */
2004		in.s_addr = np->in_nip;
2005
2006		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2007			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2008					 in, (u_32_t)dport);
2009			if (hm != NULL) {
2010				in.s_addr = hm->hm_mapip.s_addr;
2011				move = 0;
2012			}
2013		}
2014
		/*
		 * With no host map, or one whose reference count is 1, pick
		 * the address for the next connection: if in_inip was just
		 * used, switch to the value kept in in_inmsk, else go back
		 * to in_inip.
		 */
2015		if (hm == NULL || hm->hm_ref == 1) {
2016			if (np->in_inip == htonl(in.s_addr)) {
2017				np->in_nip = ntohl(np->in_inmsk);
2018				move = 0;
2019			} else {
2020				np->in_nip = ntohl(np->in_inip);
2021			}
2022		}
2023
2024	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2025		/*
2026		 * 0/32 - use the interface's IP address.
2027		 */
2028		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2029			return -1;
2030		in.s_addr = ntohl(in.s_addr);
2031
2032	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2033		/*
2034		 * 0/0 - use the original destination address/port.
2035		 */
2036		in.s_addr = ntohl(fin->fin_daddr);
2037
2038	} else if (np->in_redir == NAT_BIMAP &&
2039		   np->in_inmsk == np->in_outmsk) {
2040		/*
2041		 * map the address block in a 1:1 fashion
2042		 */
		/* Rule's address combined with the host bits of the packet. */
2043		in.s_addr = np->in_inip;
2044		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2045		in.s_addr = ntohl(in.s_addr);
2046	} else {
2047		in.s_addr = ntohl(np->in_inip);
2048	}
2049
	/*
	 * Choose the new destination port: keep the packet's port when the
	 * rule has no target port or NAT_NOTRULEPORT is set.
	 */
2050	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2051		nport = dport;
2052	else {
2053		/*
2054		 * Whilst not optimized for the case where
2055		 * pmin == pmax, the gain is not significant.
2056		 */
2057		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2058		    (np->in_pmin != np->in_pmax)) {
			/* Keep the port's offset from in_pmin, based at in_pnext. */
2059			nport = ntohs(dport) - ntohs(np->in_pmin) +
2060				ntohs(np->in_pnext);
2061			nport = htons(nport);
2062		} else
2063			nport = np->in_pnext;
2064	}
2065
2066	/*
2067	 * When the redirect-to address is set to 0.0.0.0, just
2068	 * assume a blank `forwarding' of the packet.  We don't
2069	 * setup any translation for this either.
2070	 */
2071	if (in.s_addr == 0) {
		/* Neither address nor port would change: nothing to set up. */
2072		if (nport == dport)
2073			return -1;
2074		in.s_addr = ntohl(fin->fin_daddr);
2075	}
2076
	/* in is in host byte order here; the entry stores it via htonl(). */
2077	nat->nat_inip.s_addr = htonl(in.s_addr);
2078	nat->nat_outip = fin->fin_dst;
2079	nat->nat_oip = fin->fin_src;
2080
	/*
	 * Checksum inputs for the caller: sum1 covers the original
	 * destination address and port, sum2 their replacements.
	 */
2081	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2082	ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2083
2084	ni->nai_ip.s_addr = in.s_addr;
2085	ni->nai_nport = nport;
	/* The source port is reported back unchanged. */
2086	ni->nai_port = sport;
2087
2088	if (flags & IPN_TCPUDP) {
2089		nat->nat_inport = nport;
2090		nat->nat_outport = dport;
2091		nat->nat_oport = sport;
		/* Rewrite the destination port in the packet itself. */
2092		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2093	} else if (flags & IPN_ICMPQUERY) {
		/* For ICMP queries the id field is rewritten in place. */
2094		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2095		nat->nat_inport = nport;
2096		nat->nat_outport = nport;
2097	} else if (fin->fin_p == IPPROTO_GRE) {
		/* GRE call-id handling is compiled out. */
2098#if 0
2099		nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2100		if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2101			nat->nat_call[0] = fin->fin_data[0];
2102			nat->nat_call[1] = fin->fin_data[1];
2103			nat->nat_oport = 0; /*fin->fin_data[0];*/
2104			nat->nat_inport = 0; /*fin->fin_data[1];*/
2105			nat->nat_outport = 0; /*fin->fin_data[1];*/
2106		}
2107#endif
2108	}
2109
2110	return move;
2111}
2112
2113/* ------------------------------------------------------------------------ */
2114/* Function:    nat_new                                                     */
2115/* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2116/*                       else pointer to new NAT structure                  */
2117/* Parameters:  fin(I)       - pointer to packet information                */
2118/*              np(I)        - pointer to NAT rule                          */
2119/*              natsave(I)   - pointer to where to store NAT struct pointer */
2120/*              flags(I)     - flags describing the current packet          */
2121/*              direction(I) - direction of packet (in/out)                 */
2122/* Write Lock:  ipf_nat                                                     */
2123/*                                                                          */
2124/* Attempts to create a new NAT entry.  Does not actually change the packet */
2125/* in any way.                                                              */
2126/*                                                                          */
2127/* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2128/* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2129/* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2130/* and (3) building that structure and putting it into the NAT table(s).    */
2131/* ------------------------------------------------------------------------ */
2132nat_t *nat_new(fin, np, natsave, flags, direction)
2133fr_info_t *fin;
2134ipnat_t *np;
2135nat_t **natsave;
2136u_int flags;
2137int direction;
2138{
2139	u_short port = 0, sport = 0, dport = 0, nport = 0;
2140	tcphdr_t *tcp = NULL;
2141	hostmap_t *hm = NULL;
2142	struct in_addr in;
2143	nat_t *nat, *natl;
2144	u_int nflags;
2145	natinfo_t ni;
2146	u_32_t sumd;
2147	int move;
2148#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2149	qpktinfo_t *qpi = fin->fin_qpi;
2150#endif
2151
2152	if (nat_stats.ns_inuse >= ipf_nattable_max) {
2153		nat_stats.ns_memfail++;
2154		return NULL;
2155	}
2156
2157	move = 1;
2158	nflags = np->in_flags & flags;
2159	nflags &= NAT_FROMRULE;
2160
2161	ni.nai_np = np;
2162	ni.nai_nflags = nflags;
2163	ni.nai_flags = flags;
2164
2165	/* Give me a new nat */
2166	KMALLOC(nat, nat_t *);
2167	if (nat == NULL) {
2168		nat_stats.ns_memfail++;
2169		/*
2170		 * Try to automatically tune the max # of entries in the
2171		 * table allowed to be less than what will cause kmem_alloc()
2172		 * to fail and try to eliminate panics due to out of memory
2173		 * conditions arising.
2174		 */
2175		if (ipf_nattable_max > ipf_nattable_sz) {
2176			ipf_nattable_max = nat_stats.ns_inuse - 100;
2177			printf("ipf_nattable_max reduced to %d\n",
2178				ipf_nattable_max);
2179		}
2180		return NULL;
2181	}
2182
2183	if (flags & IPN_TCPUDP) {
2184		tcp = fin->fin_dp;
2185		ni.nai_sport = htons(fin->fin_sport);
2186		ni.nai_dport = htons(fin->fin_dport);
2187	} else if (flags & IPN_ICMPQUERY) {
2188		/*
2189		 * In the ICMP query NAT code, we translate the ICMP id fields
2190		 * to make them unique. This is indepedent of the ICMP type
2191		 * (e.g. in the unlikely event that a host sends an echo and
2192		 * an tstamp request with the same id, both packets will have
2193		 * their ip address/id field changed in the same way).
2194		 */
2195		/* The icmp_id field is used by the sender to identify the
2196		 * process making the icmp request. (the receiver justs
2197		 * copies it back in its response). So, it closely matches
2198		 * the concept of source port. We overlay sport, so we can
2199		 * maximally reuse the existing code.
2200		 */
2201		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2202		ni.nai_dport = ni.nai_sport;
2203	}
2204
2205	bzero((char *)nat, sizeof(*nat));
2206	nat->nat_flags = flags;
2207
2208	if ((flags & NAT_SLAVE) == 0) {
2209		MUTEX_ENTER(&ipf_nat_new);
2210	}
2211
2212	/*
2213	 * Search the current table for a match.
2214	 */
2215	if (direction == NAT_OUTBOUND) {
2216		/*
2217		 * We can now arrange to call this for the same connection
2218		 * because ipf_nat_new doesn't protect the code path into
2219		 * this function.
2220		 */
2221		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2222				     fin->fin_src, fin->fin_dst);
2223		if (natl != NULL) {
2224			nat = natl;
2225			goto done;
2226		}
2227
2228		move = nat_newmap(fin, nat, &ni);
2229		if (move == -1)
2230			goto badnat;
2231
2232		np = ni.nai_np;
2233		in = ni.nai_ip;
2234	} else {
2235		/*
2236		 * NAT_INBOUND is used only for redirects rules
2237		 */
2238		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2239				    fin->fin_src, fin->fin_dst);
2240		if (natl != NULL) {
2241			nat = natl;
2242			goto done;
2243		}
2244
2245		move = nat_newrdr(fin, nat, &ni);
2246		if (move == -1)
2247			goto badnat;
2248
2249		np = ni.nai_np;
2250		in = ni.nai_ip;
2251	}
2252	port = ni.nai_port;
2253	nport = ni.nai_nport;
2254
2255	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2256		if (np->in_redir == NAT_REDIRECT) {
2257			nat_delrdr(np);
2258			nat_addrdr(np);
2259		} else if (np->in_redir == NAT_MAP) {
2260			nat_delnat(np);
2261			nat_addnat(np);
2262		}
2263	}
2264
2265	if (flags & IPN_TCPUDP) {
2266		sport = ni.nai_sport;
2267		dport = ni.nai_dport;
2268	} else if (flags & IPN_ICMPQUERY) {
2269		sport = ni.nai_sport;
2270		dport = 0;
2271	}
2272
2273	CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2274	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2275#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2276	if ((flags & IPN_TCP) && dohwcksum &&
2277	    (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2278		if (direction == NAT_OUTBOUND)
2279			ni.nai_sum1 = LONG_SUM(in.s_addr);
2280		else
2281			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2282		ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2283		ni.nai_sum1 += 30;
2284		ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2285		nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2286	} else
2287#endif
2288		nat->nat_sumd[1] = nat->nat_sumd[0];
2289
2290	if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2291		if (direction == NAT_OUTBOUND)
2292			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2293		else
2294			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2295
2296		ni.nai_sum2 = LONG_SUM(in.s_addr);
2297
2298		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2299		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2300	} else {
2301		nat->nat_ipsumd = nat->nat_sumd[0];
2302		if (!(flags & IPN_TCPUDPICMP)) {
2303			nat->nat_sumd[0] = 0;
2304			nat->nat_sumd[1] = 0;
2305		}
2306	}
2307
2308	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2309		goto badnat;
2310	}
2311	if (flags & SI_WILDP)
2312		nat_stats.ns_wilds++;
2313	goto done;
2314badnat:
2315	nat_stats.ns_badnat++;
2316	if ((hm = nat->nat_hm) != NULL)
2317		nat_hostmapdel(hm);
2318	KFREE(nat);
2319	nat = NULL;
2320done:
2321	if ((flags & NAT_SLAVE) == 0) {
2322		MUTEX_EXIT(&ipf_nat_new);
2323	}
2324	return nat;
2325}
2326
2327
2328/* ------------------------------------------------------------------------ */
2329/* Function:    nat_finalise                                                */
2330/* Returns:     int - 0 == sucess, -1 == failure                            */
2331/* Parameters:  fin(I) - pointer to packet information                      */
2332/*              nat(I) - pointer to NAT entry                               */
2333/*              ni(I)  - pointer to structure with misc. information needed */
2334/*                       to create new NAT entry.                           */
2335/* Write Lock:  ipf_nat                                                     */
2336/*                                                                          */
2337/* This is the tail end of constructing a new NAT entry and is the same     */
2338/* for both IPv4 and IPv6.                                                  */
2339/* ------------------------------------------------------------------------ */
2340/*ARGSUSED*/
2341static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2342fr_info_t *fin;
2343nat_t *nat;
2344natinfo_t *ni;
2345tcphdr_t *tcp;
2346nat_t **natsave;
2347int direction;
2348{
2349	frentry_t *fr;
2350	ipnat_t *np;
2351
2352	np = ni->nai_np;
2353
2354	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0]);
2355#ifdef	IPFILTER_SYNC
2356	if ((nat->nat_flags & SI_CLONE) == 0)
2357		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2358#endif
2359
2360	nat->nat_me = natsave;
2361	nat->nat_dir = direction;
2362	nat->nat_ifps[0] = fin->fin_ifp;
2363	nat->nat_ptr = np;
2364	nat->nat_p = fin->fin_p;
2365	nat->nat_mssclamp = np->in_mssclamp;
2366	fr = fin->fin_fr;
2367	nat->nat_fr = fr;
2368
2369	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2370		if (appr_new(fin, nat) == -1)
2371			return -1;
2372
2373	if (nat_insert(nat, fin->fin_rev) == 0) {
2374		if (nat_logging)
2375			nat_log(nat, (u_int)np->in_redir);
2376		np->in_use++;
2377		if (fr != NULL) {
2378			MUTEX_ENTER(&fr->fr_lock);
2379			fr->fr_ref++;
2380			MUTEX_EXIT(&fr->fr_lock);
2381		}
2382		return 0;
2383	}
2384
2385	/*
2386	 * nat_insert failed, so cleanup time...
2387	 */
2388	return -1;
2389}
2390
2391
2392/* ------------------------------------------------------------------------ */
2393/* Function:   nat_insert                                                   */
2394/* Returns:    int - 0 == sucess, -1 == failure                             */
2395/* Parameters: nat(I) - pointer to NAT structure                            */
2396/*             rev(I) - flag indicating forward/reverse direction of packet */
2397/* Write Lock: ipf_nat                                                      */
2398/*                                                                          */
2399/* Insert a NAT entry into the hash tables for searching and add it to the  */
2400/* list of active NAT entries.  Adjust global counters when complete.       */
2401/* ------------------------------------------------------------------------ */
2402int	nat_insert(nat, rev)
2403nat_t	*nat;
2404int	rev;
2405{
2406	u_int hv1, hv2;
2407	nat_t **natp;
2408
2409	/*
2410	 * Try and return an error as early as possible, so calculate the hash
2411	 * entry numbers first and then proceed.
2412	 */
2413	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2414		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2415				  0xffffffff);
2416		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2417				  ipf_nattable_sz);
2418		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2419				  0xffffffff);
2420		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2421				  ipf_nattable_sz);
2422	} else {
2423		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2424		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2425		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2426		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
2427	}
2428
2429	if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2430	    nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2431		return -1;
2432	}
2433
2434	nat->nat_hv[0] = hv1;
2435	nat->nat_hv[1] = hv2;
2436
2437	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2438
2439	nat->nat_rev = rev;
2440	nat->nat_ref = 1;
2441	nat->nat_bytes[0] = 0;
2442	nat->nat_pkts[0] = 0;
2443	nat->nat_bytes[1] = 0;
2444	nat->nat_pkts[1] = 0;
2445
2446	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2447	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2448
2449	if (nat->nat_ifnames[1][0] !='\0') {
2450		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2451		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2452	} else {
2453		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2454			       LIFNAMSIZ);
2455		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2456		nat->nat_ifps[1] = nat->nat_ifps[0];
2457	}
2458
2459	nat->nat_next = nat_instances;
2460	nat->nat_pnext = &nat_instances;
2461	if (nat_instances)
2462		nat_instances->nat_pnext = &nat->nat_next;
2463	nat_instances = nat;
2464
2465	natp = &nat_table[0][hv1];
2466	if (*natp)
2467		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2468	nat->nat_phnext[0] = natp;
2469	nat->nat_hnext[0] = *natp;
2470	*natp = nat;
2471	nat_stats.ns_bucketlen[0][hv1]++;
2472
2473	natp = &nat_table[1][hv2];
2474	if (*natp)
2475		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2476	nat->nat_phnext[1] = natp;
2477	nat->nat_hnext[1] = *natp;
2478	*natp = nat;
2479	nat_stats.ns_bucketlen[1][hv2]++;
2480
2481	fr_setnatqueue(nat, rev);
2482
2483	nat_stats.ns_added++;
2484	nat_stats.ns_inuse++;
2485	return 0;
2486}
2487
2488
2489/* ------------------------------------------------------------------------ */
2490/* Function:    nat_icmperrorlookup                                         */
2491/* Returns:     nat_t* - point to matching NAT structure                    */
2492/* Parameters:  fin(I) - pointer to packet information                      */
2493/*              dir(I) - direction of packet (in/out)                       */
2494/*                                                                          */
2495/* Check if the ICMP error message is related to an existing TCP, UDP or    */
2496/* ICMP query nat entry.  It is assumed that the packet is already of the   */
2497/* required length.                                                         */
/*                                                                          */
/* NULL is returned when the packet is not an ICMP error with a basic IP    */
/* header, when the embedded packet fails the length checks, or when the    */
/* error is not addressed to the source of the embedded packet.  fin_data   */
/* is overwritten for the lookup and restored before returning.             */
2498/* ------------------------------------------------------------------------ */
2499nat_t *nat_icmperrorlookup(fin, dir)
2500fr_info_t *fin;
2501int dir;
2502{
2503	int flags = 0, type, minlen;
2504	icmphdr_t *icmp, *orgicmp;
2505	tcphdr_t *tcp = NULL;
2506	u_short data[2];
2507	nat_t *nat;
2508	ip_t *oip;
2509	u_int p;
2510
2511	icmp = fin->fin_dp;
2512	type = icmp->icmp_type;
2513	/*
2514	 * Does it at least have the return (basic) IP header ?
2515	 * Only a basic IP header (no options) should be with an ICMP error
2516	 * header.  Also, if it's not an error type, then return.
2517	 */
2518	if ((fin->fin_hlen != sizeof(ip_t)) ||
2519	    !fr_icmp4errortype(type))
2520		return NULL;
2521
2522	/*
2523	 * Check packet size
2524	 */
	/* The embedded (original) IP header starts 8 bytes into the ICMP data. */
2525	oip = (ip_t *)((char *)fin->fin_dp + 8);
	/* Embedded header length in bytes, as claimed by that header. */
2526	minlen = IP_HL(oip) << 2;
2527	if ((minlen < sizeof(ip_t)) ||
2528	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2529		return NULL;
2530	/*
2531	 * Is the buffer big enough for all of it ?  It's the size of the IP
2532	 * header claimed in the encapsulated part which is of concern.  It
2533	 * may be too big to be in this buffer but not so big that it's
2534	 * outside the ICMP packet, leading to TCP derefs causing problems.
2535	 * This is possible because we don't know how big oip_hl is when we
2536	 * do the pullup early in fr_check() and thus can't guarantee it is
2537	 * all here now.
2538	 */
2539#ifdef  _KERNEL
2540	{
2541	mb_t *m;
2542
2543	m = fin->fin_m;
2544# if defined(MENTAT)
2545	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2546		return NULL;
2547# else
2548	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2549	    (char *)fin->fin_ip + M_LEN(m))
2550		return NULL;
2551# endif
2552	}
2553#endif
2554
	/* The error must be addressed to the sender of the embedded packet. */
2555	if (fin->fin_daddr != oip->ip_src.s_addr)
2556		return NULL;
2557
	/* Classify by the protocol of the embedded packet. */
2558	p = oip->ip_p;
2559	if (p == IPPROTO_TCP)
2560		flags = IPN_TCP;
2561	else if (p == IPPROTO_UDP)
2562		flags = IPN_UDP;
2563	else if (p == IPPROTO_ICMP) {
		/*
		 * NOTE(review): the length check above only accounts for the
		 * embedded IP header; confirm the embedded ICMP header bytes
		 * read below are guaranteed to be present.
		 */
2564		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2565
2566		/* see if this is related to an ICMP query */
2567		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
			/*
			 * Save fin_data, look up using the embedded query's
			 * id, then restore it.
			 */
2568			data[0] = fin->fin_data[0];
2569			data[1] = fin->fin_data[1];
2570			fin->fin_data[0] = 0;
2571			fin->fin_data[1] = orgicmp->icmp_id;
2572
2573			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2574			/*
2575			 * NOTE : dir refers to the direction of the original
2576			 *        ip packet. By definition the icmp error
2577			 *        message flows in the opposite direction.
2578			 */
2579			if (dir == NAT_INBOUND)
2580				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2581						   oip->ip_src);
2582			else
2583				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2584						    oip->ip_src);
2585			fin->fin_data[0] = data[0];
2586			fin->fin_data[1] = data[1];
2587			return nat;
2588		}
		/* Not a query type: flags stay 0 and the final lookup is used. */
2589	}
2590
2591	if (flags & IPN_TCPUDP) {
2592		minlen += 8;		/* + 64bits of data to get ports */
2593		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2594			return NULL;
2595
2596		data[0] = fin->fin_data[0];
2597		data[1] = fin->fin_data[1];
2598		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
		/*
		 * The addresses passed below are the embedded packet's
		 * destination then source, and the ports are loaded in the
		 * same swapped order: its dport into fin_data[0], its sport
		 * into fin_data[1].
		 */
2599		fin->fin_data[0] = ntohs(tcp->th_dport);
2600		fin->fin_data[1] = ntohs(tcp->th_sport);
2601
2602		if (dir == NAT_INBOUND) {
2603			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2604					   oip->ip_src);
2605		} else {
2606			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2607					    oip->ip_src);
2608		}
2609		fin->fin_data[0] = data[0];
2610		fin->fin_data[1] = data[1];
2611		return nat;
2612	}
	/* Any other case: look up with no flags and fin_data left untouched. */
2613	if (dir == NAT_INBOUND)
2614		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2615	else
2616		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2617}
2618
2619
2620/* ------------------------------------------------------------------------ */
2621/* Function:    nat_icmperror                                               */
2622/* Returns:     nat_t* - point to matching NAT structure                    */
2623/* Parameters:  fin(I)    - pointer to packet information                   */
2624/*              nflags(I) - NAT flags for this packet                       */
2625/*              dir(I)    - direction of packet (in/out)                    */
2626/*                                                                          */
2627/* Fix up an ICMP packet which is an error message for an existing NAT      */
2628/* session.  This will correct both packet header data and checksums.       */
2629/*                                                                          */
2630/* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2631/* a NAT'd ICMP packet gets correctly recognised.                           */
2632/* ------------------------------------------------------------------------ */
2633nat_t *nat_icmperror(fin, nflags, dir)
2634fr_info_t *fin;
2635u_int *nflags;
2636int dir;
2637{
2638	u_32_t sum1, sum2, sumd, sumd2;
2639	struct in_addr in;
2640	icmphdr_t *icmp;
2641	int flags, dlen;
2642	u_short *csump;
2643	tcphdr_t *tcp;
2644	nat_t *nat;
2645	ip_t *oip;
2646	void *dp;
2647
2648	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2649		return NULL;
2650	/*
2651	 * nat_icmperrorlookup() will return NULL for `defective' packets.
2652	 */
2653	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2654		return NULL;
2655
2656	tcp = NULL;
2657	csump = NULL;
2658	flags = 0;
2659	sumd2 = 0;
2660	*nflags = IPN_ICMPERR;
2661	icmp = fin->fin_dp;
2662	oip = (ip_t *)&icmp->icmp_ip;
2663	dp = (((char *)oip) + (IP_HL(oip) << 2));
2664	if (oip->ip_p == IPPROTO_TCP) {
2665		tcp = (tcphdr_t *)dp;
2666		csump = (u_short *)&tcp->th_sum;
2667		flags = IPN_TCP;
2668	} else if (oip->ip_p == IPPROTO_UDP) {
2669		udphdr_t *udp;
2670
2671		udp = (udphdr_t *)dp;
2672		tcp = (tcphdr_t *)dp;
2673		csump = (u_short *)&udp->uh_sum;
2674		flags = IPN_UDP;
2675	} else if (oip->ip_p == IPPROTO_ICMP)
2676		flags = IPN_ICMPQUERY;
2677	dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);
2678
2679	/*
2680	 * Need to adjust ICMP header to include the real IP#'s and
2681	 * port #'s.  Only apply a checksum change relative to the
2682	 * IP address change as it will be modified again in fr_checknatout
2683	 * for both address and port.  Two checksum changes are
2684	 * necessary for the two header address changes.  Be careful
2685	 * to only modify the checksum once for the port # and twice
2686	 * for the IP#.
2687	 */
2688
2689	/*
2690	 * Step 1
2691	 * Fix the IP addresses in the offending IP packet. You also need
2692	 * to adjust the IP header checksum of that offending IP packet
2693	 * and the ICMP checksum of the ICMP error message itself.
2694	 *
2695	 * Unfortunately, for UDP and TCP, the IP addresses are also contained
2696	 * in the pseudo header that is used to compute the UDP resp. TCP
2697	 * checksum. So, we must compensate that as well. Even worse, the
2698	 * change in the UDP and TCP checksums require yet another
2699	 * adjustment of the ICMP checksum of the ICMP error message.
2700	 */
2701
2702	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
2703		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
2704		in = nat->nat_inip;
2705		oip->ip_src = in;
2706	} else {
2707		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
2708		in = nat->nat_outip;
2709		oip->ip_dst = in;
2710	}
2711
2712	sum2 = LONG_SUM(ntohl(in.s_addr));
2713
2714	CALC_SUMD(sum1, sum2, sumd);
2715
2716	/*
2717	 * Fix IP checksum of the offending IP packet to adjust for
2718	 * the change in the IP address.
2719	 *
2720	 * Normally, you would expect that the ICMP checksum of the
2721	 * ICMP error message needs to be adjusted as well for the
2722	 * IP address change in oip.
2723	 * However, this is a NOP, because the ICMP checksum is
2724	 * calculated over the complete ICMP packet, which includes the
2725	 * changed oip IP addresses and oip->ip_sum. However, these
2726	 * two changes cancel each other out (if the delta for
2727	 * the IP address is x, then the delta for ip_sum is minus x),
2728	 * so no change in the icmp_cksum is necessary.
2729	 *
2730	 * Be careful that nat_dir refers to the direction of the
2731	 * offending IP packet (oip), not to its ICMP response (icmp)
2732	 */
2733	fix_datacksum(&oip->ip_sum, sumd);
2734	/* Fix icmp cksum : IP Addr + Cksum */
2735	sumd2 = (sumd >> 16);
2736
2737	/*
2738	 * Fix UDP pseudo header checksum to compensate for the
2739	 * IP address change.
2740	 */
2741	if ((oip->ip_p == IPPROTO_UDP) && (dlen >= 8) && (*csump != 0)) {
2742		/*
2743		 * The UDP checksum is optional, only adjust it
2744		 * if it has been set.
2745		 */
2746		sum1 = ntohs(*csump);
2747		fix_datacksum(csump, sumd);
2748		sum2 = ntohs(*csump);
2749
2750		/*
2751		 * Fix ICMP checksum to compensate the UDP
2752		 * checksum adjustment.
2753		 */
2754		sumd2 = sumd << 1;
2755		CALC_SUMD(sum1, sum2, sumd);
2756		sumd2 += sumd;
2757	}
2758
2759	/*
2760	 * Fix TCP pseudo header checksum to compensate for the
2761	 * IP address change. Before we can do the change, we
2762	 * must make sure that oip is sufficient large to hold
2763	 * the TCP checksum (normally it does not!).
2764	 * 18 = offsetof(tcphdr_t, th_sum) + 2
2765	 */
2766	else if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
2767		sum1 = ntohs(*csump);
2768		fix_datacksum(csump, sumd);
2769		sum2 = ntohs(*csump);
2770
2771		/*
2772		 * Fix ICMP checksum to compensate the TCP
2773		 * checksum adjustment.
2774		 */
2775		sumd2 = sumd << 1;
2776		CALC_SUMD(sum1, sum2, sumd);
2777		sumd2 += sumd;
2778	} else {
2779		if (nat->nat_dir == NAT_OUTBOUND)
2780			sumd2 = ~sumd2;
2781		else
2782			sumd2 = ~sumd2 + 1;
2783	}
2784
2785	if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
2786		int mode = 0;
2787
2788		/*
2789		 * Step 2 :
2790		 * For offending TCP/UDP IP packets, translate the ports as
2791		 * well, based on the NAT specification. Of course such
2792		 * a change must be reflected in the ICMP checksum as well.
2793		 *
2794		 * Advance notice : Now it becomes complicated :-)
2795		 *
2796		 * Since the port fields are part of the TCP/UDP checksum
2797		 * of the offending IP packet, you need to adjust that checksum
2798		 * as well... but, if you change, you must change the icmp
2799		 * checksum *again*, to reflect that change.
2800		 *
2801		 * To further complicate: the TCP checksum is not in the first
2802		 * 8 bytes of the offending ip packet, so it most likely is not
2803		 * available. Some OSses like Solaris return enough bytes to
2804		 * include the TCP checksum. So we have to check if the
2805		 * ip->ip_len actually holds the TCP checksum of the oip!
2806		 */
2807
2808		if (nat->nat_oport == tcp->th_dport) {
2809			if (tcp->th_sport != nat->nat_inport) {
2810				mode = 1;
2811				sum1 = ntohs(nat->nat_inport);
2812				sum2 = ntohs(tcp->th_sport);
2813			}
2814		} else if (tcp->th_sport == nat->nat_oport) {
2815			mode = 2;
2816			sum1 = ntohs(nat->nat_outport);
2817			sum2 = ntohs(tcp->th_dport);
2818		}
2819
2820		if (mode == 1) {
2821			/*
2822			 * Fix ICMP checksum to compensate port adjustment.
2823			 */
2824			tcp->th_sport = htons(sum1);
2825
2826			/*
2827			 * Fix udp checksum to compensate port adjustment.
2828			 * NOTE : the offending IP packet flows the other
2829			 * direction compared to the ICMP message.
2830			 *
2831			 * The UDP checksum is optional, only adjust it if
2832			 * it has been set.
2833			 */
2834			if ((oip->ip_p == IPPROTO_UDP) &&
2835			    (dlen >= 8) && (*csump != 0)) {
2836				sumd = sum1 - sum2;
2837				sumd2 += sumd;
2838
2839				sum1 = ntohs(*csump);
2840				fix_datacksum(csump, sumd);
2841				sum2 = ntohs(*csump);
2842
2843				/*
2844				 * Fix ICMP checksum to compensate
2845				 * UDP checksum adjustment.
2846				 */
2847				CALC_SUMD(sum1, sum2, sumd);
2848				sumd2 += sumd;
2849			}
2850
2851			/*
2852			 * Fix TCP checksum (if present) to compensate port
2853			 * adjustment. NOTE : the offending IP packet flows
2854			 * the other direction compared to the ICMP message.
2855			 */
2856			if (oip->ip_p == IPPROTO_TCP) {
2857				if (dlen >= 18) {
2858					sumd = sum1 - sum2;
2859					sumd2 += sumd;
2860
2861					sum1 = ntohs(*csump);
2862					fix_datacksum(csump, sumd);
2863					sum2 = ntohs(*csump);
2864
2865					/*
2866					 * Fix ICMP checksum to compensate
2867					 * TCP checksum adjustment.
2868					 */
2869					CALC_SUMD(sum1, sum2, sumd);
2870					sumd2 += sumd;
2871				} else {
2872					sumd = sum2 - sum1 + 1;
2873					sumd2 += sumd;
2874				}
2875			}
2876		} else if (mode == 2) {
2877			/*
2878			 * Fix ICMP checksum to compensate port adjustment.
2879			 */
2880			tcp->th_dport = htons(sum1);
2881
2882			/*
2883			 * Fix UDP checksum to compensate port adjustment.
2884			 * NOTE : the offending IP packet flows the other
2885			 * direction compared to the ICMP message.
2886			 *
2887			 * The UDP checksum is optional, only adjust
2888			 * it if it has been set.
2889			 */
2890			if ((oip->ip_p == IPPROTO_UDP) &&
2891			    (dlen >= 8) && (*csump != 0)) {
2892				sumd = sum1 - sum2;
2893				sumd2 += sumd;
2894
2895				sum1 = ntohs(*csump);
2896				fix_datacksum(csump, sumd);
2897				sum2 = ntohs(*csump);
2898
2899				/*
2900				 * Fix ICMP checksum to compensate
2901				 * UDP checksum adjustment.
2902				 */
2903				CALC_SUMD(sum1, sum2, sumd);
2904				sumd2 += sumd;
2905			}
2906
2907			/*
2908			 * Fix TCP checksum (if present) to compensate port
2909			 * adjustment. NOTE : the offending IP packet flows
2910			 * the other direction compared to the ICMP message.
2911			 */
2912			if (oip->ip_p == IPPROTO_TCP) {
2913				if (dlen >= 18) {
2914					sumd = sum1 - sum2;
2915					sumd2 += sumd;
2916
2917					sum1 = ntohs(*csump);
2918					fix_datacksum(csump, sumd);
2919					sum2 = ntohs(*csump);
2920
2921					/*
2922					 * Fix ICMP checksum to compensate
2923					 * TCP checksum adjustment.
2924					 */
2925					CALC_SUMD(sum1, sum2, sumd);
2926					sumd2 += sumd;
2927				} else {
2928					if (nat->nat_dir == NAT_INBOUND)
2929						sumd = sum2 - sum1;
2930					else
2931						sumd = sum2 - sum1 + 1;
2932					sumd2 += sumd;
2933				}
2934			}
2935		}
2936		if (sumd2 != 0) {
2937			sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2938			sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2939			fix_incksum(fin, &icmp->icmp_cksum, sumd2);
2940		}
2941	} else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
2942		icmphdr_t *orgicmp;
2943
2944		/*
2945		 * XXX - what if this is bogus hl and we go off the end ?
2946		 * In this case, nat_icmperrorlookup() will have returned NULL.
2947		 */
2948		orgicmp = (icmphdr_t *)dp;
2949
2950		if (nat->nat_dir == NAT_OUTBOUND) {
2951			if (orgicmp->icmp_id != nat->nat_inport) {
2952
2953				/*
2954				 * Fix ICMP checksum (of the offening ICMP
2955				 * query packet) to compensate the change
2956				 * in the ICMP id of the offending ICMP
2957				 * packet.
2958				 *
2959				 * Since you modify orgicmp->icmp_id with
2960				 * a delta (say x) and you compensate that
2961				 * in origicmp->icmp_cksum with a delta
2962				 * minus x, you don't have to adjust the
2963				 * overall icmp->icmp_cksum
2964				 */
2965				sum1 = ntohs(orgicmp->icmp_id);
2966				sum2 = ntohs(nat->nat_inport);
2967				CALC_SUMD(sum1, sum2, sumd);
2968				orgicmp->icmp_id = nat->nat_inport;
2969				fix_datacksum(&orgicmp->icmp_cksum, sumd);
2970			}
2971		} /* nat_dir == NAT_INBOUND is impossible for icmp queries */
2972	}
2973	return nat;
2974}
2975
2976
/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done!
 */
2981
2982/* ------------------------------------------------------------------------ */
2983/* Function:    nat_inlookup                                                */
2984/* Returns:     nat_t* - NULL == no match,                                  */
2985/*                       else pointer to matching NAT entry                 */
2986/* Parameters:  fin(I)    - pointer to packet information                   */
2987/*              flags(I)  - NAT flags for this packet                       */
2988/*              p(I)      - protocol for this packet                        */
2989/*              src(I)    - source IP address                               */
2990/*              mapdst(I) - destination IP address                          */
2991/*                                                                          */
2992/* Lookup a nat entry based on the mapped destination ip address/port and   */
2993/* real source address/port.  We use this lookup when receiving a packet,   */
2994/* we're looking for a table entry, based on the destination address.       */
2995/*                                                                          */
2996/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
2997/*                                                                          */
2998/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
2999/*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3000/*                                                                          */
3001/* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3002/*            the packet is of said protocol                                */
3003/* ------------------------------------------------------------------------ */
3004nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3005fr_info_t *fin;
3006u_int flags, p;
3007struct in_addr src , mapdst;
3008{
3009	u_short sport, dport;
3010	grehdr_t *gre;
3011	ipnat_t *ipn;
3012	u_int sflags;
3013	nat_t *nat;
3014	int nflags;
3015	u_32_t dst;
3016	void *ifp;
3017	u_int hv;
3018
3019	if (fin != NULL)
3020		ifp = fin->fin_ifp;
3021	else
3022		ifp = NULL;
3023	sport = 0;
3024	dport = 0;
3025	gre = NULL;
3026	dst = mapdst.s_addr;
3027	sflags = flags & NAT_TCPUDPICMP;
3028
3029	switch (p)
3030	{
3031	case IPPROTO_TCP :
3032	case IPPROTO_UDP :
3033		sport = htons(fin->fin_data[0]);
3034		dport = htons(fin->fin_data[1]);
3035		break;
3036	case IPPROTO_ICMP :
3037		if (flags & IPN_ICMPERR)
3038			sport = fin->fin_data[1];
3039		else
3040			dport = fin->fin_data[1];
3041		break;
3042	default :
3043		break;
3044	}
3045
3046
3047	if ((flags & SI_WILDP) != 0)
3048		goto find_in_wild_ports;
3049
3050	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3051	hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
3052	nat = nat_table[1][hv];
3053	for (; nat; nat = nat->nat_hnext[1]) {
3054		nflags = nat->nat_flags;
3055
3056		if (ifp != NULL) {
3057			if (nat->nat_dir == NAT_REDIRECT) {
3058				if (ifp != nat->nat_ifps[0])
3059					continue;
3060			} else {
3061				if (ifp != nat->nat_ifps[1])
3062					continue;
3063			}
3064		}
3065
3066		if (nat->nat_oip.s_addr == src.s_addr &&
3067		    nat->nat_outip.s_addr == dst &&
3068		    (((p == 0) &&
3069		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3070		     || (p == nat->nat_p))) {
3071			switch (p)
3072			{
3073#if 0
3074			case IPPROTO_GRE :
3075				if (nat->nat_call[1] != fin->fin_data[0])
3076					continue;
3077				break;
3078#endif
3079			case IPPROTO_ICMP :
3080				if ((flags & IPN_ICMPERR) != 0) {
3081					if (nat->nat_outport != sport)
3082						continue;
3083				} else {
3084					if (nat->nat_outport != dport)
3085						continue;
3086				}
3087				break;
3088			case IPPROTO_TCP :
3089			case IPPROTO_UDP :
3090				if (nat->nat_oport != sport)
3091					continue;
3092				if (nat->nat_outport != dport)
3093					continue;
3094				break;
3095			default :
3096				break;
3097			}
3098
3099			ipn = nat->nat_ptr;
3100			if ((ipn != NULL) && (nat->nat_aps != NULL))
3101				if (appr_match(fin, nat) != 0)
3102					continue;
3103			return nat;
3104		}
3105	}
3106
3107	/*
3108	 * So if we didn't find it but there are wildcard members in the hash
3109	 * table, go back and look for them.  We do this search and update here
3110	 * because it is modifying the NAT table and we want to do this only
3111	 * for the first packet that matches.  The exception, of course, is
3112	 * for "dummy" (FI_IGNORE) lookups.
3113	 */
3114find_in_wild_ports:
3115	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3116		return NULL;
3117	if (nat_stats.ns_wilds == 0)
3118		return NULL;
3119
3120	RWLOCK_EXIT(&ipf_nat);
3121
3122	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3123	hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
3124
3125	WRITE_ENTER(&ipf_nat);
3126
3127	nat = nat_table[1][hv];
3128	for (; nat; nat = nat->nat_hnext[1]) {
3129		if (ifp != NULL) {
3130			if (nat->nat_dir == NAT_REDIRECT) {
3131				if (ifp != nat->nat_ifps[0])
3132					continue;
3133			} else {
3134				if (ifp != nat->nat_ifps[1])
3135					continue;
3136			}
3137		}
3138
3139		if (nat->nat_p != fin->fin_p)
3140			continue;
3141		if (nat->nat_oip.s_addr != src.s_addr ||
3142		    nat->nat_outip.s_addr != dst)
3143			continue;
3144
3145		nflags = nat->nat_flags;
3146		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3147			continue;
3148
3149		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3150			       NAT_INBOUND) == 1) {
3151			if ((fin->fin_flx & FI_IGNORE) != 0)
3152				break;
3153			if ((nflags & SI_CLONE) != 0) {
3154				nat = fr_natclone(fin, nat);
3155				if (nat == NULL)
3156					break;
3157			} else {
3158				MUTEX_ENTER(&ipf_nat_new);
3159				nat_stats.ns_wilds--;
3160				MUTEX_EXIT(&ipf_nat_new);
3161			}
3162			nat->nat_oport = sport;
3163			nat->nat_outport = dport;
3164			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3165			nat_tabmove(nat);
3166			break;
3167		}
3168	}
3169
3170	MUTEX_DOWNGRADE(&ipf_nat);
3171
3172	return nat;
3173}
3174
3175
3176/* ------------------------------------------------------------------------ */
3177/* Function:    nat_tabmove                                                 */
3178/* Returns:     Nil                                                         */
3179/* Parameters:  nat(I) - pointer to NAT structure                           */
3180/* Write Lock:  ipf_nat                                                     */
3181/*                                                                          */
3182/* This function is only called for TCP/UDP NAT table entries where the     */
3183/* original was placed in the table without hashing on the ports and we now */
3184/* want to include hashing on port numbers.                                 */
3185/* ------------------------------------------------------------------------ */
3186static void nat_tabmove(nat)
3187nat_t *nat;
3188{
3189	nat_t **natp;
3190	u_int hv;
3191
3192	if (nat->nat_flags & SI_CLONE)
3193		return;
3194
3195	/*
3196	 * Remove the NAT entry from the old location
3197	 */
3198	if (nat->nat_hnext[0])
3199		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3200	*nat->nat_phnext[0] = nat->nat_hnext[0];
3201	nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3202
3203	if (nat->nat_hnext[1])
3204		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3205	*nat->nat_phnext[1] = nat->nat_hnext[1];
3206	nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3207
3208	/*
3209	 * Add into the NAT table in the new position
3210	 */
3211	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3212	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3213			 ipf_nattable_sz);
3214	nat->nat_hv[0] = hv;
3215	natp = &nat_table[0][hv];
3216	if (*natp)
3217		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3218	nat->nat_phnext[0] = natp;
3219	nat->nat_hnext[0] = *natp;
3220	*natp = nat;
3221	nat_stats.ns_bucketlen[0][hv]++;
3222
3223	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3224	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3225			 ipf_nattable_sz);
3226	nat->nat_hv[1] = hv;
3227	natp = &nat_table[1][hv];
3228	if (*natp)
3229		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3230	nat->nat_phnext[1] = natp;
3231	nat->nat_hnext[1] = *natp;
3232	*natp = nat;
3233	nat_stats.ns_bucketlen[1][hv]++;
3234}
3235
3236
3237/* ------------------------------------------------------------------------ */
3238/* Function:    nat_outlookup                                               */
3239/* Returns:     nat_t* - NULL == no match,                                  */
3240/*                       else pointer to matching NAT entry                 */
3241/* Parameters:  fin(I)   - pointer to packet information                    */
3242/*              flags(I) - NAT flags for this packet                        */
3243/*              p(I)     - protocol for this packet                         */
3244/*              src(I)   - source IP address                                */
3245/*              dst(I)   - destination IP address                           */
3246/*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3247/*                                                                          */
3248/* Lookup a nat entry based on the source 'real' ip address/port and        */
3249/* destination address/port.  We use this lookup when sending a packet out, */
3250/* we're looking for a table entry, based on the source address.            */
3251/*                                                                          */
3252/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3253/*                                                                          */
3254/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3255/*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3256/*                                                                          */
3257/* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3258/*            the packet is of said protocol                                */
3259/* ------------------------------------------------------------------------ */
3260nat_t *nat_outlookup(fin, flags, p, src, dst)
3261fr_info_t *fin;
3262u_int flags, p;
3263struct in_addr src , dst;
3264{
3265	u_short sport, dport;
3266	u_int sflags;
3267	ipnat_t *ipn;
3268	u_32_t srcip;
3269	nat_t *nat;
3270	int nflags;
3271	void *ifp;
3272	u_int hv;
3273
3274	ifp = fin->fin_ifp;
3275	srcip = src.s_addr;
3276	sflags = flags & IPN_TCPUDPICMP;
3277	sport = 0;
3278	dport = 0;
3279
3280	switch (p)
3281	{
3282	case IPPROTO_TCP :
3283	case IPPROTO_UDP :
3284		sport = htons(fin->fin_data[0]);
3285		dport = htons(fin->fin_data[1]);
3286		break;
3287	case IPPROTO_ICMP :
3288		if (flags & IPN_ICMPERR)
3289			sport = fin->fin_data[1];
3290		else
3291			dport = fin->fin_data[1];
3292		break;
3293	default :
3294		break;
3295	}
3296
3297	if ((flags & SI_WILDP) != 0)
3298		goto find_out_wild_ports;
3299
3300	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3301	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
3302	nat = nat_table[0][hv];
3303	for (; nat; nat = nat->nat_hnext[0]) {
3304		nflags = nat->nat_flags;
3305
3306		if (ifp != NULL) {
3307			if (nat->nat_dir == NAT_REDIRECT) {
3308				if (ifp != nat->nat_ifps[1])
3309					continue;
3310			} else {
3311				if (ifp != nat->nat_ifps[0])
3312					continue;
3313			}
3314		}
3315
3316		if (nat->nat_inip.s_addr == srcip &&
3317		    nat->nat_oip.s_addr == dst.s_addr &&
3318		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3319		     || (p == nat->nat_p))) {
3320			switch (p)
3321			{
3322#if 0
3323			case IPPROTO_GRE :
3324				if (nat->nat_call[1] != fin->fin_data[0])
3325					continue;
3326				break;
3327#endif
3328			case IPPROTO_TCP :
3329			case IPPROTO_UDP :
3330				if (nat->nat_oport != dport)
3331					continue;
3332				if (nat->nat_inport != sport)
3333					continue;
3334				break;
3335			default :
3336				break;
3337			}
3338
3339			ipn = nat->nat_ptr;
3340			if ((ipn != NULL) && (nat->nat_aps != NULL))
3341				if (appr_match(fin, nat) != 0)
3342					continue;
3343			return nat;
3344		}
3345	}
3346
3347	/*
3348	 * So if we didn't find it but there are wildcard members in the hash
3349	 * table, go back and look for them.  We do this search and update here
3350	 * because it is modifying the NAT table and we want to do this only
3351	 * for the first packet that matches.  The exception, of course, is
3352	 * for "dummy" (FI_IGNORE) lookups.
3353	 */
3354find_out_wild_ports:
3355	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3356		return NULL;
3357	if (nat_stats.ns_wilds == 0)
3358		return NULL;
3359
3360	RWLOCK_EXIT(&ipf_nat);
3361
3362	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3363	hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
3364
3365	WRITE_ENTER(&ipf_nat);
3366
3367	nat = nat_table[0][hv];
3368	for (; nat; nat = nat->nat_hnext[0]) {
3369		if (ifp != NULL) {
3370			if (nat->nat_dir == NAT_REDIRECT) {
3371				if (ifp != nat->nat_ifps[1])
3372					continue;
3373			} else {
3374				if (ifp != nat->nat_ifps[0])
3375					continue;
3376			}
3377		}
3378
3379		if (nat->nat_p != fin->fin_p)
3380			continue;
3381		if ((nat->nat_inip.s_addr != srcip) ||
3382		    (nat->nat_oip.s_addr != dst.s_addr))
3383			continue;
3384
3385		nflags = nat->nat_flags;
3386		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3387			continue;
3388
3389		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3390			       NAT_OUTBOUND) == 1) {
3391			if ((fin->fin_flx & FI_IGNORE) != 0)
3392				break;
3393			if ((nflags & SI_CLONE) != 0) {
3394				nat = fr_natclone(fin, nat);
3395				if (nat == NULL)
3396					break;
3397			} else {
3398				MUTEX_ENTER(&ipf_nat_new);
3399				nat_stats.ns_wilds--;
3400				MUTEX_EXIT(&ipf_nat_new);
3401			}
3402			nat->nat_inport = sport;
3403			nat->nat_oport = dport;
3404			if (nat->nat_outport == 0)
3405				nat->nat_outport = sport;
3406			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3407			nat_tabmove(nat);
3408			break;
3409		}
3410	}
3411
3412	MUTEX_DOWNGRADE(&ipf_nat);
3413
3414	return nat;
3415}
3416
3417
3418/* ------------------------------------------------------------------------ */
3419/* Function:    nat_lookupredir                                             */
3420/* Returns:     nat_t* - NULL == no match,                                  */
3421/*                       else pointer to matching NAT entry                 */
3422/* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3423/*                      entry for.                                          */
3424/*                                                                          */
3425/* Lookup the NAT tables to search for a matching redirect                  */
3426/* ------------------------------------------------------------------------ */
3427nat_t *nat_lookupredir(np)
3428natlookup_t *np;
3429{
3430	fr_info_t fi;
3431	nat_t *nat;
3432
3433	bzero((char *)&fi, sizeof(fi));
3434	if (np->nl_flags & IPN_IN) {
3435		fi.fin_data[0] = ntohs(np->nl_realport);
3436		fi.fin_data[1] = ntohs(np->nl_outport);
3437	} else {
3438		fi.fin_data[0] = ntohs(np->nl_inport);
3439		fi.fin_data[1] = ntohs(np->nl_outport);
3440	}
3441	if (np->nl_flags & IPN_TCP)
3442		fi.fin_p = IPPROTO_TCP;
3443	else if (np->nl_flags & IPN_UDP)
3444		fi.fin_p = IPPROTO_UDP;
3445	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3446		fi.fin_p = IPPROTO_ICMP;
3447
3448	/*
3449	 * We can do two sorts of lookups:
3450	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3451	 * - default: we have the `in' and `out' address, look for `real'.
3452	 */
3453	if (np->nl_flags & IPN_IN) {
3454		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3455					np->nl_realip, np->nl_outip))) {
3456			np->nl_inip = nat->nat_inip;
3457			np->nl_inport = nat->nat_inport;
3458		}
3459	} else {
3460		/*
3461		 * If nl_inip is non null, this is a lookup based on the real
3462		 * ip address. Else, we use the fake.
3463		 */
3464		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3465					 np->nl_inip, np->nl_outip))) {
3466
3467			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3468				fr_info_t fin;
3469				bzero((char *)&fin, sizeof(fin));
3470				fin.fin_p = nat->nat_p;
3471				fin.fin_data[0] = ntohs(nat->nat_outport);
3472				fin.fin_data[1] = ntohs(nat->nat_oport);
3473				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3474						 nat->nat_outip,
3475						 nat->nat_oip) != NULL) {
3476					np->nl_flags &= ~IPN_FINDFORWARD;
3477				}
3478			}
3479
3480			np->nl_realip = nat->nat_outip;
3481			np->nl_realport = nat->nat_outport;
3482		}
3483 	}
3484
3485	return nat;
3486}
3487
3488
3489/* ------------------------------------------------------------------------ */
3490/* Function:    nat_match                                                   */
3491/* Returns:     int - 0 == no match, 1 == match                             */
3492/* Parameters:  fin(I)   - pointer to packet information                    */
3493/*              np(I)    - pointer to NAT rule                              */
3494/*                                                                          */
3495/* Pull the matching of a packet against a NAT rule out of that complex     */
3496/* loop inside fr_checknatin() and lay it out properly in its own function. */
3497/* ------------------------------------------------------------------------ */
3498static int nat_match(fin, np)
3499fr_info_t *fin;
3500ipnat_t *np;
3501{
3502	frtuc_t *ft;
3503
3504	if (fin->fin_v != 4)
3505		return 0;
3506
3507	if (np->in_p && fin->fin_p != np->in_p)
3508		return 0;
3509
3510	if (fin->fin_out) {
3511		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3512			return 0;
3513		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3514		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3515			return 0;
3516		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3517		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3518			return 0;
3519	} else {
3520		if (!(np->in_redir & NAT_REDIRECT))
3521			return 0;
3522		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3523		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3524			return 0;
3525		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3526		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3527			return 0;
3528	}
3529
3530	ft = &np->in_tuc;
3531	if (!(fin->fin_flx & FI_TCPUDP) ||
3532	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3533		if (ft->ftu_scmp || ft->ftu_dcmp)
3534			return 0;
3535		return 1;
3536	}
3537
3538	return fr_tcpudpchk(fin, ft);
3539}
3540
3541
3542/* ------------------------------------------------------------------------ */
3543/* Function:    nat_update                                                  */
3544/* Returns:     Nil                                                         */
3545/* Parameters:  nat(I)    - pointer to NAT structure                        */
3546/*              np(I)     - pointer to NAT rule                             */
3547/*                                                                          */
3548/* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3549/* called with fin_rev updated - i.e. after calling nat_proto().            */
3550/* ------------------------------------------------------------------------ */
3551void nat_update(fin, nat, np)
3552fr_info_t *fin;
3553nat_t *nat;
3554ipnat_t *np;
3555{
3556	ipftq_t *ifq, *ifq2;
3557	ipftqent_t *tqe;
3558
3559	MUTEX_ENTER(&nat->nat_lock);
3560	tqe = &nat->nat_tqe;
3561	ifq = tqe->tqe_ifq;
3562
3563	/*
3564	 * We allow over-riding of NAT timeouts from NAT rules, even for
3565	 * TCP, however, if it is TCP and there is no rule timeout set,
3566	 * then do not update the timeout here.
3567	 */
3568	if (np != NULL)
3569		ifq2 = np->in_tqehead[fin->fin_rev];
3570	else
3571		ifq2 = NULL;
3572
3573	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3574		(void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3575	} else {
3576		if (ifq2 == NULL) {
3577			if (nat->nat_p == IPPROTO_UDP)
3578				ifq2 = &nat_udptq;
3579			else if (nat->nat_p == IPPROTO_ICMP)
3580				ifq2 = &nat_icmptq;
3581			else
3582				ifq2 = &nat_iptq;
3583		}
3584
3585		fr_movequeue(tqe, ifq, ifq2);
3586	}
3587	MUTEX_EXIT(&nat->nat_lock);
3588}
3589
3590
3591/* ------------------------------------------------------------------------ */
3592/* Function:    fr_checknatout                                              */
3593/* Returns:     int - -1 == packet failed NAT checks so block it,           */
3594/*                     0 == no packet translation occurred,                 */
3595/*                     1 == packet was successfully translated.             */
3596/* Parameters:  fin(I)   - pointer to packet information                    */
3597/*              passp(I) - pointer to filtering result flags                */
3598/*                                                                          */
3599/* Check to see if an outcoming packet should be changed.  ICMP packets are */
3600/* first checked to see if they match an existing entry (if an error),      */
3601/* otherwise a search of the current NAT table is made.  If neither results */
3602/* in a match then a search for a matching NAT rule is made.  Create a new  */
3603/* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3604/* packet header(s) as required.                                            */
3605/* ------------------------------------------------------------------------ */
3606int fr_checknatout(fin, passp)
3607fr_info_t *fin;
3608u_32_t *passp;
3609{
3610	struct ifnet *ifp, *sifp;
3611	icmphdr_t *icmp = NULL;
3612	tcphdr_t *tcp = NULL;
3613	int rval, natfailed;
3614	ipnat_t *np = NULL;
3615	u_int nflags = 0;
3616	u_32_t ipa, iph;
3617	int natadd = 1;
3618	frentry_t *fr;
3619	nat_t *nat;
3620
3621	if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
3622		return 0;
3623
3624	natfailed = 0;
3625	fr = fin->fin_fr;
3626	sifp = fin->fin_ifp;
3627	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3628	    fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1)
3629		fin->fin_ifp = fr->fr_tif.fd_ifp;
3630	ifp = fin->fin_ifp;
3631
3632	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3633		switch (fin->fin_p)
3634		{
3635		case IPPROTO_TCP :
3636			nflags = IPN_TCP;
3637			break;
3638		case IPPROTO_UDP :
3639			nflags = IPN_UDP;
3640			break;
3641		case IPPROTO_ICMP :
3642			icmp = fin->fin_dp;
3643
3644			/*
3645			 * This is an incoming packet, so the destination is
3646			 * the icmp_id and the source port equals 0
3647			 */
3648			if (nat_icmpquerytype4(icmp->icmp_type))
3649				nflags = IPN_ICMPQUERY;
3650			break;
3651		default :
3652			break;
3653		}
3654
3655		if ((nflags & IPN_TCPUDP))
3656			tcp = fin->fin_dp;
3657	}
3658
3659	ipa = fin->fin_saddr;
3660
3661	READ_ENTER(&ipf_nat);
3662
3663	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3664	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3665		/*EMPTY*/;
3666	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3667		natadd = 0;
3668	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3669				      fin->fin_src, fin->fin_dst))) {
3670		nflags = nat->nat_flags;
3671	} else {
3672		u_32_t hv, msk, nmsk;
3673
3674		/*
3675		 * If there is no current entry in the nat table for this IP#,
3676		 * create one for it (if there is a matching rule).
3677		 */
3678		RWLOCK_EXIT(&ipf_nat);
3679		msk = 0xffffffff;
3680		nmsk = nat_masks;
3681		WRITE_ENTER(&ipf_nat);
3682maskloop:
3683		iph = ipa & htonl(msk);
3684		hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3685		for (np = nat_rules[hv]; np; np = np->in_mnext)
3686		{
3687			if ((np->in_ifps[0] && (np->in_ifps[0] != ifp)))
3688				continue;
3689			if (np->in_v != fin->fin_v)
3690				continue;
3691			if (np->in_p && (np->in_p != fin->fin_p))
3692				continue;
3693			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3694				continue;
3695			if (np->in_flags & IPN_FILTER) {
3696				if (!nat_match(fin, np))
3697					continue;
3698			} else if ((ipa & np->in_inmsk) != np->in_inip)
3699				continue;
3700
3701			if ((fr != NULL) &&
3702			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3703				continue;
3704
3705			if (*np->in_plabel != '\0') {
3706				if (((np->in_flags & IPN_FILTER) == 0) &&
3707				    (np->in_dport != tcp->th_dport))
3708					continue;
3709				if (appr_ok(fin, tcp, np) == 0)
3710					continue;
3711			}
3712
3713			if ((nat = nat_new(fin, np, NULL, nflags,
3714					   NAT_OUTBOUND))) {
3715				np->in_hits++;
3716				break;
3717			} else
3718				natfailed = -1;
3719		}
3720		if ((np == NULL) && (nmsk != 0)) {
3721			while (nmsk) {
3722				msk <<= 1;
3723				if (nmsk & 0x80000000)
3724					break;
3725				nmsk <<= 1;
3726			}
3727			if (nmsk != 0) {
3728				nmsk <<= 1;
3729				goto maskloop;
3730			}
3731		}
3732		MUTEX_DOWNGRADE(&ipf_nat);
3733	}
3734
3735	if (nat != NULL) {
3736		rval = fr_natout(fin, nat, natadd, nflags);
3737		if (rval == 1) {
3738			MUTEX_ENTER(&nat->nat_lock);
3739			nat->nat_ref++;
3740			MUTEX_EXIT(&nat->nat_lock);
3741			fin->fin_nat = nat;
3742		}
3743	} else
3744		rval = natfailed;
3745	RWLOCK_EXIT(&ipf_nat);
3746
3747	if (rval == -1) {
3748		if (passp != NULL)
3749			*passp = FR_BLOCK;
3750		fin->fin_flx |= FI_BADNAT;
3751	}
3752	fin->fin_ifp = sifp;
3753	return rval;
3754}
3755
3756/* ------------------------------------------------------------------------ */
3757/* Function:    fr_natout                                                   */
3758/* Returns:     int - -1 == packet failed NAT checks so block it,           */
3759/*                     1 == packet was successfully translated.             */
3760/* Parameters:  fin(I)    - pointer to packet information                   */
3761/*              nat(I)    - pointer to NAT structure                        */
3762/*              natadd(I) - flag indicating if it is safe to add frag cache */
3763/*              nflags(I) - NAT flags set for this packet                   */
3764/*                                                                          */
3765/* Translate a packet coming "out" on an interface.                         */
3766/* ------------------------------------------------------------------------ */
3767int fr_natout(fin, nat, natadd, nflags)
3768fr_info_t *fin;
3769nat_t *nat;
3770int natadd;
3771u_32_t nflags;
3772{
3773	icmphdr_t *icmp;
3774	u_short *csump;
3775	tcphdr_t *tcp;
3776	ipnat_t *np;
3777	int i;
3778
3779	tcp = NULL;
3780	icmp = NULL;
3781	csump = NULL;
3782	np = nat->nat_ptr;
3783
3784	if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3785		(void) fr_nat_newfrag(fin, 0, nat);
3786
3787	MUTEX_ENTER(&nat->nat_lock);
3788	nat->nat_bytes[1] += fin->fin_plen;
3789	nat->nat_pkts[1]++;
3790	MUTEX_EXIT(&nat->nat_lock);
3791
3792	/*
3793	 * Fix up checksums, not by recalculating them, but
3794	 * simply computing adjustments.
3795	 * This is only done for STREAMS based IP implementations where the
3796	 * checksum has already been calculated by IP.  In all other cases,
3797	 * IPFilter is called before the checksum needs calculating so there
3798	 * is no call to modify whatever is in the header now.
3799	 */
3800	if (fin->fin_v == 4) {
3801		if (nflags == IPN_ICMPERR) {
3802			u_32_t s1, s2, sumd;
3803
3804			s1 = LONG_SUM(ntohl(fin->fin_saddr));
3805			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3806			CALC_SUMD(s1, s2, sumd);
3807			fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3808		}
3809#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || defined(linux)
3810		else {
3811			if (nat->nat_dir == NAT_OUTBOUND)
3812				fix_outcksum(fin, &fin->fin_ip->ip_sum,
3813					     nat->nat_ipsumd);
3814			else
3815				fix_incksum(fin, &fin->fin_ip->ip_sum,
3816					    nat->nat_ipsumd);
3817		}
3818#endif
3819	}
3820
3821	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3822		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3823			tcp = fin->fin_dp;
3824
3825			tcp->th_sport = nat->nat_outport;
3826			fin->fin_data[0] = ntohs(nat->nat_outport);
3827		}
3828
3829		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3830			icmp = fin->fin_dp;
3831			icmp->icmp_id = nat->nat_outport;
3832		}
3833
3834		csump = nat_proto(fin, nat, nflags);
3835	}
3836
3837	fin->fin_ip->ip_src = nat->nat_outip;
3838
3839	nat_update(fin, nat, np);
3840
3841	/*
3842	 * The above comments do not hold for layer 4 (or higher) checksums...
3843	 */
3844	if (csump != NULL) {
3845		if (nat->nat_dir == NAT_OUTBOUND)
3846			fix_outcksum(fin, csump, nat->nat_sumd[1]);
3847		else
3848			fix_incksum(fin, csump, nat->nat_sumd[1]);
3849	}
3850#ifdef	IPFILTER_SYNC
3851	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
3852#endif
3853	/* ------------------------------------------------------------- */
3854	/* A few quick notes:						 */
3855	/*	Following are test conditions prior to calling the 	 */
3856	/*	appr_check routine.					 */
3857	/*								 */
3858	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
3859	/*	with a redirect rule, we attempt to match the packet's	 */
3860	/*	source port against in_dport, otherwise	we'd compare the */
3861	/*	packet's destination.			 		 */
3862	/* ------------------------------------------------------------- */
3863	if ((np != NULL) && (np->in_apr != NULL)) {
3864		i = appr_check(fin, nat);
3865		if (i == 0)
3866			i = 1;
3867	} else
3868		i = 1;
3869	ATOMIC_INCL(nat_stats.ns_mapped[1]);
3870	fin->fin_flx |= FI_NATED;
3871	return i;
3872}
3873
3874
3875/* ------------------------------------------------------------------------ */
3876/* Function:    fr_checknatin                                               */
3877/* Returns:     int - -1 == packet failed NAT checks so block it,           */
3878/*                     0 == no packet translation occurred,                 */
3879/*                     1 == packet was successfully translated.             */
3880/* Parameters:  fin(I)   - pointer to packet information                    */
3881/*              passp(I) - pointer to filtering result flags                */
3882/*                                                                          */
3883/* Check to see if an incoming packet should be changed.  ICMP packets are  */
3884/* first checked to see if they match an existing entry (if an error),      */
3885/* otherwise a search of the current NAT table is made.  If neither results */
3886/* in a match then a search for a matching NAT rule is made.  Create a new  */
3887/* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3888/* packet header(s) as required.                                            */
3889/* ------------------------------------------------------------------------ */
3890int fr_checknatin(fin, passp)
3891fr_info_t *fin;
3892u_32_t *passp;
3893{
3894	u_int nflags, natadd;
3895	int rval, natfailed;
3896	struct ifnet *ifp;
3897	struct in_addr in;
3898	icmphdr_t *icmp;
3899	tcphdr_t *tcp;
3900	u_short dport;
3901	ipnat_t *np;
3902	nat_t *nat;
3903	u_32_t iph;
3904
3905	if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
3906		return 0;
3907
3908	tcp = NULL;
3909	icmp = NULL;
3910	dport = 0;
3911	natadd = 1;
3912	nflags = 0;
3913	natfailed = 0;
3914	ifp = fin->fin_ifp;
3915
3916	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3917		switch (fin->fin_p)
3918		{
3919		case IPPROTO_TCP :
3920			nflags = IPN_TCP;
3921			break;
3922		case IPPROTO_UDP :
3923			nflags = IPN_UDP;
3924			break;
3925		case IPPROTO_ICMP :
3926			icmp = fin->fin_dp;
3927
3928			/*
3929			 * This is an incoming packet, so the destination is
3930			 * the icmp_id and the source port equals 0
3931			 */
3932			if (nat_icmpquerytype4(icmp->icmp_type)) {
3933				nflags = IPN_ICMPQUERY;
3934				dport = icmp->icmp_id;
3935			} break;
3936		default :
3937			break;
3938		}
3939
3940		if ((nflags & IPN_TCPUDP)) {
3941			tcp = fin->fin_dp;
3942			dport = tcp->th_dport;
3943		}
3944	}
3945
3946	in = fin->fin_dst;
3947
3948	READ_ENTER(&ipf_nat);
3949
3950	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3951	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
3952		/*EMPTY*/;
3953	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3954		natadd = 0;
3955	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3956				     fin->fin_src, in))) {
3957		nflags = nat->nat_flags;
3958	} else {
3959		u_32_t hv, msk, rmsk;
3960
3961		RWLOCK_EXIT(&ipf_nat);
3962		rmsk = rdr_masks;
3963		msk = 0xffffffff;
3964		WRITE_ENTER(&ipf_nat);
3965		/*
3966		 * If there is no current entry in the nat table for this IP#,
3967		 * create one for it (if there is a matching rule).
3968		 */
3969maskloop:
3970		iph = in.s_addr & htonl(msk);
3971		hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
3972		for (np = rdr_rules[hv]; np; np = np->in_rnext) {
3973			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
3974				continue;
3975			if (np->in_v != fin->fin_v)
3976				continue;
3977			if (np->in_p && (np->in_p != fin->fin_p))
3978				continue;
3979			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3980				continue;
3981			if (np->in_flags & IPN_FILTER) {
3982				if (!nat_match(fin, np))
3983					continue;
3984			} else {
3985				if ((in.s_addr & np->in_outmsk) != np->in_outip)
3986					continue;
3987				if (np->in_pmin &&
3988				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
3989				     (ntohs(dport) < ntohs(np->in_pmin))))
3990					continue;
3991			}
3992
3993			if (*np->in_plabel != '\0') {
3994				if (!appr_ok(fin, tcp, np)) {
3995					continue;
3996				}
3997			}
3998
3999			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4000			if (nat != NULL) {
4001				np->in_hits++;
4002				break;
4003			} else
4004				natfailed = -1;
4005		}
4006
4007		if ((np == NULL) && (rmsk != 0)) {
4008			while (rmsk) {
4009				msk <<= 1;
4010				if (rmsk & 0x80000000)
4011					break;
4012				rmsk <<= 1;
4013			}
4014			if (rmsk != 0) {
4015				rmsk <<= 1;
4016				goto maskloop;
4017			}
4018		}
4019		MUTEX_DOWNGRADE(&ipf_nat);
4020	}
4021	if (nat != NULL) {
4022		rval = fr_natin(fin, nat, natadd, nflags);
4023		if (rval == 1) {
4024			MUTEX_ENTER(&nat->nat_lock);
4025			nat->nat_ref++;
4026			MUTEX_EXIT(&nat->nat_lock);
4027			fin->fin_nat = nat;
4028			fin->fin_state = nat->nat_state;
4029		}
4030	} else
4031		rval = natfailed;
4032	RWLOCK_EXIT(&ipf_nat);
4033
4034	if (rval == -1) {
4035		if (passp != NULL)
4036			*passp = FR_BLOCK;
4037		fin->fin_flx |= FI_BADNAT;
4038	}
4039	return rval;
4040}
4041
4042
4043/* ------------------------------------------------------------------------ */
4044/* Function:    fr_natin                                                    */
4045/* Returns:     int - -1 == packet failed NAT checks so block it,           */
4046/*                     1 == packet was successfully translated.             */
4047/* Parameters:  fin(I)    - pointer to packet information                   */
4048/*              nat(I)    - pointer to NAT structure                        */
4049/*              natadd(I) - flag indicating if it is safe to add frag cache */
4050/*              nflags(I) - NAT flags set for this packet                   */
4051/* Locks Held:  ipf_nat (READ)                                              */
4052/*                                                                          */
4053/* Translate a packet coming "in" on an interface.                          */
4054/* ------------------------------------------------------------------------ */
4055int fr_natin(fin, nat, natadd, nflags)
4056fr_info_t *fin;
4057nat_t *nat;
4058int natadd;
4059u_32_t nflags;
4060{
4061	icmphdr_t *icmp;
4062	u_short *csump;
4063	tcphdr_t *tcp;
4064	ipnat_t *np;
4065	int i;
4066
4067	tcp = NULL;
4068	csump = NULL;
4069	np = nat->nat_ptr;
4070	fin->fin_fr = nat->nat_fr;
4071
4072	if (np != NULL) {
4073		if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4074			(void) fr_nat_newfrag(fin, 0, nat);
4075
4076	/* ------------------------------------------------------------- */
4077	/* A few quick notes:						 */
4078	/*	Following are test conditions prior to calling the 	 */
4079	/*	appr_check routine.					 */
4080	/*								 */
4081	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4082	/*	with a map rule, we attempt to match the packet's	 */
4083	/*	source port against in_dport, otherwise	we'd compare the */
4084	/*	packet's destination.			 		 */
4085	/* ------------------------------------------------------------- */
4086		if (np->in_apr != NULL) {
4087			i = appr_check(fin, nat);
4088			if (i == -1) {
4089				return -1;
4090			}
4091		}
4092	}
4093
4094#ifdef	IPFILTER_SYNC
4095	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4096#endif
4097
4098	MUTEX_ENTER(&nat->nat_lock);
4099	nat->nat_bytes[0] += fin->fin_plen;
4100	nat->nat_pkts[0]++;
4101	MUTEX_EXIT(&nat->nat_lock);
4102
4103	fin->fin_ip->ip_dst = nat->nat_inip;
4104	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4105	if (nflags & IPN_TCPUDP)
4106		tcp = fin->fin_dp;
4107
4108	/*
4109	 * Fix up checksums, not by recalculating them, but
4110	 * simply computing adjustments.
4111	 * Why only do this for some platforms on inbound packets ?
4112	 * Because for those that it is done, IP processing is yet to happen
4113	 * and so the IPv4 header checksum has not yet been evaluated.
4114	 * Perhaps it should always be done for the benefit of things like
4115	 * fast forwarding (so that it doesn't need to be recomputed) but with
4116	 * header checksum offloading, perhaps it is a moot point.
4117	 */
4118#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4119     defined(__osf__) || defined(linux)
4120	if (nat->nat_dir == NAT_OUTBOUND)
4121		fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4122	else
4123		fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4124#endif
4125
4126	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4127		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4128			tcp->th_dport = nat->nat_inport;
4129			fin->fin_data[1] = ntohs(nat->nat_inport);
4130		}
4131
4132
4133		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4134			icmp = fin->fin_dp;
4135
4136			icmp->icmp_id = nat->nat_inport;
4137		}
4138
4139		csump = nat_proto(fin, nat, nflags);
4140	}
4141
4142	nat_update(fin, nat, np);
4143
4144	/*
4145	 * The above comments do not hold for layer 4 (or higher) checksums...
4146	 */
4147	if (csump != NULL) {
4148		if (nat->nat_dir == NAT_OUTBOUND)
4149			fix_incksum(fin, csump, nat->nat_sumd[0]);
4150		else
4151			fix_outcksum(fin, csump, nat->nat_sumd[0]);
4152	}
4153	ATOMIC_INCL(nat_stats.ns_mapped[0]);
4154	fin->fin_flx |= FI_NATED;
4155	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4156		fin->fin_nattag = &np->in_tag;
4157	return 1;
4158}
4159
4160
4161/* ------------------------------------------------------------------------ */
4162/* Function:    nat_proto                                                   */
4163/* Returns:     u_short* - pointer to transport header checksum to update,  */
4164/*                         NULL if the transport protocol is not recognised */
4165/*                         as needing a checksum update.                    */
4166/* Parameters:  fin(I)    - pointer to packet information                   */
4167/*              nat(I)    - pointer to NAT structure                        */
4168/*              nflags(I) - NAT flags set for this packet                   */
4169/*                                                                          */
4170/* Return the pointer to the checksum field for each protocol so understood.*/
4171/* If support for making other changes to a protocol header is required,    */
4172/* that is not strictly 'address' translation, such as clamping the MSS in  */
4173/* TCP down to a specific value, then do it from here.                      */
4174/* ------------------------------------------------------------------------ */
4175u_short *nat_proto(fin, nat, nflags)
4176fr_info_t *fin;
4177nat_t *nat;
4178u_int nflags;
4179{
4180	icmphdr_t *icmp;
4181	u_short *csump;
4182	tcphdr_t *tcp;
4183	udphdr_t *udp;
4184
4185	csump = NULL;
4186	if (fin->fin_out == 0) {
4187		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4188	} else {
4189		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4190	}
4191
4192	switch (fin->fin_p)
4193	{
4194	case IPPROTO_TCP :
4195		tcp = fin->fin_dp;
4196
4197		csump = &tcp->th_sum;
4198
4199		/*
4200		 * Do a MSS CLAMPING on a SYN packet,
4201		 * only deal IPv4 for now.
4202		 */
4203		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4204			nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
4205
4206		break;
4207
4208	case IPPROTO_UDP :
4209		udp = fin->fin_dp;
4210
4211		if (udp->uh_sum)
4212			csump = &udp->uh_sum;
4213		break;
4214
4215	case IPPROTO_ICMP :
4216		icmp = fin->fin_dp;
4217
4218		if ((nflags & IPN_ICMPQUERY) != 0) {
4219			if (icmp->icmp_cksum != 0)
4220				csump = &icmp->icmp_cksum;
4221		}
4222		break;
4223	}
4224	return csump;
4225}
4226
4227
4228/* ------------------------------------------------------------------------ */
4229/* Function:    fr_natunload                                                */
4230/* Returns:     Nil                                                         */
4231/* Parameters:  Nil                                                         */
4232/*                                                                          */
4233/* Free all memory used by NAT structures allocated at runtime.             */
4234/* ------------------------------------------------------------------------ */
4235void fr_natunload()
4236{
4237	ipftq_t *ifq, *ifqnext;
4238
4239	(void) nat_clearlist();
4240	(void) nat_flushtable();
4241
4242	/*
4243	 * Proxy timeout queues are not cleaned here because although they
4244	 * exist on the NAT list, appr_unload is called after fr_natunload
4245	 * and the proxies actually are responsible for them being created.
4246	 * Should the proxy timeouts have their own list?  There's no real
4247	 * justification as this is the only complication.
4248	 */
4249	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4250		ifqnext = ifq->ifq_next;
4251		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4252		    (fr_deletetimeoutqueue(ifq) == 0))
4253			fr_freetimeoutqueue(ifq);
4254	}
4255
4256	if (nat_table[0] != NULL) {
4257		KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4258		nat_table[0] = NULL;
4259	}
4260	if (nat_table[1] != NULL) {
4261		KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4262		nat_table[1] = NULL;
4263	}
4264	if (nat_rules != NULL) {
4265		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4266		nat_rules = NULL;
4267	}
4268	if (rdr_rules != NULL) {
4269		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4270		rdr_rules = NULL;
4271	}
4272	if (maptable != NULL) {
4273		KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4274		maptable = NULL;
4275	}
4276	if (nat_stats.ns_bucketlen[0] != NULL) {
4277		KFREES(nat_stats.ns_bucketlen[0],
4278		       sizeof(u_long *) * ipf_nattable_sz);
4279		nat_stats.ns_bucketlen[0] = NULL;
4280	}
4281	if (nat_stats.ns_bucketlen[1] != NULL) {
4282		KFREES(nat_stats.ns_bucketlen[1],
4283		       sizeof(u_long *) * ipf_nattable_sz);
4284		nat_stats.ns_bucketlen[1] = NULL;
4285	}
4286
4287	if (fr_nat_maxbucket_reset == 1)
4288		fr_nat_maxbucket = 0;
4289
4290	if (fr_nat_init == 1) {
4291		fr_nat_init = 0;
4292		fr_sttab_destroy(nat_tqb);
4293
4294		RW_DESTROY(&ipf_natfrag);
4295		RW_DESTROY(&ipf_nat);
4296
4297		MUTEX_DESTROY(&ipf_nat_new);
4298		MUTEX_DESTROY(&ipf_natio);
4299
4300		MUTEX_DESTROY(&nat_udptq.ifq_lock);
4301		MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4302		MUTEX_DESTROY(&nat_iptq.ifq_lock);
4303	}
4304}
4305
4306
4307/* ------------------------------------------------------------------------ */
4308/* Function:    fr_natexpire                                                */
4309/* Returns:     Nil                                                         */
4310/* Parameters:  Nil                                                         */
4311/*                                                                          */
4312/* Check all of the timeout queues for entries at the top which need to be  */
4313/* expired.                                                                 */
4314/* ------------------------------------------------------------------------ */
4315void fr_natexpire()
4316{
4317	ipftq_t *ifq, *ifqnext;
4318	ipftqent_t *tqe, *tqn;
4319#if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4320	int s;
4321#endif
4322	int i;
4323
4324	SPL_NET(s);
4325	WRITE_ENTER(&ipf_nat);
4326	for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4327		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4328			if (tqe->tqe_die > fr_ticks)
4329				break;
4330			tqn = tqe->tqe_next;
4331			nat_delete(tqe->tqe_parent, NL_EXPIRE);
4332		}
4333	}
4334
4335	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4336		ifqnext = ifq->ifq_next;
4337
4338		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4339			if (tqe->tqe_die > fr_ticks)
4340				break;
4341			tqn = tqe->tqe_next;
4342			nat_delete(tqe->tqe_parent, NL_EXPIRE);
4343		}
4344	}
4345
4346	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4347		ifqnext = ifq->ifq_next;
4348
4349		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4350		    (ifq->ifq_ref == 0)) {
4351			fr_freetimeoutqueue(ifq);
4352		}
4353	}
4354
4355	RWLOCK_EXIT(&ipf_nat);
4356	SPL_X(s);
4357}
4358
4359
4360/* ------------------------------------------------------------------------ */
4361/* Function:    fr_natsync                                                  */
4362/* Returns:     Nil                                                         */
4363/* Parameters:  ifp(I) - pointer to network interface                       */
4364/*                                                                          */
4365/* Walk through all of the currently active NAT sessions, looking for those */
4366/* which need to have their translated address updated.                     */
4367/* ------------------------------------------------------------------------ */
4368void fr_natsync(ifp)
4369void *ifp;
4370{
4371	u_32_t sum1, sum2, sumd;
4372	struct in_addr in;
4373	ipnat_t *n;
4374	nat_t *nat;
4375	void *ifp2;
4376#if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4377	int s;
4378#endif
4379
4380	if (fr_running <= 0)
4381		return;
4382
4383	/*
4384	 * Change IP addresses for NAT sessions for any protocol except TCP
4385	 * since it will break the TCP connection anyway.  The only rules
4386	 * which will get changed are those which are "map ... -> 0/32",
4387	 * where the rule specifies the address is taken from the interface.
4388	 */
4389	SPL_NET(s);
4390	WRITE_ENTER(&ipf_nat);
4391
4392	if (fr_running <= 0) {
4393		RWLOCK_EXIT(&ipf_nat);
4394		return;
4395	}
4396
4397	for (nat = nat_instances; nat; nat = nat->nat_next) {
4398		if ((nat->nat_flags & IPN_TCP) != 0)
4399			continue;
4400		n = nat->nat_ptr;
4401		if ((n == NULL) ||
4402		    (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4403			continue;
4404		if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4405		     (ifp == nat->nat_ifps[1]))) {
4406			nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4407			if (nat->nat_ifnames[1][0] != '\0') {
4408				nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4409							  4);
4410			} else
4411				nat->nat_ifps[1] = nat->nat_ifps[0];
4412			ifp2 = nat->nat_ifps[0];
4413			if (ifp2 == NULL)
4414				continue;
4415
4416			/*
4417			 * Change the map-to address to be the same as the
4418			 * new one.
4419			 */
4420			sum1 = nat->nat_outip.s_addr;
4421			if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4422				nat->nat_outip = in;
4423			sum2 = nat->nat_outip.s_addr;
4424
4425			if (sum1 == sum2)
4426				continue;
4427			/*
4428			 * Readjust the checksum adjustment to take into
4429			 * account the new IP#.
4430			 */
4431			CALC_SUMD(sum1, sum2, sumd);
4432			/* XXX - dont change for TCP when solaris does
4433			 * hardware checksumming.
4434			 */
4435			sumd += nat->nat_sumd[0];
4436			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4437			nat->nat_sumd[1] = nat->nat_sumd[0];
4438		}
4439	}
4440
4441	for (n = nat_list; (n != NULL); n = n->in_next) {
4442		if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4443			n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4444		if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4445			n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4446	}
4447	RWLOCK_EXIT(&ipf_nat);
4448	SPL_X(s);
4449}
4450
4451
4452/* ------------------------------------------------------------------------ */
4453/* Function:    nat_icmpquerytype4                                          */
4454/* Returns:     int - 1 == success, 0 == failure                            */
4455/* Parameters:  icmptype(I) - ICMP type number                              */
4456/*                                                                          */
4457/* Tests to see if the ICMP type number passed is a query/response type or  */
4458/* not.                                                                     */
4459/* ------------------------------------------------------------------------ */
4460static INLINE int nat_icmpquerytype4(icmptype)
4461int icmptype;
4462{
4463
4464	/*
4465	 * For the ICMP query NAT code, it is essential that both the query
4466	 * and the reply match on the NAT rule. Because the NAT structure
4467	 * does not keep track of the icmptype, and a single NAT structure
4468	 * is used for all icmp types with the same src, dest and id, we
4469	 * simply define the replies as queries as well. The funny thing is,
4470	 * altough it seems silly to call a reply a query, this is exactly
4471	 * as it is defined in the IPv4 specification
4472	 */
4473
4474	switch (icmptype)
4475	{
4476
4477	case ICMP_ECHOREPLY:
4478	case ICMP_ECHO:
4479	/* route aedvertisement/solliciation is currently unsupported: */
4480	/* it would require rewriting the ICMP data section            */
4481	case ICMP_TSTAMP:
4482	case ICMP_TSTAMPREPLY:
4483	case ICMP_IREQ:
4484	case ICMP_IREQREPLY:
4485	case ICMP_MASKREQ:
4486	case ICMP_MASKREPLY:
4487		return 1;
4488	default:
4489		return 0;
4490	}
4491}
4492
4493
4494/* ------------------------------------------------------------------------ */
4495/* Function:    nat_log                                                     */
4496/* Returns:     Nil                                                         */
4497/* Parameters:  nat(I)  - pointer to NAT structure                          */
4498/*              type(I) - type of log entry to create                       */
4499/*                                                                          */
4500/* Creates a NAT log entry.                                                 */
4501/* ------------------------------------------------------------------------ */
4502void nat_log(nat, type)
4503struct nat *nat;
4504u_int type;
4505{
4506#ifdef	IPFILTER_LOG
4507# ifndef LARGE_NAT
4508	struct ipnat *np;
4509	int rulen;
4510# endif
4511	struct natlog natl;
4512	void *items[1];
4513	size_t sizes[1];
4514	int types[1];
4515
4516	natl.nl_inip = nat->nat_inip;
4517	natl.nl_outip = nat->nat_outip;
4518	natl.nl_origip = nat->nat_oip;
4519	natl.nl_bytes[0] = nat->nat_bytes[0];
4520	natl.nl_bytes[1] = nat->nat_bytes[1];
4521	natl.nl_pkts[0] = nat->nat_pkts[0];
4522	natl.nl_pkts[1] = nat->nat_pkts[1];
4523	natl.nl_origport = nat->nat_oport;
4524	natl.nl_inport = nat->nat_inport;
4525	natl.nl_outport = nat->nat_outport;
4526	natl.nl_p = nat->nat_p;
4527	natl.nl_type = type;
4528	natl.nl_rule = -1;
4529# ifndef LARGE_NAT
4530	if (nat->nat_ptr != NULL) {
4531		for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4532			if (np == nat->nat_ptr) {
4533				natl.nl_rule = rulen;
4534				break;
4535			}
4536	}
4537# endif
4538	items[0] = &natl;
4539	sizes[0] = sizeof(natl);
4540	types[0] = 0;
4541
4542	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4543#endif
4544}
4545
4546
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*                                                                          */
/* Compatibility interface for OpenBSD to trigger the correct updating of   */
/* interface references within IPFilter.  Simply forwards to frsync().      */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp)
void *ifp;
{
	frsync(ifp);
}
#endif
4563
4564
4565/* ------------------------------------------------------------------------ */
4566/* Function:    fr_natderef                                                 */
4567/* Returns:     Nil                                                         */
4568/* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4569/*                                                                          */
4570/* Decrement the reference counter for this NAT table entry and free it if  */
4571/* there are no more things using it.                                       */
4572/* ------------------------------------------------------------------------ */
4573void fr_natderef(natp)
4574nat_t **natp;
4575{
4576	nat_t *nat;
4577
4578	nat = *natp;
4579	*natp = NULL;
4580	WRITE_ENTER(&ipf_nat);
4581	nat->nat_ref--;
4582	if (nat->nat_ref == 0)
4583		nat_delete(nat, NL_EXPIRE);
4584	RWLOCK_EXIT(&ipf_nat);
4585}
4586
4587
4588/* ------------------------------------------------------------------------ */
4589/* Function:    fr_natclone                                                 */
4590/* Returns:     ipstate_t* - NULL == cloning failed,                        */
4591/*                           else pointer to new state structure            */
4592/* Parameters:  fin(I) - pointer to packet information                      */
4593/*              is(I)  - pointer to master state structure                  */
4594/* Write Lock:  ipf_nat                                                     */
4595/*                                                                          */
4596/* Create a "duplcate" state table entry from the master.                   */
4597/* ------------------------------------------------------------------------ */
4598static nat_t *fr_natclone(fin, nat)
4599fr_info_t *fin;
4600nat_t *nat;
4601{
4602	frentry_t *fr;
4603	nat_t *clone;
4604	ipnat_t *np;
4605
4606	KMALLOC(clone, nat_t *);
4607	if (clone == NULL)
4608		return NULL;
4609	bcopy((char *)nat, (char *)clone, sizeof(*clone));
4610
4611	MUTEX_NUKE(&clone->nat_lock);
4612
4613	clone->nat_flags &= ~SI_CLONE;
4614	clone->nat_flags |= SI_CLONED;
4615
4616
4617	if (nat_insert(clone, fin->fin_rev) == -1) {
4618		KFREE(clone);
4619		return NULL;
4620	}
4621	np = clone->nat_ptr;
4622	if (np != NULL) {
4623		if (nat_logging)
4624			nat_log(clone, (u_int)np->in_redir);
4625		np->in_use++;
4626	}
4627	fr = clone->nat_fr;
4628	if (fr != NULL) {
4629		MUTEX_ENTER(&fr->fr_lock);
4630		fr->fr_ref++;
4631		MUTEX_EXIT(&fr->fr_lock);
4632	}
4633
4634
4635	/*
4636	 * Because the clone is created outside the normal loop of things and
4637	 * TCP has special needs in terms of state, initialise the timeout
4638	 * state of the new NAT from here.
4639	 */
4640	if (clone->nat_p == IPPROTO_TCP) {
4641		(void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb, \
4642				  clone->nat_flags);
4643	}
4644#ifdef	IPFILTER_SYNC
4645	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4646#endif
4647	if (nat_logging)
4648		nat_log(clone, NL_CLONE);
4649	return clone;
4650}
4651
4652
4653/* ------------------------------------------------------------------------ */
4654/* Function:   nat_wildok                                                   */
4655/* Returns:    int - 1 == packet's ports match wildcards                    */
4656/*                   0 == packet's ports don't match wildcards              */
4657/* Parameters: nat(I)   - NAT entry                                         */
4658/*             sport(I) - source port                                       */
4659/*             dport(I) - destination port                                  */
4660/*             flags(I) - wildcard flags                                    */
4661/*             dir(I)   - packet direction                                  */
4662/*                                                                          */
4663/* Use NAT entry and packet direction to determine which combination of     */
4664/* wildcard flags should be used.                                           */
4665/* ------------------------------------------------------------------------ */
4666static INLINE int nat_wildok(nat, sport, dport, flags, dir)
4667nat_t *nat;
4668int sport;
4669int dport;
4670int flags;
4671int dir;
4672{
4673	/*
4674	 * When called by       dir is set to
4675	 * nat_inlookup         NAT_INBOUND (0)
4676	 * nat_outlookup        NAT_OUTBOUND (1)
4677	 *
4678	 * We simply combine the packet's direction in dir with the original
4679	 * "intended" direction of that NAT entry in nat->nat_dir to decide
4680	 * which combination of wildcard flags to allow.
4681	 */
4682
4683	switch ((dir << 1) | nat->nat_dir)
4684	{
4685	case 3: /* outbound packet / outbound entry */
4686		if (((nat->nat_inport == sport) ||
4687		    (flags & SI_W_SPORT)) &&
4688		    ((nat->nat_oport == dport) ||
4689		    (flags & SI_W_DPORT)))
4690			return 1;
4691		break;
4692	case 2: /* outbound packet / inbound entry */
4693		if (((nat->nat_outport == sport) ||
4694		    (flags & SI_W_DPORT)) &&
4695		    ((nat->nat_oport == dport) ||
4696		    (flags & SI_W_SPORT)))
4697			return 1;
4698		break;
4699	case 1: /* inbound packet / outbound entry */
4700		if (((nat->nat_oport == sport) ||
4701		    (flags & SI_W_DPORT)) &&
4702		    ((nat->nat_outport == dport) ||
4703		    (flags & SI_W_SPORT)))
4704			return 1;
4705		break;
4706	case 0: /* inbound packet / inbound entry */
4707		if (((nat->nat_oport == sport) ||
4708		    (flags & SI_W_SPORT)) &&
4709		    ((nat->nat_outport == dport) ||
4710		    (flags & SI_W_DPORT)))
4711			return 1;
4712		break;
4713	default:
4714		break;
4715	}
4716
4717	return(0);
4718}
4719
4720
4721/* ------------------------------------------------------------------------ */
4722/* Function:    nat_mssclamp                                                */
4723/* Returns:     Nil                                                         */
4724/* Parameters:  tcp(I)    - pointer to TCP header                           */
4725/*              maxmss(I) - value to clamp the TCP MSS to                   */
4726/*              fin(I)    - pointer to packet information                   */
4727/*              csump(I)  - pointer to TCP checksum                         */
4728/*                                                                          */
4729/* Check for MSS option and clamp it if necessary.  If found and changed,   */
4730/* then the TCP header checksum will be updated to reflect the change in    */
4731/* the MSS.                                                                 */
4732/* ------------------------------------------------------------------------ */
4733static void nat_mssclamp(tcp, maxmss, fin, csump)
4734tcphdr_t *tcp;
4735u_32_t maxmss;
4736fr_info_t *fin;
4737u_short *csump;
4738{
4739	u_char *cp, *ep, opt;
4740	int hlen, advance;
4741	u_32_t mss, sumd;
4742
4743	hlen = TCP_OFF(tcp) << 2;
4744	if (hlen > sizeof(*tcp)) {
4745		cp = (u_char *)tcp + sizeof(*tcp);
4746		ep = (u_char *)tcp + hlen;
4747
4748		while (cp < ep) {
4749			opt = cp[0];
4750			if (opt == TCPOPT_EOL)
4751				break;
4752			else if (opt == TCPOPT_NOP) {
4753				cp++;
4754				continue;
4755			}
4756
4757			if (cp + 1 >= ep)
4758				break;
4759			advance = cp[1];
4760			if ((cp + advance > ep) || (advance <= 0))
4761				break;
4762			switch (opt)
4763			{
4764			case TCPOPT_MAXSEG:
4765				if (advance != 4)
4766					break;
4767				mss = cp[2] * 256 + cp[3];
4768				if (mss > maxmss) {
4769					cp[2] = maxmss / 256;
4770					cp[3] = maxmss & 0xff;
4771					CALC_SUMD(mss, maxmss, sumd);
4772					fix_outcksum(fin, csump, sumd);
4773				}
4774				break;
4775			default:
4776				/* ignore unknown options */
4777				break;
4778			}
4779
4780			cp += advance;
4781		}
4782	}
4783}
4784
4785
4786/* ------------------------------------------------------------------------ */
4787/* Function:    fr_setnatqueue                                              */
4788/* Returns:     Nil                                                         */
4789/* Parameters:  nat(I)- pointer to NAT structure                            */
4790/*              rev(I) - forward(0) or reverse(1) direction                 */
4791/* Locks:       ipf_nat (read or write)                                     */
4792/*                                                                          */
4793/* Put the NAT entry on its default queue entry, using rev as a helped in   */
4794/* determining which queue it should be placed on.                          */
4795/* ------------------------------------------------------------------------ */
4796void fr_setnatqueue(nat, rev)
4797nat_t *nat;
4798int rev;
4799{
4800	ipftq_t *oifq, *nifq;
4801
4802	if (nat->nat_ptr != NULL)
4803		nifq = nat->nat_ptr->in_tqehead[rev];
4804	else
4805		nifq = NULL;
4806
4807	if (nifq == NULL) {
4808		switch (nat->nat_p)
4809		{
4810		case IPPROTO_UDP :
4811			nifq = &nat_udptq;
4812			break;
4813		case IPPROTO_ICMP :
4814			nifq = &nat_icmptq;
4815			break;
4816		case IPPROTO_TCP :
4817			nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
4818			break;
4819		default :
4820			nifq = &nat_iptq;
4821			break;
4822		}
4823	}
4824
4825	oifq = nat->nat_tqe.tqe_ifq;
4826	/*
4827	 * If it's currently on a timeout queue, move it from one queue to
4828	 * another, else put it on the end of the newly determined queue.
4829	 */
4830	if (oifq != NULL)
4831		fr_movequeue(&nat->nat_tqe, oifq, nifq);
4832	else
4833		fr_queueappend(&nat->nat_tqe, nifq, nat);
4834	return;
4835}
4836