1/*	$NetBSD: ip_nat.c,v 1.44 2011/11/27 10:53:07 mbalmer Exp $	*/
2
3/*
4 * Copyright (C) 1995-2003 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 *
8 * Copyright 2008 Sun Microsystems, Inc.
9 */
10#if defined(KERNEL) || defined(_KERNEL)
11# undef KERNEL
12# undef _KERNEL
13# define        KERNEL	1
14# define        _KERNEL	1
15#endif
16#include <sys/errno.h>
17#include <sys/types.h>
18#include <sys/param.h>
19#include <sys/time.h>
20#include <sys/file.h>
21#if defined(_KERNEL) && defined(__NetBSD_Version__) && \
22    (__NetBSD_Version__ >= 399002000)
23# include <sys/kauth.h>
24#endif
25#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
26    defined(_KERNEL)
27#if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
28#  include "opt_ipfilter_log.h"
29# else
30#  include "opt_ipfilter.h"
31# endif
32#endif
33#if !defined(_KERNEL)
34# include <stdio.h>
35# include <string.h>
36# include <stdlib.h>
37# define _KERNEL
38# ifdef __OpenBSD__
39struct file;
40# endif
41# include <sys/uio.h>
42# undef _KERNEL
43#endif
44#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
45# include <sys/filio.h>
46# include <sys/fcntl.h>
47#else
48# include <sys/ioctl.h>
49#endif
50#if !defined(AIX)
51# include <sys/fcntl.h>
52#endif
53#if !defined(linux)
54# include <sys/protosw.h>
55#endif
56#include <sys/socket.h>
57#if defined(_KERNEL)
58# include <sys/systm.h>
59# if !defined(__SVR4) && !defined(__svr4__)
60#  include <sys/mbuf.h>
61# endif
62#endif
63#if defined(__SVR4) || defined(__svr4__)
64# include <sys/filio.h>
65# include <sys/byteorder.h>
66# ifdef _KERNEL
67#  include <sys/dditypes.h>
68# endif
69# include <sys/stream.h>
70# include <sys/kmem.h>
71#endif
72#if __FreeBSD_version >= 300000
73# include <sys/queue.h>
74#endif
75#include <net/if.h>
76#if __FreeBSD_version >= 300000
77# include <net/if_var.h>
78# if defined(_KERNEL) && !defined(IPFILTER_LKM)
79#  include "opt_ipfilter.h"
80# endif
81#endif
82#ifdef sun
83# include <net/af.h>
84#endif
85#include <netinet/in.h>
86#include <netinet/in_systm.h>
87#include <netinet/ip.h>
88
89#ifdef RFC1825
90# include <vpn/md5.h>
91# include <vpn/ipsec.h>
92extern struct ifnet vpnif;
93#endif
94
95#if !defined(linux)
96# include <netinet/ip_var.h>
97#endif
98#include <netinet/tcp.h>
99#include <netinet/udp.h>
100#include <netinet/ip_icmp.h>
101#include "netinet/ip_compat.h"
102#include <netinet/tcpip.h>
103#include "netinet/ip_fil.h"
104#include "netinet/ip_nat.h"
105#include "netinet/ip_frag.h"
106#include "netinet/ip_state.h"
107#include "netinet/ip_proxy.h"
108#include "netinet/ipl.h"
109#ifdef	IPFILTER_SYNC
110#include "netinet/ip_sync.h"
111#endif
112#if (__FreeBSD_version >= 300000)
113# include <sys/malloc.h>
114#endif
115/* END OF INCLUDES */
116
117#undef	SOCKADDR_IN
118#define	SOCKADDR_IN	struct sockaddr_in
119
120#if !defined(lint)
121#if defined(__NetBSD__)
122#include <sys/cdefs.h>
123__KERNEL_RCSID(0, "$NetBSD: ip_nat.c,v 1.44 2011/11/27 10:53:07 mbalmer Exp $");
124#else
125static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
126static const char rcsid[] = "@(#)Id: ip_nat.c,v 2.195.2.130 2010/03/16 02:24:52 darrenr Exp";
127#endif
128#endif
129
130
131/* ======================================================================== */
132/* How the NAT is organised and works.                                      */
133/*                                                                          */
134/* Inside (interface y) NAT       Outside (interface x)                     */
135/* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
137/* ------------>         |   ------------>                                  */
138/* src=10.1.1.1          |   src=192.1.1.1                                  */
139/*                       |                                                  */
140/*                       |   in, processed by fr_checknatin() for x         */
141/* <------------         |   <------------                                  */
142/* dst=10.1.1.1          |   dst=192.1.1.1                                  */
143/* -------------------- -+- -------------------------------------           */
144/* fr_checknatout() - changes ip_src and if required, sport                 */
145/*             - creates a new mapping, if required.                        */
146/* fr_checknatin()  - changes ip_dst and if required, dport                 */
147/*                                                                          */
148/* In the NAT table, internal source is recorded as "in" and externally     */
149/* seen as "out".                                                           */
150/* ======================================================================== */
151
152
153nat_t	**nat_table[2] = { NULL, NULL },
154	*nat_instances = NULL;
155ipnat_t	*nat_list = NULL;
156u_int	ipf_nattable_max = NAT_TABLE_MAX;
157u_int	ipf_nattable_sz = NAT_TABLE_SZ;
158u_int	ipf_natrules_sz = NAT_SIZE;
159u_int	ipf_rdrrules_sz = RDR_SIZE;
160u_int	ipf_hostmap_sz = HOSTMAP_SIZE;
161u_int	fr_nat_maxbucket = 0,
162	fr_nat_maxbucket_reset = 1;
163u_32_t	nat_masks = 0;
164u_32_t	rdr_masks = 0;
165u_long	nat_last_force_flush = 0;
166ipnat_t	**nat_rules = NULL;
167ipnat_t	**rdr_rules = NULL;
168hostmap_t	**ipf_hm_maptable  = NULL;
169hostmap_t	*ipf_hm_maplist  = NULL;
170ipftq_t	nat_tqb[IPF_TCP_NSTATES];
171ipftq_t	nat_udptq;
172ipftq_t	nat_icmptq;
173ipftq_t	nat_iptq;
174ipftq_t	*nat_utqe = NULL;
175int	fr_nat_doflush = 0;
176#ifdef  IPFILTER_LOG
177int	nat_logging = 1;
178#else
179int	nat_logging = 0;
180#endif
181
182u_long	fr_defnatage = DEF_NAT_AGE,
183	fr_defnatipage = 120,		/* 60 seconds */
184	fr_defnaticmpage = 6;		/* 3 seconds */
185natstat_t nat_stats;
186int	fr_nat_lock = 0;
187int	fr_nat_init = 0;
188#if SOLARIS && !defined(_INET_IP_STACK_H)
189extern	int		pfil_delayed_copy;
190#endif
191
192static	int	nat_flush_entry(void *);
193static	int	nat_flushtable(void);
194static	int	nat_clearlist(void);
195static	void	nat_addnat(struct ipnat *);
196static	void	nat_addrdr(struct ipnat *);
197static	void	nat_delrdr(struct ipnat *);
198static	void	nat_delnat(struct ipnat *);
199static	int	fr_natgetent(void *, int);
200static	int	fr_natgetsz(void *, int);
201static	int	fr_natputent(void *, int);
202static	int	nat_extraflush(int);
203static	int	nat_gettable(char *);
204static	void	nat_tabmove(nat_t *);
205static	int	nat_match(fr_info_t *, ipnat_t *);
206static	INLINE	int
207nat_newmap(fr_info_t *, nat_t *, natinfo_t *);
208static	INLINE	int
209nat_newrdr(fr_info_t *, nat_t *, natinfo_t *);
210static	hostmap_t *nat_hostmap(ipnat_t *, struct in_addr,
211				    struct in_addr, struct in_addr, u_32_t);
212static	int	nat_icmpquerytype4(int);
213static	int	nat_siocaddnat(ipnat_t *, ipnat_t **, int);
214static	void	nat_siocdelnat(ipnat_t *, ipnat_t **, int);
215static	int	nat_finalise(fr_info_t *, nat_t *, natinfo_t *,
216				      tcphdr_t *, nat_t **, int);
217static	int	nat_resolverule(ipnat_t *);
218static	nat_t	*fr_natclone(fr_info_t *, nat_t *);
219static	void	nat_mssclamp(tcphdr_t *, u_32_t, fr_info_t *, u_short *);
220static	int	nat_wildok(nat_t *, int, int, int, int);
221static	int	nat_getnext(ipftoken_t *, ipfgeniter_t *, ipfobj_t *);
222static	int	nat_iterator(ipftoken_t *, ipfgeniter_t *, ipfobj_t *);
223
224
225/* ------------------------------------------------------------------------ */
226/* Function:    fr_natinit                                                  */
227/* Returns:     int - 0 == success, -1 == failure                           */
228/* Parameters:  Nil                                                         */
229/*                                                                          */
230/* Initialise all of the NAT locks, tables and other structures.            */
231/* ------------------------------------------------------------------------ */
232int
233fr_natinit(void)
234{
235	int i;
236
237	KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
238	if (nat_table[0] != NULL)
239		bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
240	else
241		return -1;
242
243	KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
244	if (nat_table[1] != NULL)
245		bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
246	else
247		return -2;
248
249	KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
250	if (nat_rules != NULL)
251		bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
252	else
253		return -3;
254
255	KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
256	if (rdr_rules != NULL)
257		bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
258	else
259		return -4;
260
261	KMALLOCS(ipf_hm_maptable, hostmap_t **, \
262		 sizeof(hostmap_t *) * ipf_hostmap_sz);
263	if (ipf_hm_maptable != NULL)
264		bzero((char *)ipf_hm_maptable,
265		      sizeof(hostmap_t *) * ipf_hostmap_sz);
266	else
267		return -5;
268	ipf_hm_maplist = NULL;
269
270	KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
271		 ipf_nattable_sz * sizeof(u_long));
272	if (nat_stats.ns_bucketlen[0] == NULL)
273		return -6;
274	bzero((char *)nat_stats.ns_bucketlen[0],
275	      ipf_nattable_sz * sizeof(u_long));
276
277	KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
278		 ipf_nattable_sz * sizeof(u_long));
279	if (nat_stats.ns_bucketlen[1] == NULL)
280		return -7;
281
282	bzero((char *)nat_stats.ns_bucketlen[1],
283	      ipf_nattable_sz * sizeof(u_long));
284
285	if (fr_nat_maxbucket == 0) {
286		for (i = ipf_nattable_sz; i > 0; i >>= 1)
287			fr_nat_maxbucket++;
288		fr_nat_maxbucket *= 2;
289	}
290
291	fr_sttab_init(nat_tqb);
292	/*
293	 * Increase this because we may have "keep state" following this too
294	 * and packet storms can occur if this is removed too quickly.
295	 */
296	nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
297	nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
298	nat_udptq.ifq_ttl = fr_defnatage;
299	nat_udptq.ifq_ref = 1;
300	nat_udptq.ifq_head = NULL;
301	nat_udptq.ifq_tail = &nat_udptq.ifq_head;
302	MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
303	nat_udptq.ifq_next = &nat_icmptq;
304	nat_icmptq.ifq_ttl = fr_defnaticmpage;
305	nat_icmptq.ifq_ref = 1;
306	nat_icmptq.ifq_head = NULL;
307	nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
308	MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
309	nat_icmptq.ifq_next = &nat_iptq;
310	nat_iptq.ifq_ttl = fr_defnatipage;
311	nat_iptq.ifq_ref = 1;
312	nat_iptq.ifq_head = NULL;
313	nat_iptq.ifq_tail = &nat_iptq.ifq_head;
314	MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
315	nat_iptq.ifq_next = NULL;
316
317	for (i = 0; i < IPF_TCP_NSTATES; i++) {
318		if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
319			nat_tqb[i].ifq_ttl = fr_defnaticmpage;
320#ifdef LARGE_NAT
321		else if (nat_tqb[i].ifq_ttl > fr_defnatage)
322			nat_tqb[i].ifq_ttl = fr_defnatage;
323#endif
324	}
325
326	/*
327	 * Increase this because we may have "keep state" following
328	 * this too and packet storms can occur if this is removed
329	 * too quickly.
330	 */
331	nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
332
333	RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
334	RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
335	MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
336	MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
337
338	fr_nat_init = 1;
339
340	return 0;
341}
342
343
344/* ------------------------------------------------------------------------ */
345/* Function:    nat_addrdr                                                  */
346/* Returns:     Nil                                                         */
347/* Parameters:  n(I) - pointer to NAT rule to add                           */
348/*                                                                          */
349/* Adds a redirect rule to the hash table of redirect rules and the list of */
350/* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
351/* use by redirect rules.                                                   */
352/* ------------------------------------------------------------------------ */
353static void
354nat_addrdr(ipnat_t *n)
355{
356	ipnat_t **np;
357	u_32_t j;
358	u_int hv;
359	int k;
360
361	k = count4bits(n->in_outmsk);
362	if ((k >= 0) && (k != 32))
363		rdr_masks |= 1 << k;
364	j = (n->in_outip & n->in_outmsk);
365	hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
366	np = rdr_rules + hv;
367	while (*np != NULL)
368		np = &(*np)->in_rnext;
369	n->in_rnext = NULL;
370	n->in_prnext = np;
371	n->in_hv = hv;
372	*np = n;
373}
374
375
376/* ------------------------------------------------------------------------ */
377/* Function:    nat_addnat                                                  */
378/* Returns:     Nil                                                         */
379/* Parameters:  n(I) - pointer to NAT rule to add                           */
380/*                                                                          */
381/* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
382/* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
383/* redirect rules.                                                          */
384/* ------------------------------------------------------------------------ */
385static void
386nat_addnat(ipnat_t *n)
387{
388	ipnat_t **np;
389	u_32_t j;
390	u_int hv;
391	int k;
392
393	k = count4bits(n->in_inmsk);
394	if ((k >= 0) && (k != 32))
395		nat_masks |= 1 << k;
396	j = (n->in_inip & n->in_inmsk);
397	hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
398	np = nat_rules + hv;
399	while (*np != NULL)
400		np = &(*np)->in_mnext;
401	n->in_mnext = NULL;
402	n->in_pmnext = np;
403	n->in_hv = hv;
404	*np = n;
405}
406
407
408/* ------------------------------------------------------------------------ */
409/* Function:    nat_delrdr                                                  */
410/* Returns:     Nil                                                         */
411/* Parameters:  n(I) - pointer to NAT rule to delete                        */
412/*                                                                          */
413/* Removes a redirect rule from the hash table of redirect rules.           */
414/* ------------------------------------------------------------------------ */
415static void
416nat_delrdr(ipnat_t *n)
417{
418	if (n->in_rnext)
419		n->in_rnext->in_prnext = n->in_prnext;
420	*n->in_prnext = n->in_rnext;
421}
422
423
424/* ------------------------------------------------------------------------ */
425/* Function:    nat_delnat                                                  */
426/* Returns:     Nil                                                         */
427/* Parameters:  n(I) - pointer to NAT rule to delete                        */
428/*                                                                          */
429/* Removes a NAT map rule from the hash table of NAT map rules.             */
430/* ------------------------------------------------------------------------ */
431static void
432nat_delnat(ipnat_t *n)
433{
434	if (n->in_mnext != NULL)
435		n->in_mnext->in_pmnext = n->in_pmnext;
436	*n->in_pmnext = n->in_mnext;
437}
438
439
440/* ------------------------------------------------------------------------ */
441/* Function:    nat_hostmap                                                 */
442/* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
443/*                                else a pointer to the hostmapping to use  */
444/* Parameters:  np(I)   - pointer to NAT rule                               */
445/*              real(I) - real IP address                                   */
446/*              map(I)  - mapped IP address                                 */
447/*              port(I) - destination port number                           */
448/* Write Locks: ipf_nat                                                     */
449/*                                                                          */
450/* Check if an ip address has already been allocated for a given mapping    */
451/* that is not doing port based translation.  If is not yet allocated, then */
452/* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
453/* ------------------------------------------------------------------------ */
454static struct hostmap *
455nat_hostmap(ipnat_t *np, struct in_addr src, struct in_addr dst, struct in_addr map, u_32_t port)
456{
457	hostmap_t *hm;
458	u_int hv;
459
460	hv = (src.s_addr ^ dst.s_addr);
461	hv += src.s_addr;
462	hv += dst.s_addr;
463	hv %= HOSTMAP_SIZE;
464	for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_hnext)
465		if ((hm->hm_srcip.s_addr == src.s_addr) &&
466		    (hm->hm_dstip.s_addr == dst.s_addr) &&
467		    ((np == NULL) || (np == hm->hm_ipnat)) &&
468		    ((port == 0) || (port == hm->hm_port))) {
469			hm->hm_ref++;
470			return hm;
471		}
472
473	if (np == NULL)
474		return NULL;
475
476	KMALLOC(hm, hostmap_t *);
477	if (hm) {
478		hm->hm_next = ipf_hm_maplist;
479		hm->hm_pnext = &ipf_hm_maplist;
480		if (ipf_hm_maplist != NULL)
481			ipf_hm_maplist->hm_pnext = &hm->hm_next;
482		ipf_hm_maplist = hm;
483		hm->hm_hnext = ipf_hm_maptable[hv];
484		hm->hm_phnext = ipf_hm_maptable + hv;
485		if (ipf_hm_maptable[hv] != NULL)
486			ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
487		ipf_hm_maptable[hv] = hm;
488		hm->hm_ipnat = np;
489		hm->hm_srcip = src;
490		hm->hm_dstip = dst;
491		hm->hm_mapip = map;
492		hm->hm_ref = 1;
493		hm->hm_port = port;
494	}
495	return hm;
496}
497
498
499/* ------------------------------------------------------------------------ */
500/* Function:    fr_hostmapdel                                               */
501/* Returns:     Nil                                                         */
502/* Parameters:  hmp(I) - pointer to hostmap structure pointer               */
503/* Write Locks: ipf_nat                                                     */
504/*                                                                          */
505/* Decrement the references to this hostmap structure by one.  If this      */
506/* reaches zero then remove it and free it.                                 */
507/* ------------------------------------------------------------------------ */
508void
509fr_hostmapdel(struct hostmap **hmp)
510{
511	struct hostmap *hm;
512
513	hm = *hmp;
514	*hmp = NULL;
515
516	hm->hm_ref--;
517	if (hm->hm_ref == 0) {
518		if (hm->hm_hnext)
519			hm->hm_hnext->hm_phnext = hm->hm_phnext;
520		*hm->hm_phnext = hm->hm_hnext;
521		if (hm->hm_next)
522			hm->hm_next->hm_pnext = hm->hm_pnext;
523		*hm->hm_pnext = hm->hm_next;
524		KFREE(hm);
525	}
526}
527
528
529/* ------------------------------------------------------------------------ */
530/* Function:    fix_outcksum                                                */
531/* Returns:     Nil                                                         */
532/* Parameters:  fin(I) - pointer to packet information                      */
533/*              sp(I)  - location of 16bit checksum to update               */
534/*              n((I)  - amount to adjust checksum by                       */
535/*                                                                          */
536/* Adjusts the 16bit checksum by "n" for packets going out.                 */
537/* ------------------------------------------------------------------------ */
538void
539fix_outcksum(fr_info_t *fin, u_short *sp, u_32_t n)
540{
541	u_short sumshort;
542	u_32_t sum1;
543
544	if (n == 0)
545		return;
546
547	if (n & NAT_HW_CKSUM) {
548		n &= 0xffff;
549		n += fin->fin_dlen;
550		n = (n & 0xffff) + (n >> 16);
551		*sp = n & 0xffff;
552		return;
553	}
554	sum1 = (~ntohs(*sp)) & 0xffff;
555	sum1 += (n);
556	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
557	/* Again */
558	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
559	sumshort = ~(u_short)sum1;
560	*(sp) = htons(sumshort);
561}
562
563
564/* ------------------------------------------------------------------------ */
565/* Function:    fix_incksum                                                 */
566/* Returns:     Nil                                                         */
567/* Parameters:  fin(I) - pointer to packet information                      */
568/*              sp(I)  - location of 16bit checksum to update               */
569/*              n((I)  - amount to adjust checksum by                       */
570/*                                                                          */
571/* Adjusts the 16bit checksum by "n" for packets going in.                  */
572/* ------------------------------------------------------------------------ */
573void
574fix_incksum(fr_info_t *fin, u_short *sp, u_32_t n)
575{
576	u_short sumshort;
577	u_32_t sum1;
578
579	if (n == 0)
580		return;
581
582	if (n & NAT_HW_CKSUM) {
583		n &= 0xffff;
584		n += fin->fin_dlen;
585		n = (n & 0xffff) + (n >> 16);
586		*sp = n & 0xffff;
587		return;
588	}
589	sum1 = (~ntohs(*sp)) & 0xffff;
590	sum1 += ~(n) & 0xffff;
591	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
592	/* Again */
593	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
594	sumshort = ~(u_short)sum1;
595	*(sp) = htons(sumshort);
596}
597
598
599/* ------------------------------------------------------------------------ */
600/* Function:    fix_datacksum                                               */
601/* Returns:     Nil                                                         */
602/* Parameters:  sp(I)  - location of 16bit checksum to update               */
603/*              n((I)  - amount to adjust checksum by                       */
604/*                                                                          */
605/* Fix_datacksum is used *only* for the adjustments of checksums in the     */
606/* data section of an IP packet.                                            */
607/*                                                                          */
608/* The only situation in which you need to do this is when NAT'ing an       */
609/* ICMP error message. Such a message, contains in its body the IP header   */
610/* of the original IP packet, that causes the error.                        */
611/*                                                                          */
612/* You can't use fix_incksum or fix_outcksum in that case, because for the  */
613/* kernel the data section of the ICMP error is just data, and no special   */
614/* processing like hardware cksum or ntohs processing have been done by the */
615/* kernel on the data section.                                              */
616/* ------------------------------------------------------------------------ */
617void
618fix_datacksum(u_short *sp, u_32_t n)
619{
620	u_short sumshort;
621	u_32_t sum1;
622
623	if (n == 0)
624		return;
625
626	sum1 = (~ntohs(*sp)) & 0xffff;
627	sum1 += (n);
628	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
629	/* Again */
630	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
631	sumshort = ~(u_short)sum1;
632	*(sp) = htons(sumshort);
633}
634
635
636/* ------------------------------------------------------------------------ */
637/* Function:    fr_nat_ioctl                                                */
638/* Returns:     int - 0 == success, != 0 == failure                         */
639/* Parameters:  data(I) - pointer to ioctl data                             */
640/*              cmd(I)  - ioctl command integer                             */
641/*              mode(I) - file mode bits used with open                     */
642/*                                                                          */
643/* Processes an ioctl call made to operate on the IP Filter NAT device.     */
644/* ------------------------------------------------------------------------ */
645int
646fr_nat_ioctl(void * data, ioctlcmd_t cmd, int mode, int uid, void *ctx)
647{
648	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
649	int error = 0, ret, arg, getlock;
650	ipnat_t natd;
651	SPL_INT(s);
652
653#if defined(BSD) && (BSD >= 199306) && defined(_KERNEL)
654# if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
655	if ((mode & FWRITE) &&
656	     kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
657				     KAUTH_REQ_NETWORK_FIREWALL_FW,
658				     NULL, NULL, NULL)) {
659		return EPERM;
660	}
661# else
662#  if defined(__FreeBSD_version) && (__FreeBSD_version >= 500034)
663        if (securelevel_ge(curthread->td_ucred, 3) && (mode & FWRITE)) {
664#  else
665        if ((securelevel >= 3) && (mode & FWRITE)) {
666#  endif
667                return EPERM;
668	}
669# endif
670#endif
671
672#if defined(__osf__) && defined(_KERNEL)
673	getlock = 0;
674#else
675	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
676#endif
677
678	nat = NULL;     /* XXX gcc -Wuninitialized */
679	if (cmd == (ioctlcmd_t)SIOCADNAT) {
680		KMALLOC(nt, ipnat_t *);
681	} else {
682		nt = NULL;
683	}
684
685	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
686		if (mode & NAT_SYSSPACE) {
687			bcopy(data, (char *)&natd, sizeof(natd));
688			error = 0;
689		} else {
690			error = fr_inobj(data, NULL, &natd, IPFOBJ_IPNAT);
691		}
692	}
693
694	if (error != 0)
695		goto done;
696
697	/*
698	 * For add/delete, look to see if the NAT entry is already present
699	 */
700	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
701		nat = &natd;
702		if (nat->in_v == 0)	/* For backward compat. */
703			nat->in_v = 4;
704		nat->in_flags &= IPN_USERFLAGS;
705		if ((nat->in_redir & NAT_MAPBLK) == 0) {
706			if ((nat->in_flags & IPN_SPLIT) == 0)
707				nat->in_inip &= nat->in_inmsk;
708			if ((nat->in_flags & IPN_IPRANGE) == 0)
709				nat->in_outip &= nat->in_outmsk;
710		}
711		MUTEX_ENTER(&ipf_natio);
712		for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
713			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
714					IPN_CMPSIZ) == 0) {
715				if (nat->in_redir == NAT_REDIRECT &&
716				    nat->in_pnext != n->in_pnext)
717					continue;
718				break;
719			}
720	}
721
722	switch (cmd)
723	{
724#ifdef  IPFILTER_LOG
725	case SIOCIPFFB :
726	{
727		int tmp;
728
729		if (!(mode & FWRITE))
730			error = EPERM;
731		else {
732			tmp = ipflog_clear(IPL_LOGNAT);
733			error = BCOPYOUT((char *)&tmp, (char *)data,
734					 sizeof(tmp));
735			if (error != 0)
736				error = EFAULT;
737		}
738		break;
739	}
740
741	case SIOCSETLG :
742		if (!(mode & FWRITE))
743			error = EPERM;
744		else {
745			error = BCOPYIN((char *)data, (char *)&nat_logging,
746					sizeof(nat_logging));
747			if (error != 0)
748				error = EFAULT;
749		}
750		break;
751
752	case SIOCGETLG :
753		error = BCOPYOUT((char *)&nat_logging, (char *)data,
754				 sizeof(nat_logging));
755		if (error != 0)
756			error = EFAULT;
757		break;
758
759	case FIONREAD :
760		arg = iplused[IPL_LOGNAT];
761		error = BCOPYOUT(&arg, data, sizeof(arg));
762		if (error != 0)
763			error = EFAULT;
764		break;
765#endif
766	case SIOCADNAT :
767		if (!(mode & FWRITE)) {
768			error = EPERM;
769		} else if (n != NULL) {
770			error = EEXIST;
771		} else if (nt == NULL) {
772			error = ENOMEM;
773		}
774		if (error != 0) {
775			MUTEX_EXIT(&ipf_natio);
776			break;
777		}
778		bcopy((char *)nat, (char *)nt, sizeof(*n));
779		error = nat_siocaddnat(nt, np, getlock);
780		MUTEX_EXIT(&ipf_natio);
781		if (error == 0)
782			nt = NULL;
783		break;
784
785	case SIOCRMNAT :
786		if (!(mode & FWRITE)) {
787			error = EPERM;
788			n = NULL;
789		} else if (n == NULL) {
790			error = ESRCH;
791		}
792
793		if (error != 0) {
794			MUTEX_EXIT(&ipf_natio);
795			break;
796		}
797		nat_siocdelnat(n, np, getlock);
798
799		MUTEX_EXIT(&ipf_natio);
800		n = NULL;
801		break;
802
803	case SIOCGNATS :
804		nat_stats.ns_table[0] = nat_table[0];
805		nat_stats.ns_table[1] = nat_table[1];
806		nat_stats.ns_list = nat_list;
807		nat_stats.ns_maptable = ipf_hm_maptable;
808		nat_stats.ns_maplist = ipf_hm_maplist;
809		nat_stats.ns_nattab_sz = ipf_nattable_sz;
810		nat_stats.ns_nattab_max = ipf_nattable_max;
811		nat_stats.ns_rultab_sz = ipf_natrules_sz;
812		nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
813		nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
814		nat_stats.ns_instances = nat_instances;
815		nat_stats.ns_apslist = ap_sess_list;
816		nat_stats.ns_ticks = fr_ticks;
817		error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
818		break;
819
820	case SIOCGNATL :
821	    {
822		natlookup_t nl;
823
824		error = fr_inobj(data, NULL, &nl, IPFOBJ_NATLOOKUP);
825		if (error == 0) {
826			void *ptr;
827
828			if (getlock) {
829				READ_ENTER(&ipf_nat);
830			}
831			ptr = nat_lookupredir(&nl);
832			if (getlock) {
833				RWLOCK_EXIT(&ipf_nat);
834			}
835			if (ptr != NULL) {
836				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
837			} else {
838				error = ESRCH;
839			}
840		}
841		break;
842	    }
843
844	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
845		if (!(mode & FWRITE)) {
846			error = EPERM;
847			break;
848		}
849		if (getlock) {
850			WRITE_ENTER(&ipf_nat);
851		}
852
853		error = BCOPYIN(data, &arg, sizeof(arg));
854		if (error != 0)
855			error = EFAULT;
856		else {
857			if (arg == 0)
858				ret = nat_flushtable();
859			else if (arg == 1)
860				ret = nat_clearlist();
861			else
862				ret = nat_extraflush(arg);
863		}
864
865		if (getlock) {
866			RWLOCK_EXIT(&ipf_nat);
867		}
868		if (error == 0) {
869			error = BCOPYOUT(&ret, data, sizeof(ret));
870		}
871		break;
872
873	case SIOCPROXY :
874		error = appr_ioctl(data, cmd, mode, ctx);
875		break;
876
877	case SIOCSTLCK :
878		if (!(mode & FWRITE)) {
879			error = EPERM;
880		} else {
881			error = fr_lock(data, &fr_nat_lock);
882		}
883		break;
884
885	case SIOCSTPUT :
886		if ((mode & FWRITE) != 0) {
887			error = fr_natputent(data, getlock);
888		} else {
889			error = EACCES;
890		}
891		break;
892
893	case SIOCSTGSZ :
894		if (fr_nat_lock) {
895			error = fr_natgetsz(data, getlock);
896		} else
897			error = EACCES;
898		break;
899
900	case SIOCSTGET :
901		if (fr_nat_lock) {
902			error = fr_natgetent(data, getlock);
903		} else
904			error = EACCES;
905		break;
906
907	case SIOCGENITER :
908	    {
909		ipfgeniter_t iter;
910		ipftoken_t *token;
911		ipfobj_t obj;
912
913		SPL_SCHED(s);
914		error = fr_inobj(data, &obj, &iter, IPFOBJ_GENITER);
915		if (error == 0) {
916			token = ipf_findtoken(iter.igi_type, uid, ctx);
917			if (token != NULL) {
918				error = nat_iterator(token, &iter, &obj);
919				WRITE_ENTER(&ipf_tokens);
920				if (token->ipt_data == NULL)
921					ipf_freetoken(token);
922				else
923					ipf_dereftoken(token);
924				RWLOCK_EXIT(&ipf_tokens);
925			}
926		}
927		SPL_X(s);
928		break;
929	    }
930
931	case SIOCIPFDELTOK :
932		error = BCOPYIN((void *)data, (void *)&arg, sizeof(arg));
933		if (error == 0) {
934			SPL_SCHED(s);
935			error = ipf_deltoken(arg, uid, ctx);
936			SPL_X(s);
937		} else {
938			error = EFAULT;
939		}
940		break;
941
942	case SIOCGTQTAB :
943		error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
944		break;
945
946	case SIOCGTABL :
947		error = nat_gettable(data);
948		break;
949
950	default :
951		error = EINVAL;
952		break;
953	}
954done:
955	if (nt != NULL)
956		KFREE(nt);
957	return error;
958}
959
960
961/* ------------------------------------------------------------------------ */
962/* Function:    nat_siocaddnat                                              */
963/* Returns:     int - 0 == success, != 0 == failure                         */
964/* Parameters:  n(I)       - pointer to new NAT rule                        */
965/*              np(I)      - pointer to where to insert new NAT rule        */
966/*              getlock(I) - flag indicating if lock on ipf_nat is held     */
967/* Mutex Locks: ipf_natio                                                   */
968/*                                                                          */
969/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
970/* from information passed to the kernel, then add it  to the appropriate   */
971/* NAT rule table(s).                                                       */
972/* ------------------------------------------------------------------------ */
973static int
974nat_siocaddnat(ipnat_t *n, ipnat_t **np, int getlock)
975{
976	int error = 0, i, j;
977
978	if (nat_resolverule(n) != 0)
979		return ENOENT;
980
981	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
982		return EINVAL;
983
984	n->in_use = 0;
985	if (n->in_redir & NAT_MAPBLK)
986		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
987	else if (n->in_flags & IPN_AUTOPORTMAP)
988		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
989	else if (n->in_flags & IPN_IPRANGE)
990		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
991	else if (n->in_flags & IPN_SPLIT)
992		n->in_space = 2;
993	else if (n->in_outmsk != 0)
994		n->in_space = ~ntohl(n->in_outmsk);
995	else
996		n->in_space = 1;
997
998	/*
999	 * Calculate the number of valid IP addresses in the output
1000	 * mapping range.  In all cases, the range is inclusive of
1001	 * the start and ending IP addresses.
1002	 * If to a CIDR address, lose 2: broadcast + network address
1003	 *                               (so subtract 1)
1004	 * If to a range, add one.
1005	 * If to a single IP address, set to 1.
1006	 */
1007	if (n->in_space) {
1008		if ((n->in_flags & IPN_IPRANGE) != 0)
1009			n->in_space += 1;
1010		else
1011			n->in_space -= 1;
1012	} else
1013		n->in_space = 1;
1014
1015	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1016	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1017		n->in_nip = ntohl(n->in_outip) + 1;
1018	else if ((n->in_flags & IPN_SPLIT) &&
1019		 (n->in_redir & NAT_REDIRECT))
1020		n->in_nip = ntohl(n->in_inip);
1021	else
1022		n->in_nip = ntohl(n->in_outip);
1023	if (n->in_redir & NAT_MAP) {
1024		n->in_pnext = ntohs(n->in_pmin);
1025		/*
1026		 * Multiply by the number of ports made available.
1027		 */
1028		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1029			n->in_space *= (ntohs(n->in_pmax) -
1030					ntohs(n->in_pmin) + 1);
1031			/*
1032			 * Because two different sources can map to
1033			 * different destinations but use the same
1034			 * local IP#/port #.
1035			 * If the result is smaller than in_space, then
1036			 * we may have wrapped around 32bits.
1037			 */
1038			i = n->in_inmsk;
1039			if ((i != 0) && (i != 0xffffffff)) {
1040				j = n->in_space * (~ntohl(i) + 1);
1041				if (j >= n->in_space)
1042					n->in_space = j;
1043				else
1044					n->in_space = 0xffffffff;
1045			}
1046		}
1047		/*
1048		 * If no protocol is specified, multiple by 256 to allow for
1049		 * at least one IP:IP mapping per protocol.
1050		 */
1051		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1052				j = n->in_space * 256;
1053				if (j >= n->in_space)
1054					n->in_space = j;
1055				else
1056					n->in_space = 0xffffffff;
1057		}
1058	}
1059
1060	/* Otherwise, these fields are preset */
1061
1062	if (getlock) {
1063		WRITE_ENTER(&ipf_nat);
1064	}
1065	n->in_next = NULL;
1066	*np = n;
1067
1068	if (n->in_age[0] != 0)
1069		n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1070
1071	if (n->in_age[1] != 0)
1072		n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
1073
1074	if (n->in_redir & NAT_REDIRECT) {
1075		n->in_flags &= ~IPN_NOTDST;
1076		nat_addrdr(n);
1077	}
1078	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1079		n->in_flags &= ~IPN_NOTSRC;
1080		nat_addnat(n);
1081	}
1082	MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1083
1084	n = NULL;
1085	nat_stats.ns_rules++;
1086#if SOLARIS && !defined(_INET_IP_STACK_H)
1087	pfil_delayed_copy = 0;
1088#endif
1089	if (getlock) {
1090		RWLOCK_EXIT(&ipf_nat);			/* WRITE */
1091	}
1092
1093	return error;
1094}
1095
1096
1097/* ------------------------------------------------------------------------ */
1098/* Function:    nat_resolvrule                                              */
1099/* Returns:     Nil                                                         */
1100/* Parameters:  n(I)  - pointer to NAT rule                                 */
1101/*                                                                          */
1102/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1103/* from information passed to the kernel, then add it  to the appropriate   */
1104/* NAT rule table(s).                                                       */
1105/* ------------------------------------------------------------------------ */
1106static int
1107nat_resolverule(ipnat_t *n)
1108{
1109	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1110	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1111
1112	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1113	if (n->in_ifnames[1][0] == '\0') {
1114		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1115		n->in_ifps[1] = n->in_ifps[0];
1116	} else {
1117		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
1118	}
1119
1120	if (n->in_plabel[0] != '\0') {
1121		n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1122		if (n->in_apr == NULL)
1123			return -1;
1124	}
1125	return 0;
1126}
1127
1128
1129/* ------------------------------------------------------------------------ */
1130/* Function:    nat_siocdelnat                                              */
1131/* Returns:     int - 0 == success, != 0 == failure                         */
1132/* Parameters:  n(I)       - pointer to new NAT rule                        */
1133/*              np(I)      - pointer to where to insert new NAT rule        */
1134/*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1135/* Mutex Locks: ipf_natio                                                   */
1136/*                                                                          */
1137/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1138/* from information passed to the kernel, then add it  to the appropriate   */
1139/* NAT rule table(s).                                                       */
1140/* ------------------------------------------------------------------------ */
1141static void
1142nat_siocdelnat(ipnat_t *n, ipnat_t **np, int getlock)
1143{
1144	if (getlock) {
1145		WRITE_ENTER(&ipf_nat);
1146	}
1147	if (n->in_redir & NAT_REDIRECT)
1148		nat_delrdr(n);
1149	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1150		nat_delnat(n);
1151	if (nat_list == NULL) {
1152		nat_masks = 0;
1153		rdr_masks = 0;
1154	}
1155
1156	if (n->in_tqehead[0] != NULL) {
1157		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1158			fr_freetimeoutqueue(n->in_tqehead[1]);
1159		}
1160	}
1161
1162	if (n->in_tqehead[1] != NULL) {
1163		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1164			fr_freetimeoutqueue(n->in_tqehead[1]);
1165		}
1166	}
1167
1168	*np = n->in_next;
1169
1170	if (n->in_use == 0) {
1171		if (n->in_apr)
1172			appr_free(n->in_apr);
1173		MUTEX_DESTROY(&n->in_lock);
1174		KFREE(n);
1175		nat_stats.ns_rules--;
1176#if SOLARIS && !defined(_INET_IP_STACK_H)
1177		if (nat_stats.ns_rules == 0)
1178			pfil_delayed_copy = 1;
1179#endif
1180	} else {
1181		n->in_flags |= IPN_DELETE;
1182		n->in_next = NULL;
1183	}
1184	if (getlock) {
1185		RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
1186	}
1187}
1188
1189
1190/* ------------------------------------------------------------------------ */
1191/* Function:    fr_natgetsz                                                 */
1192/* Returns:     int - 0 == success, != 0 is the error value.                */
1193/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1194/*                        get the size of.                                  */
1195/*                                                                          */
1196/* Handle SIOCSTGSZ.                                                        */
1197/* Return the size of the nat list entry to be copied back to user space.   */
1198/* The size of the entry is stored in the ng_sz field and the enture natget */
1199/* structure is copied back to the user.                                    */
1200/* ------------------------------------------------------------------------ */
1201static int
1202fr_natgetsz(void * data, int getlock)
1203{
1204	ap_session_t *aps;
1205	nat_t *nat, *n;
1206	natget_t ng;
1207
1208	if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1209		return EFAULT;
1210
1211	if (getlock) {
1212		READ_ENTER(&ipf_nat);
1213	}
1214
1215	nat = ng.ng_ptr;
1216	if (!nat) {
1217		nat = nat_instances;
1218		ng.ng_sz = 0;
1219		/*
1220		 * Empty list so the size returned is 0.  Simple.
1221		 */
1222		if (nat == NULL) {
1223			if (getlock) {
1224				RWLOCK_EXIT(&ipf_nat);
1225			}
1226			if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1227				return EFAULT;
1228			return 0;
1229		}
1230	} else {
1231		/*
1232		 * Make sure the pointer we're copying from exists in the
1233		 * current list of entries.  Security precaution to prevent
1234		 * copying of random kernel data.
1235		 */
1236		for (n = nat_instances; n; n = n->nat_next)
1237			if (n == nat)
1238				break;
1239		if (n == NULL) {
1240			if (getlock) {
1241				RWLOCK_EXIT(&ipf_nat);
1242			}
1243			return ESRCH;
1244		}
1245	}
1246
1247	/*
1248	 * Incluse any space required for proxy data structures.
1249	 */
1250	ng.ng_sz = sizeof(nat_save_t);
1251	aps = nat->nat_aps;
1252	if (aps != NULL) {
1253		ng.ng_sz += sizeof(ap_session_t) - 4;
1254		if (aps->aps_data != 0)
1255			ng.ng_sz += aps->aps_psiz;
1256	}
1257	if (getlock) {
1258		RWLOCK_EXIT(&ipf_nat);
1259	}
1260
1261	if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1262		return EFAULT;
1263	return 0;
1264}
1265
1266
1267/* ------------------------------------------------------------------------ */
1268/* Function:    fr_natgetent                                                */
1269/* Returns:     int - 0 == success, != 0 is the error value.                */
1270/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1271/*                        to NAT structure to copy out.                     */
1272/*                                                                          */
1273/* Handle SIOCSTGET.                                                        */
1274/* Copies out NAT entry to user space.  Any additional data held for a      */
1275/* proxy is also copied, as to is the NAT rule which was responsible for it */
1276/* ------------------------------------------------------------------------ */
1277static int
1278fr_natgetent(void * data, int getlock)
1279{
1280	int error, outsize;
1281	ap_session_t *aps;
1282	nat_save_t *ipn, ipns;
1283	nat_t *n, *nat;
1284
1285	error = fr_inobj(data, NULL, &ipns, IPFOBJ_NATSAVE);
1286	if (error != 0)
1287		return error;
1288
1289	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1290		return EINVAL;
1291
1292	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1293	if (ipn == NULL)
1294		return ENOMEM;
1295
1296	if (getlock) {
1297		READ_ENTER(&ipf_nat);
1298	}
1299
1300	ipn->ipn_dsize = ipns.ipn_dsize;
1301	nat = ipns.ipn_next;
1302	if (nat == NULL) {
1303		nat = nat_instances;
1304		if (nat == NULL) {
1305			if (nat_instances == NULL)
1306				error = ENOENT;
1307			goto finished;
1308		}
1309	} else {
1310		/*
1311		 * Make sure the pointer we're copying from exists in the
1312		 * current list of entries.  Security precaution to prevent
1313		 * copying of random kernel data.
1314		 */
1315		for (n = nat_instances; n; n = n->nat_next)
1316			if (n == nat)
1317				break;
1318		if (n == NULL) {
1319			error = ESRCH;
1320			goto finished;
1321		}
1322	}
1323	ipn->ipn_next = nat->nat_next;
1324
1325	/*
1326	 * Copy the NAT structure.
1327	 */
1328	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1329
1330	/*
1331	 * If we have a pointer to the NAT rule it belongs to, save that too.
1332	 */
1333	if (nat->nat_ptr != NULL)
1334		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1335		      sizeof(ipn->ipn_ipnat));
1336
1337	/*
1338	 * If we also know the NAT entry has an associated filter rule,
1339	 * save that too.
1340	 */
1341	if (nat->nat_fr != NULL)
1342		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1343		      sizeof(ipn->ipn_fr));
1344
1345	/*
1346	 * Last but not least, if there is an application proxy session set
1347	 * up for this NAT entry, then copy that out too, including any
1348	 * private data saved along side it by the proxy.
1349	 */
1350	aps = nat->nat_aps;
1351	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1352	if (aps != NULL) {
1353		char *s;
1354
1355		if (outsize < sizeof(*aps)) {
1356			error = ENOBUFS;
1357			goto finished;
1358		}
1359
1360		s = ipn->ipn_data;
1361		bcopy((char *)aps, s, sizeof(*aps));
1362		s += sizeof(*aps);
1363		outsize -= sizeof(*aps);
1364		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1365			bcopy(aps->aps_data, s, aps->aps_psiz);
1366		else
1367			error = ENOBUFS;
1368	}
1369	if (error == 0) {
1370		if (getlock) {
1371			RWLOCK_EXIT(&ipf_nat);
1372			getlock = 0;
1373		}
1374		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1375	}
1376
1377finished:
1378	if (getlock) {
1379		RWLOCK_EXIT(&ipf_nat);
1380	}
1381	if (ipn != NULL) {
1382		KFREES(ipn, ipns.ipn_dsize);
1383	}
1384	return error;
1385}
1386
1387
1388/* ------------------------------------------------------------------------ */
1389/* Function:    fr_natputent                                                */
1390/* Returns:     int - 0 == success, != 0 is the error value.                */
1391/* Parameters:  data(I) -     pointer to natget structure with NAT          */
1392/*                            structure information to load into the kernel */
1393/*              getlock(I) - flag indicating whether or not a write lock    */
1394/*                           on ipf_nat is already held.                    */
1395/*                                                                          */
1396/* Handle SIOCSTPUT.                                                        */
1397/* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1398/* firewall rule data structures, if pointers to them indicate so.          */
1399/* ------------------------------------------------------------------------ */
1400static int
1401fr_natputent(void * data, int getlock)
1402{
1403	nat_save_t *ipn, *ipnn;
1404	ap_session_t *aps;
1405	nat_t *n, *nat;
1406	frentry_t *fr;
1407	fr_info_t *fin;
1408	ipnat_t *in;
1409	int error;
1410
1411	error = fr_inobj(data, NULL, &ipn, IPFOBJ_NATSAVE);
1412	if (error != 0)
1413		return error;
1414
1415	/*
1416	 * Initialise early because of code at junkput label.
1417	 */
1418	in = NULL;
1419	aps = NULL;
1420	nat = NULL;
1421	ipnn = NULL;
1422	fin = NULL;
1423	fr = NULL;
1424
1425	/*
1426	 * New entry, copy in the rest of the NAT entry if it's size is more
1427	 * than just the nat_t structure.
1428	 */
1429	if (ipn->ipn_dsize > sizeof(*ipn)) {
1430		if (ipn->ipn_dsize > 81920) {
1431			error = ENOMEM;
1432			goto junkput;
1433		}
1434
1435		KMALLOCS(ipnn, nat_save_t *, ipn->ipn_dsize);
1436		if (ipnn == NULL) {
1437			KFREE(ipn);
1438			return ENOMEM;
1439		}
1440
1441		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn->ipn_dsize);
1442		if (error != 0) {
1443			error = EFAULT;
1444			goto junkput;
1445		}
1446	} else
1447		ipnn = ipn;
1448
1449	KMALLOC(nat, nat_t *);
1450	if (nat == NULL) {
1451		error = ENOMEM;
1452		goto junkput;
1453	}
1454
1455	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1456	/*
1457	 * Initialize all these so that nat_delete() doesn't cause a crash.
1458	 */
1459	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1460	nat->nat_tqe.tqe_pnext = NULL;
1461	nat->nat_tqe.tqe_next = NULL;
1462	nat->nat_tqe.tqe_ifq = NULL;
1463	nat->nat_tqe.tqe_parent = nat;
1464
1465	/*
1466	 * Restore the rule associated with this nat session
1467	 */
1468	in = ipnn->ipn_nat.nat_ptr;
1469	if (in != NULL) {
1470		KMALLOC(in, ipnat_t *);
1471		nat->nat_ptr = in;
1472		if (in == NULL) {
1473			error = ENOMEM;
1474			goto junkput;
1475		}
1476		bzero((char *)in, offsetof(struct ipnat, in_next6));
1477		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1478		in->in_use = 1;
1479		in->in_flags |= IPN_DELETE;
1480
1481		ATOMIC_INC(nat_stats.ns_rules);
1482
1483		if (nat_resolverule(in) != 0) {
1484			error = ESRCH;
1485			goto junkput;
1486		}
1487	}
1488
1489	/*
1490	 * Check that the NAT entry doesn't already exist in the kernel.
1491	 *
1492	 * For NAT_OUTBOUND, we're lookup for a duplicate MAP entry.  To do
1493	 * this, we check to see if the inbound combination of addresses and
1494	 * ports is already known.  Similar logic is applied for NAT_INBOUND.
1495	 *
1496	 */
1497	KMALLOC(fin, fr_info_t *);
1498	if (fin == NULL) {
1499		error = ENOMEM;
1500		goto junkput;
1501	}
1502	bzero(fin, sizeof(*fin));
1503	fin->fin_p = nat->nat_p;
1504	fin->fin_ifp = nat->nat_ifps[0];
1505	if (nat->nat_dir == NAT_OUTBOUND) {
1506		fin->fin_data[0] = ntohs(nat->nat_oport);
1507		fin->fin_data[1] = ntohs(nat->nat_outport);
1508		fin->fin_ifp = nat->nat_ifps[0];
1509		if (getlock) {
1510			READ_ENTER(&ipf_nat);
1511		}
1512		n = nat_inlookup(fin, nat->nat_flags, fin->fin_p,
1513				 nat->nat_oip, nat->nat_inip);
1514		if (getlock) {
1515			RWLOCK_EXIT(&ipf_nat);
1516		}
1517		if (n != NULL) {
1518			error = EEXIST;
1519			goto junkput;
1520		}
1521	} else if (nat->nat_dir == NAT_INBOUND) {
1522		fin->fin_data[0] = ntohs(nat->nat_inport);
1523		fin->fin_data[1] = ntohs(nat->nat_oport);
1524		fin->fin_ifp = nat->nat_ifps[0];
1525		if (getlock) {
1526			READ_ENTER(&ipf_nat);
1527		}
1528		n = nat_outlookup(fin, nat->nat_flags, fin->fin_p,
1529				  nat->nat_outip, nat->nat_oip);
1530		if (getlock) {
1531			RWLOCK_EXIT(&ipf_nat);
1532		}
1533		if (n != NULL) {
1534			error = EEXIST;
1535			goto junkput;
1536		}
1537	} else {
1538		error = EINVAL;
1539		goto junkput;
1540	}
1541
1542	/*
1543	 * Restore ap_session_t structure.  Include the private data allocated
1544	 * if it was there.
1545	 */
1546	aps = nat->nat_aps;
1547	if (aps != NULL) {
1548		KMALLOC(aps, ap_session_t *);
1549		nat->nat_aps = aps;
1550		if (aps == NULL) {
1551			error = ENOMEM;
1552			goto junkput;
1553		}
1554		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1555		if (in != NULL)
1556			aps->aps_apr = in->in_apr;
1557		else
1558			aps->aps_apr = NULL;
1559		if (aps->aps_psiz != 0) {
1560			if (aps->aps_psiz > 81920) {
1561				error = ENOMEM;
1562				goto junkput;
1563			}
1564			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1565			if (aps->aps_data == NULL) {
1566				error = ENOMEM;
1567				goto junkput;
1568			}
1569			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1570			      aps->aps_psiz);
1571		} else {
1572			aps->aps_psiz = 0;
1573			aps->aps_data = NULL;
1574		}
1575	}
1576
1577	/*
1578	 * If there was a filtering rule associated with this entry then
1579	 * build up a new one.
1580	 */
1581	fr = nat->nat_fr;
1582	if (fr != NULL) {
1583		if ((nat->nat_flags & SI_NEWFR) != 0) {
1584			KMALLOC(fr, frentry_t *);
1585			nat->nat_fr = fr;
1586			if (fr == NULL) {
1587				error = ENOMEM;
1588				goto junkput;
1589			}
1590			ipnn->ipn_nat.nat_fr = fr;
1591			fr->fr_ref = 1;
1592			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1593			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1594
1595			fr->fr_ref = 1;
1596			fr->fr_dsize = 0;
1597			fr->fr_data = NULL;
1598			fr->fr_type = FR_T_NONE;
1599
1600			MUTEX_NUKE(&fr->fr_lock);
1601			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1602		} else {
1603			if (getlock) {
1604				READ_ENTER(&ipf_nat);
1605			}
1606			for (n = nat_instances; n; n = n->nat_next)
1607				if (n->nat_fr == fr)
1608					break;
1609
1610			if (n != NULL) {
1611				MUTEX_ENTER(&fr->fr_lock);
1612				fr->fr_ref++;
1613				MUTEX_EXIT(&fr->fr_lock);
1614			}
1615			if (getlock) {
1616				RWLOCK_EXIT(&ipf_nat);
1617			}
1618
1619			if (!n) {
1620				error = ESRCH;
1621				goto junkput;
1622			}
1623		}
1624	}
1625
1626	if (ipnn != ipn) {
1627		KFREES(ipnn, ipn->ipn_dsize);
1628		ipnn = NULL;
1629	}
1630
1631	if (getlock) {
1632		WRITE_ENTER(&ipf_nat);
1633	}
1634	error = nat_insert(nat, nat->nat_rev);
1635	if ((error == 0) && (aps != NULL)) {
1636		aps->aps_next = ap_sess_list;
1637		ap_sess_list = aps;
1638	}
1639	if (getlock) {
1640		RWLOCK_EXIT(&ipf_nat);
1641	}
1642
1643	if (error == 0)
1644		return 0;
1645
1646	error = ENOMEM;
1647
1648junkput:
1649	if (fin != NULL)
1650		KFREE(fin);
1651	if (fr != NULL)
1652		(void) fr_derefrule(&fr);
1653
1654	if ((ipnn != NULL) && (ipnn != ipn)) {
1655		KFREES(ipnn, ipn->ipn_dsize);
1656	}
1657	if (ipn != NULL)
1658		KFREE(ipn);
1659	if (nat != NULL) {
1660		if (aps != NULL) {
1661			if (aps->aps_data != NULL) {
1662				KFREES(aps->aps_data, aps->aps_psiz);
1663			}
1664			KFREE(aps);
1665		}
1666		if (in != NULL) {
1667			if (in->in_apr)
1668				appr_free(in->in_apr);
1669			KFREE(in);
1670		}
1671		KFREE(nat);
1672	}
1673	return error;
1674}
1675
1676
1677/* ------------------------------------------------------------------------ */
1678/* Function:    nat_delete                                                  */
1679/* Returns:     Nil                                                         */
1680/* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1681/*              logtype(I) - type of LOG record to create before deleting   */
1682/* Write Lock:  ipf_nat                                                     */
1683/*                                                                          */
1684/* Delete a nat entry from the various lists and table.  If NAT logging is  */
1685/* enabled then generate a NAT log record for this event.                   */
1686/* ------------------------------------------------------------------------ */
1687void
1688nat_delete(struct nat *nat, int logtype)
1689{
1690	struct ipnat *ipn;
1691	int removed = 0;
1692
1693	if (logtype != 0 && nat_logging != 0)
1694		nat_log(nat, logtype);
1695#if defined(NEED_LOCAL_RAND) && defined(_KERNEL)
1696	ipf_rand_push(nat, sizeof(*nat));
1697#endif
1698
1699	/*
1700	 * Take it as a general indication that all the pointers are set if
1701	 * nat_pnext is set.
1702	 */
1703	if (nat->nat_pnext != NULL) {
1704		removed = 1;
1705
1706		nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1707		nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1708
1709		*nat->nat_pnext = nat->nat_next;
1710		if (nat->nat_next != NULL) {
1711			nat->nat_next->nat_pnext = nat->nat_pnext;
1712			nat->nat_next = NULL;
1713		}
1714		nat->nat_pnext = NULL;
1715
1716		*nat->nat_phnext[0] = nat->nat_hnext[0];
1717		if (nat->nat_hnext[0] != NULL) {
1718			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1719			nat->nat_hnext[0] = NULL;
1720		}
1721		nat->nat_phnext[0] = NULL;
1722
1723		*nat->nat_phnext[1] = nat->nat_hnext[1];
1724		if (nat->nat_hnext[1] != NULL) {
1725			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1726			nat->nat_hnext[1] = NULL;
1727		}
1728		nat->nat_phnext[1] = NULL;
1729
1730		if ((nat->nat_flags & SI_WILDP) != 0)
1731			nat_stats.ns_wilds--;
1732	}
1733
1734	if (nat->nat_me != NULL) {
1735		*nat->nat_me = NULL;
1736		nat->nat_me = NULL;
1737	}
1738
1739	if (nat->nat_tqe.tqe_ifq != NULL)
1740		fr_deletequeueentry(&nat->nat_tqe);
1741
1742	if (logtype == NL_EXPIRE)
1743		nat_stats.ns_expire++;
1744
1745	MUTEX_ENTER(&nat->nat_lock);
1746	/*
1747	 * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1748	 * This happens when a nat'd packet is blocked and we want to throw
1749	 * away the NAT session.
1750	 */
1751	if (logtype == NL_DESTROY) {
1752		if (nat->nat_ref > 2) {
1753			nat->nat_ref -= 2;
1754			MUTEX_EXIT(&nat->nat_lock);
1755			if (removed)
1756				nat_stats.ns_orphans++;
1757			return;
1758		}
1759	} else if (nat->nat_ref > 1) {
1760		nat->nat_ref--;
1761		MUTEX_EXIT(&nat->nat_lock);
1762		if (removed)
1763			nat_stats.ns_orphans++;
1764		return;
1765	}
1766	MUTEX_EXIT(&nat->nat_lock);
1767
1768	/*
1769	 * At this point, nat_ref is 1, doing "--" would make it 0..
1770	 */
1771	nat->nat_ref = 0;
1772	if (!removed)
1773		nat_stats.ns_orphans--;
1774
1775#ifdef	IPFILTER_SYNC
1776	if (nat->nat_sync)
1777		ipfsync_del(nat->nat_sync);
1778#endif
1779
1780	if (nat->nat_fr != NULL)
1781		(void) fr_derefrule(&nat->nat_fr);
1782
1783	if (nat->nat_hm != NULL)
1784		fr_hostmapdel(&nat->nat_hm);
1785
1786	/*
1787	 * If there is an active reference from the nat entry to its parent
1788	 * rule, decrement the rule's reference count and free it too if no
1789	 * longer being used.
1790	 */
1791	ipn = nat->nat_ptr;
1792	if (ipn != NULL) {
1793		fr_ipnatderef(&ipn);
1794	}
1795
1796	MUTEX_DESTROY(&nat->nat_lock);
1797
1798	aps_free(nat->nat_aps);
1799	nat_stats.ns_inuse--;
1800
1801	/*
1802	 * If there's a fragment table entry too for this nat entry, then
1803	 * dereference that as well.  This is after nat_lock is released
1804	 * because of Tru64.
1805	 */
1806	fr_forgetnat((void *)nat);
1807
1808	KFREE(nat);
1809}
1810
1811
1812/* ------------------------------------------------------------------------ */
1813/* Function:    nat_flushtable                                              */
1814/* Returns:     int - number of NAT rules deleted                           */
1815/* Parameters:  Nil                                                         */
1816/*                                                                          */
1817/* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1818/* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1819/* ------------------------------------------------------------------------ */
1820/*
1821 * nat_flushtable - clear the NAT table of all mapping entries.
1822 */
1823static int
1824nat_flushtable(void)
1825{
1826	nat_t *nat;
1827	int j = 0;
1828
1829	/*
1830	 * ALL NAT mappings deleted, so lets just make the deletions
1831	 * quicker.
1832	 */
1833	if (nat_table[0] != NULL)
1834		bzero((char *)nat_table[0],
1835		      sizeof(nat_table[0]) * ipf_nattable_sz);
1836	if (nat_table[1] != NULL)
1837		bzero((char *)nat_table[1],
1838		      sizeof(nat_table[1]) * ipf_nattable_sz);
1839
1840	while ((nat = nat_instances) != NULL) {
1841		nat_delete(nat, NL_FLUSH);
1842		j++;
1843	}
1844
1845	return j;
1846}
1847
1848
1849/* ------------------------------------------------------------------------ */
1850/* Function:    nat_clearlist                                               */
1851/* Returns:     int - number of NAT/RDR rules deleted                       */
1852/* Parameters:  Nil                                                         */
1853/*                                                                          */
1854/* Delete all rules in the current list of rules.  There is nothing elegant */
1855/* about this cleanup: simply free all entries on the list of rules and     */
1856/* clear out the tables used for hashed NAT rule lookups.                   */
1857/* ------------------------------------------------------------------------ */
1858static int
1859nat_clearlist(void)
1860{
1861	ipnat_t *n, **np = &nat_list;
1862	int i = 0;
1863
1864	if (nat_rules != NULL)
1865		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1866	if (rdr_rules != NULL)
1867		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1868
1869	while ((n = *np) != NULL) {
1870		*np = n->in_next;
1871		if (n->in_use == 0) {
1872			if (n->in_apr != NULL)
1873				appr_free(n->in_apr);
1874			MUTEX_DESTROY(&n->in_lock);
1875			KFREE(n);
1876			nat_stats.ns_rules--;
1877		} else {
1878			n->in_flags |= IPN_DELETE;
1879			n->in_next = NULL;
1880		}
1881		i++;
1882	}
1883#if SOLARIS && !defined(_INET_IP_STACK_H)
1884	pfil_delayed_copy = 1;
1885#endif
1886	nat_masks = 0;
1887	rdr_masks = 0;
1888	return i;
1889}
1890
1891
1892/* ------------------------------------------------------------------------ */
1893/* Function:    nat_newmap                                                  */
1894/* Returns:     int - -1 == error, 0 == success                             */
1895/* Parameters:  fin(I) - pointer to packet information                      */
1896/*              nat(I) - pointer to NAT entry                               */
1897/*              ni(I)  - pointer to structure with misc. information needed */
1898/*                       to create new NAT entry.                           */
1899/*                                                                          */
1900/* Given an empty NAT structure, populate it with new information about a   */
1901/* new NAT session, as defined by the matching NAT rule.                    */
1902/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1903/* to the new IP address for the translation.                               */
1904/* ------------------------------------------------------------------------ */
1905static INLINE int
1906nat_newmap(fr_info_t *fin, nat_t *nat, natinfo_t *ni)
1907{
1908	u_short st_port, dport, sport, port, sp, dp;
1909	struct in_addr in, inb;
1910	hostmap_t *hm;
1911	u_32_t flags;
1912	u_32_t st_ip;
1913	ipnat_t *np;
1914	nat_t *natl;
1915	int l;
1916
1917	/*
1918	 * If it's an outbound packet which doesn't match any existing
1919	 * record, then create a new port
1920	 */
1921	l = 0;
1922	hm = NULL;
1923	np = ni->nai_np;
1924	st_ip = np->in_nip;
1925	st_port = np->in_pnext;
1926	flags = ni->nai_flags;
1927	sport = ni->nai_sport;
1928	dport = ni->nai_dport;
1929
1930	/*
1931	 * Do a loop until we either run out of entries to try or we find
1932	 * a NAT mapping that isn't currently being used.  This is done
1933	 * because the change to the source is not (usually) being fixed.
1934	 */
1935	do {
1936		port = 0;
1937		in.s_addr = htonl(np->in_nip);
1938		if (l == 0) {
1939			/*
1940			 * Check to see if there is an existing NAT
1941			 * setup for this IP address pair.
1942			 */
1943			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1944					 in, 0);
1945			if (hm != NULL)
1946				in.s_addr = hm->hm_mapip.s_addr;
1947		} else if ((l == 1) && (hm != NULL)) {
1948			fr_hostmapdel(&hm);
1949		}
1950		in.s_addr = ntohl(in.s_addr);
1951
1952		nat->nat_hm = hm;
1953
1954		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1955			if (l > 0)
1956				return -1;
1957		}
1958
1959		if (np->in_redir == NAT_BIMAP &&
1960		    np->in_inmsk == np->in_outmsk) {
1961			/*
1962			 * map the address block in a 1:1 fashion
1963			 */
1964			in.s_addr = np->in_outip;
1965			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1966			in.s_addr = ntohl(in.s_addr);
1967
1968		} else if (np->in_redir & NAT_MAPBLK) {
1969			if ((l >= np->in_ppip) || ((l > 0) &&
1970			     !(flags & IPN_TCPUDP)))
1971				return -1;
1972			/*
1973			 * map-block - Calculate destination address.
1974			 */
1975			in.s_addr = ntohl(fin->fin_saddr);
1976			in.s_addr &= ntohl(~np->in_inmsk);
1977			inb.s_addr = in.s_addr;
1978			in.s_addr /= np->in_ippip;
1979			in.s_addr &= ntohl(~np->in_outmsk);
1980			in.s_addr += ntohl(np->in_outip);
1981			/*
1982			 * Calculate destination port.
1983			 */
1984			if ((flags & IPN_TCPUDP) &&
1985			    (np->in_ppip != 0)) {
1986				port = ntohs(sport) + l;
1987				port %= np->in_ppip;
1988				port += np->in_ppip *
1989					(inb.s_addr % np->in_ippip);
1990				port += MAPBLK_MINPORT;
1991				port = htons(port);
1992			}
1993
1994		} else if ((np->in_outip == 0) &&
1995			   (np->in_outmsk == 0xffffffff)) {
1996			/*
1997			 * 0/32 - use the interface's IP address.
1998			 */
1999			if ((l > 0) ||
2000			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
2001				       &in, NULL) == -1)
2002				return -1;
2003			in.s_addr = ntohl(in.s_addr);
2004
2005		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
2006			/*
2007			 * 0/0 - use the original source address/port.
2008			 */
2009			if (l > 0)
2010				return -1;
2011			in.s_addr = ntohl(fin->fin_saddr);
2012
2013		} else if ((np->in_outmsk != 0xffffffff) &&
2014			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2015			np->in_nip++;
2016
2017		natl = NULL;
2018
2019		if ((flags & IPN_TCPUDP) &&
2020		    ((np->in_redir & NAT_MAPBLK) == 0) &&
2021		    (np->in_flags & IPN_AUTOPORTMAP)) {
2022			/*
2023			 * "ports auto" (without map-block)
2024			 */
2025			if ((l > 0) && np->in_ppip && (l % np->in_ppip == 0)) {
2026				if (l > np->in_space) {
2027					return -1;
2028				} else if ((l > np->in_ppip) &&
2029					   np->in_outmsk != 0xffffffff)
2030					np->in_nip++;
2031			}
2032			if (np->in_ppip != 0) {
2033				port = ntohs(sport);
2034				port += (l % np->in_ppip);
2035				port %= np->in_ppip;
2036				port += np->in_ppip *
2037					(ntohl(fin->fin_saddr) %
2038					 np->in_ippip);
2039				port += MAPBLK_MINPORT;
2040				port = htons(port);
2041			}
2042
2043		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2044			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2045			/*
2046			 * Standard port translation.  Select next port.
2047			 */
2048			if (np->in_flags & IPN_SEQUENTIAL) {
2049				port = np->in_pnext;
2050			} else {
2051				in_port_t d = ntohs(np->in_pmax) -
2052				    ntohs(np->in_pmin) + 1;
2053				if (d)
2054					port = ipf_random() % d;
2055				else
2056					port = 0;
2057				port += ntohs(np->in_pmin);
2058			}
2059			port = htons(port);
2060			np->in_pnext++;
2061
2062			if (np->in_pnext > ntohs(np->in_pmax)) {
2063				np->in_pnext = ntohs(np->in_pmin);
2064				if (np->in_outmsk != 0xffffffff)
2065					np->in_nip++;
2066			}
2067		}
2068
2069		if (np->in_flags & IPN_IPRANGE) {
2070			if (np->in_nip > ntohl(np->in_outmsk))
2071				np->in_nip = ntohl(np->in_outip);
2072		} else {
2073			if ((np->in_outmsk != 0xffffffff) &&
2074			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2075			    ntohl(np->in_outip))
2076				np->in_nip = ntohl(np->in_outip) + 1;
2077		}
2078
2079		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2080			port = sport;
2081
2082		/*
2083		 * Here we do a lookup of the connection as seen from
2084		 * the outside.  If an IP# pair already exists, try
2085		 * again.  So if you have A->B becomes C->B, you can
2086		 * also have D->E become C->E but not D->B causing
2087		 * another C->B.  Also take protocol and ports into
2088		 * account when determining whether a pre-existing
2089		 * NAT setup will cause an external conflict where
2090		 * this is appropriate.
2091		 */
2092		inb.s_addr = htonl(in.s_addr);
2093		sp = fin->fin_data[0];
2094		dp = fin->fin_data[1];
2095		fin->fin_data[0] = fin->fin_data[1];
2096		fin->fin_data[1] = htons(port);
2097		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2098				    (u_int)fin->fin_p, fin->fin_dst, inb);
2099		fin->fin_data[0] = sp;
2100		fin->fin_data[1] = dp;
2101
2102		/*
2103		 * Has the search wrapped around and come back to the
2104		 * start ?
2105		 */
2106		if ((natl != NULL) &&
2107		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2108		    (np->in_nip != 0) && (st_ip == np->in_nip))
2109			return -1;
2110		l++;
2111	} while (natl != NULL);
2112
2113	if (np->in_space > 0)
2114		np->in_space--;
2115
2116	/* Setup the NAT table */
2117	nat->nat_inip = fin->fin_src;
2118	nat->nat_outip.s_addr = htonl(in.s_addr);
2119	nat->nat_oip = fin->fin_dst;
2120	if (nat->nat_hm == NULL)
2121		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2122					  nat->nat_outip, 0);
2123
2124	/*
2125	 * The ICMP checksum does not have a pseudo header containing
2126	 * the IP addresses
2127	 */
2128	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2129	ni->nai_sum2 = LONG_SUM(in.s_addr);
2130	if ((flags & IPN_TCPUDP)) {
2131		ni->nai_sum1 += ntohs(sport);
2132		ni->nai_sum2 += ntohs(port);
2133	}
2134
2135	if (flags & IPN_TCPUDP) {
2136		nat->nat_inport = sport;
2137		nat->nat_outport = port;	/* sport */
2138		nat->nat_oport = dport;
2139		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2140	} else if (flags & IPN_ICMPQUERY) {
2141		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2142		nat->nat_inport = port;
2143		nat->nat_outport = port;
2144#if 0
2145	} else if (fin->fin_p == IPPROTO_GRE) {
2146		nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2147		if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2148			nat->nat_oport = 0;/*fin->fin_data[1];*/
2149			nat->nat_inport = 0;/*fin->fin_data[0];*/
2150			nat->nat_outport = 0;/*fin->fin_data[0];*/
2151			nat->nat_call[0] = fin->fin_data[0];
2152			nat->nat_call[1] = fin->fin_data[0];
2153		}
2154#endif
2155	}
2156	ni->nai_ip.s_addr = in.s_addr;
2157	ni->nai_port = port;
2158	ni->nai_nport = dport;
2159	return 0;
2160}
2161
2162
2163/* ------------------------------------------------------------------------ */
2164/* Function:    nat_newrdr                                                  */
2165/* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2166/*                    allow rule to be moved if IPN_ROUNDR is set.          */
2167/* Parameters:  fin(I) - pointer to packet information                      */
2168/*              nat(I) - pointer to NAT entry                               */
2169/*              ni(I)  - pointer to structure with misc. information needed */
2170/*                       to create new NAT entry.                           */
2171/*                                                                          */
2172/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2173/* to the new IP address for the translation.                               */
2174/* ------------------------------------------------------------------------ */
2175static INLINE int
2176nat_newrdr(fr_info_t *fin, nat_t *nat, natinfo_t *ni)
2177{
2178	u_short nport, dport, sport;
2179	struct in_addr in, inb;
2180	u_short sp, dp;
2181	hostmap_t *hm;
2182	u_32_t flags;
2183	ipnat_t *np;
2184	nat_t *natl;
2185	int move;
2186
2187	move = 1;
2188	hm = NULL;
2189	in.s_addr = 0;
2190	np = ni->nai_np;
2191	flags = ni->nai_flags;
2192	sport = ni->nai_sport;
2193	dport = ni->nai_dport;
2194
2195	/*
2196	 * If the matching rule has IPN_STICKY set, then we want to have the
2197	 * same rule kick in as before.  Why would this happen?  If you have
2198	 * a collection of rdr rules with "round-robin sticky", the current
2199	 * packet might match a different one to the previous connection but
2200	 * we want the same destination to be used.
2201	 */
2202	if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2203	    ((np->in_flags & IPN_STICKY) != 0)) {
2204		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2205				 (u_32_t)dport);
2206		if (hm != NULL) {
2207			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2208			np = hm->hm_ipnat;
2209			ni->nai_np = np;
2210			move = 0;
2211		}
2212	}
2213
2214	/*
2215	 * Otherwise, it's an inbound packet. Most likely, we don't
2216	 * want to rewrite source ports and source addresses. Instead,
2217	 * we want to rewrite to a fixed internal address and fixed
2218	 * internal port.
2219	 */
2220	if (np->in_flags & IPN_SPLIT) {
2221		in.s_addr = np->in_nip;
2222
2223		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2224			hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2225					 in, (u_32_t)dport);
2226			if (hm != NULL) {
2227				in.s_addr = hm->hm_mapip.s_addr;
2228				move = 0;
2229			}
2230		}
2231
2232		if (hm == NULL || hm->hm_ref == 1) {
2233			if (np->in_inip == htonl(in.s_addr)) {
2234				np->in_nip = ntohl(np->in_inmsk);
2235				move = 0;
2236			} else {
2237				np->in_nip = ntohl(np->in_inip);
2238			}
2239		}
2240
2241	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2242		/*
2243		 * 0/32 - use the interface's IP address.
2244		 */
2245		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2246			return -1;
2247		in.s_addr = ntohl(in.s_addr);
2248
2249	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2250		/*
2251		 * 0/0 - use the original destination address/port.
2252		 */
2253		in.s_addr = ntohl(fin->fin_daddr);
2254
2255	} else if (np->in_redir == NAT_BIMAP &&
2256		   np->in_inmsk == np->in_outmsk) {
2257		/*
2258		 * map the address block in a 1:1 fashion
2259		 */
2260		in.s_addr = np->in_inip;
2261		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2262		in.s_addr = ntohl(in.s_addr);
2263	} else {
2264		in.s_addr = ntohl(np->in_inip);
2265	}
2266
2267	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2268		nport = dport;
2269	else {
2270		/*
2271		 * Whilst not optimized for the case where
2272		 * pmin == pmax, the gain is not significant.
2273		 */
2274		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2275		    (np->in_pmin != np->in_pmax)) {
2276			nport = ntohs(dport) - ntohs(np->in_pmin) +
2277				ntohs(np->in_pnext);
2278			nport = htons(nport);
2279		} else
2280			nport = np->in_pnext;
2281	}
2282
2283	/*
2284	 * When the redirect-to address is set to 0.0.0.0, just
2285	 * assume a blank `forwarding' of the packet.  We don't
2286	 * setup any translation for this either.
2287	 */
2288	if (in.s_addr == 0) {
2289		if (nport == dport)
2290			return -1;
2291		in.s_addr = ntohl(fin->fin_daddr);
2292	}
2293
2294	/*
2295	 * Check to see if this redirect mapping already exists and if
2296	 * it does, return "failure" (allowing it to be created will just
2297	 * cause one or both of these "connections" to stop working.)
2298	 */
2299	inb.s_addr = htonl(in.s_addr);
2300	sp = fin->fin_data[0];
2301	dp = fin->fin_data[1];
2302	fin->fin_data[1] = fin->fin_data[0];
2303	fin->fin_data[0] = ntohs(nport);
2304	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2305			     (u_int)fin->fin_p, inb, fin->fin_src);
2306	fin->fin_data[0] = sp;
2307	fin->fin_data[1] = dp;
2308	if (natl != NULL)
2309		return -1;
2310
2311	nat->nat_inip.s_addr = htonl(in.s_addr);
2312	nat->nat_outip = fin->fin_dst;
2313	nat->nat_oip = fin->fin_src;
2314	if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2315		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
2316					  (u_32_t)dport);
2317
2318	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2319	ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2320
2321	ni->nai_ip.s_addr = in.s_addr;
2322	ni->nai_nport = nport;
2323	ni->nai_port = sport;
2324
2325	if (flags & IPN_TCPUDP) {
2326		nat->nat_inport = nport;
2327		nat->nat_outport = dport;
2328		nat->nat_oport = sport;
2329		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2330	} else if (flags & IPN_ICMPQUERY) {
2331		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2332		nat->nat_inport = nport;
2333		nat->nat_outport = nport;
2334#if 0
2335	} else if (fin->fin_p == IPPROTO_GRE) {
2336		nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2337		if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2338			nat->nat_call[0] = fin->fin_data[0];
2339			nat->nat_call[1] = fin->fin_data[1];
2340			nat->nat_oport = 0; /*fin->fin_data[0];*/
2341			nat->nat_inport = 0; /*fin->fin_data[1];*/
2342			nat->nat_outport = 0; /*fin->fin_data[1];*/
2343		}
2344#endif
2345	}
2346
2347	return move;
2348}
2349
2350/* ------------------------------------------------------------------------ */
2351/* Function:    nat_new                                                     */
2352/* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2353/*                       else pointer to new NAT structure                  */
2354/* Parameters:  fin(I)       - pointer to packet information                */
2355/*              np(I)        - pointer to NAT rule                          */
2356/*              natsave(I)   - pointer to where to store NAT struct pointer */
2357/*              flags(I)     - flags describing the current packet          */
2358/*              direction(I) - direction of packet (in/out)                 */
2359/* Write Lock:  ipf_nat                                                     */
2360/*                                                                          */
2361/* Attempts to create a new NAT entry.  Does not actually change the packet */
2362/* in any way.                                                              */
2363/*                                                                          */
2364/* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2365/* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2366/* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2367/* and (3) building that structure and putting it into the NAT table(s).    */
2368/*                                                                          */
2369/* NOTE: natsave should NOT be used top point back to an ipstate_t struct   */
2370/*       as it can result in memory being corrupted.                        */
2371/* ------------------------------------------------------------------------ */
2372nat_t *
2373nat_new(fr_info_t *fin, ipnat_t *np, nat_t **natsave, u_int flags, int direction)
2374{
2375	u_short port = 0, sport = 0, dport = 0, nport = 0;
2376	tcphdr_t *tcp = NULL;
2377	hostmap_t *hm = NULL;
2378	struct in_addr in;
2379	nat_t *nat, *natl;
2380	u_int nflags;
2381	natinfo_t ni;
2382	u_32_t sumd;
2383	int move;
2384#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2385	qpktinfo_t *qpi = fin->fin_qpi;
2386#endif
2387
2388	memset(&ni, 0, sizeof ni);	/* XXX gcc */
2389
2390	if (nat_stats.ns_inuse >= ipf_nattable_max) {
2391		nat_stats.ns_memfail++;
2392		fr_nat_doflush = 1;
2393		return NULL;
2394	}
2395
2396	move = 1;
2397	nflags = np->in_flags & flags;
2398	nflags &= NAT_FROMRULE;
2399
2400	ni.nai_np = np;
2401	ni.nai_nflags = nflags;
2402	ni.nai_flags = flags;
2403	ni.nai_dport = 0;
2404	ni.nai_sport = 0;
2405
2406	/* Give me a new nat */
2407	KMALLOC(nat, nat_t *);
2408	if (nat == NULL) {
2409		nat_stats.ns_memfail++;
2410		/*
2411		 * Try to automatically tune the max # of entries in the
2412		 * table allowed to be less than what will cause kmem_alloc()
2413		 * to fail and try to eliminate panics due to out of memory
2414		 * conditions arising.
2415		 */
2416		if (ipf_nattable_max > ipf_nattable_sz) {
2417			ipf_nattable_max = nat_stats.ns_inuse - 100;
2418			printf("ipf_nattable_max reduced to %d\n",
2419				ipf_nattable_max);
2420		}
2421		return NULL;
2422	}
2423
2424	if (flags & IPN_TCPUDP) {
2425		tcp = fin->fin_dp;
2426		ni.nai_sport = htons(fin->fin_sport);
2427		ni.nai_dport = htons(fin->fin_dport);
2428	} else if (flags & IPN_ICMPQUERY) {
2429		/*
2430		 * In the ICMP query NAT code, we translate the ICMP id fields
2431		 * to make them unique. This is indepedent of the ICMP type
2432		 * (e.g. in the unlikely event that a host sends an echo and
2433		 * an tstamp request with the same id, both packets will have
2434		 * their ip address/id field changed in the same way).
2435		 */
2436		/* The icmp_id field is used by the sender to identify the
2437		 * process making the icmp request. (the receiver justs
2438		 * copies it back in its response). So, it closely matches
2439		 * the concept of source port. We overlay sport, so we can
2440		 * maximally reuse the existing code.
2441		 */
2442		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2443		ni.nai_dport = ni.nai_sport;
2444	}
2445
2446	bzero((char *)nat, sizeof(*nat));
2447	nat->nat_flags = flags;
2448	nat->nat_redir = np->in_redir;
2449
2450	/*
2451	 * Search the current table for a match.
2452	 */
2453	if (direction == NAT_OUTBOUND) {
2454		/*
2455		 * We can now arrange to call this for the same connection
2456		 * because ipf_nat_new doesn't protect the code path into
2457		 * this function.
2458		 */
2459		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2460				     fin->fin_src, fin->fin_dst);
2461		if (natl != NULL) {
2462			KFREE(nat);
2463			nat = natl;
2464			goto done;
2465		}
2466
2467		move = nat_newmap(fin, nat, &ni);
2468		if (move == -1)
2469			goto badnat;
2470
2471		np = ni.nai_np;
2472		in = ni.nai_ip;
2473	} else {
2474		/*
2475		 * NAT_INBOUND is used only for redirects rules
2476		 */
2477		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2478				    fin->fin_src, fin->fin_dst);
2479		if (natl != NULL) {
2480			KFREE(nat);
2481			nat = natl;
2482			goto done;
2483		}
2484
2485		move = nat_newrdr(fin, nat, &ni);
2486		if (move == -1)
2487			goto badnat;
2488
2489		np = ni.nai_np;
2490		in = ni.nai_ip;
2491	}
2492	port = ni.nai_port;
2493	nport = ni.nai_nport;
2494
2495	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2496		if (np->in_redir == NAT_REDIRECT) {
2497			nat_delrdr(np);
2498			nat_addrdr(np);
2499		} else if (np->in_redir == NAT_MAP) {
2500			nat_delnat(np);
2501			nat_addnat(np);
2502		}
2503	}
2504
2505	if (flags & IPN_TCPUDP) {
2506		sport = ni.nai_sport;
2507		dport = ni.nai_dport;
2508	} else if (flags & IPN_ICMPQUERY) {
2509		sport = ni.nai_sport;
2510		dport = 0;
2511	}
2512
2513	CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2514	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2515#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2516	if ((flags & IPN_TCP) && dohwcksum &&
2517	    (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2518		if (direction == NAT_OUTBOUND)
2519			ni.nai_sum1 = LONG_SUM(in.s_addr);
2520		else
2521			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2522		ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2523		ni.nai_sum1 += 30;
2524		ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2525		nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2526	} else
2527#endif
2528		nat->nat_sumd[1] = nat->nat_sumd[0];
2529
2530	if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2531		if (direction == NAT_OUTBOUND)
2532			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2533		else
2534			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2535
2536		ni.nai_sum2 = LONG_SUM(in.s_addr);
2537
2538		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2539		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2540	} else {
2541		nat->nat_ipsumd = nat->nat_sumd[0];
2542		if (!(flags & IPN_TCPUDPICMP)) {
2543			nat->nat_sumd[0] = 0;
2544			nat->nat_sumd[1] = 0;
2545		}
2546	}
2547
2548	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2549		fr_nat_doflush = 1;
2550		goto badnat;
2551	}
2552	if (flags & SI_WILDP)
2553		nat_stats.ns_wilds++;
2554	fin->fin_flx |= FI_NEWNAT;
2555	goto done;
2556badnat:
2557	nat_stats.ns_badnat++;
2558	if ((hm = nat->nat_hm) != NULL)
2559		fr_hostmapdel(&hm);
2560	KFREE(nat);
2561	nat = NULL;
2562done:
2563	if (nat != NULL && np != NULL)
2564		np->in_hits++;
2565	return nat;
2566}
2567
2568
2569/* ------------------------------------------------------------------------ */
2570/* Function:    nat_finalise                                                */
2571/* Returns:     int - 0 == sucess, -1 == failure                            */
2572/* Parameters:  fin(I) - pointer to packet information                      */
2573/*              nat(I) - pointer to NAT entry                               */
2574/*              ni(I)  - pointer to structure with misc. information needed */
2575/*                       to create new NAT entry.                           */
2576/* Write Lock:  ipf_nat                                                     */
2577/*                                                                          */
2578/* This is the tail end of constructing a new NAT entry and is the same     */
2579/* for both IPv4 and IPv6.                                                  */
2580/* ------------------------------------------------------------------------ */
2581/*ARGSUSED*/
2582static int
2583nat_finalise(
2584    fr_info_t *fin,
2585    nat_t *nat,
2586    natinfo_t *ni,
2587    tcphdr_t *tcp,
2588    nat_t **natsave,
2589    int direction
2590)
2591{
2592	frentry_t *fr;
2593	ipnat_t *np;
2594
2595	np = ni->nai_np;
2596
2597	if (np->in_ifps[0] != NULL) {
2598		COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2599	}
2600	if (np->in_ifps[1] != NULL) {
2601		COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
2602	}
2603#ifdef	IPFILTER_SYNC
2604	if ((nat->nat_flags & SI_CLONE) == 0)
2605		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2606#endif
2607
2608	nat->nat_me = natsave;
2609	nat->nat_dir = direction;
2610	nat->nat_ifps[0] = np->in_ifps[0];
2611	nat->nat_ifps[1] = np->in_ifps[1];
2612	nat->nat_ptr = np;
2613	nat->nat_p = fin->fin_p;
2614	nat->nat_mssclamp = np->in_mssclamp;
2615	if (nat->nat_flags & IPN_TCP)
2616		nat->nat_seqnext[0] = ntohl(tcp->th_seq);
2617
2618	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2619		if (appr_new(fin, nat) == -1)
2620			return -1;
2621
2622	if (nat_insert(nat, fin->fin_rev) == 0) {
2623		if (nat_logging)
2624			nat_log(nat, (u_int)np->in_redir);
2625		np->in_use++;
2626		fr = fin->fin_fr;
2627		nat->nat_fr = fr;
2628		if (fr != NULL) {
2629			MUTEX_ENTER(&fr->fr_lock);
2630			fr->fr_ref++;
2631			MUTEX_EXIT(&fr->fr_lock);
2632		}
2633		return 0;
2634	}
2635
2636	/*
2637	 * nat_insert failed, so cleanup time...
2638	 */
2639	return -1;
2640}
2641
2642
2643/* ------------------------------------------------------------------------ */
2644/* Function:   nat_insert                                                   */
2645/* Returns:    int - 0 == sucess, -1 == failure                             */
2646/* Parameters: nat(I) - pointer to NAT structure                            */
2647/*             rev(I) - flag indicating forward/reverse direction of packet */
2648/* Write Lock: ipf_nat                                                      */
2649/*                                                                          */
2650/* Insert a NAT entry into the hash tables for searching and add it to the  */
2651/* list of active NAT entries.  Adjust global counters when complete.       */
2652/* ------------------------------------------------------------------------ */
2653int
2654nat_insert(nat_t *nat, int rev)
2655{
2656	u_int hv1, hv2;
2657	nat_t **natp;
2658
2659	/*
2660	 * Try and return an error as early as possible, so calculate the hash
2661	 * entry numbers first and then proceed.
2662	 */
2663	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2664		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2665				  0xffffffff);
2666		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2667				  ipf_nattable_sz);
2668		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2669				  0xffffffff);
2670		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2671				  ipf_nattable_sz);
2672	} else {
2673		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2674		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2675		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2676		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
2677	}
2678
2679	if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2680	    nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2681		return -1;
2682	}
2683
2684	nat->nat_hv[0] = hv1;
2685	nat->nat_hv[1] = hv2;
2686
2687	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2688
2689	nat->nat_rev = rev;
2690	nat->nat_ref = 1;
2691
2692	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2693	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2694
2695	if (nat->nat_ifnames[1][0] != '\0') {
2696		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2697		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2698	} else {
2699		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2700			       LIFNAMSIZ);
2701		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2702		nat->nat_ifps[1] = nat->nat_ifps[0];
2703	}
2704
2705	nat->nat_next = nat_instances;
2706	nat->nat_pnext = &nat_instances;
2707	if (nat_instances)
2708		nat_instances->nat_pnext = &nat->nat_next;
2709	nat_instances = nat;
2710
2711	/*
2712	 * Bump this before the hash table inserts.
2713	 */
2714	nat_stats.ns_added++;
2715
2716	natp = &nat_table[0][hv1];
2717	nat->nat_phnext[0] = natp;
2718	nat->nat_hnext[0] = *natp;
2719	if (*natp)
2720		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2721	*natp = nat;
2722	nat_stats.ns_bucketlen[0][hv1]++;
2723
2724	natp = &nat_table[1][hv2];
2725	nat->nat_phnext[1] = natp;
2726	nat->nat_hnext[1] = *natp;
2727	if (*natp)
2728		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2729	*natp = nat;
2730	nat_stats.ns_bucketlen[1][hv2]++;
2731
2732	fr_setnatqueue(nat, rev);
2733
2734	nat_stats.ns_inuse++;
2735	return 0;
2736}
2737
2738
2739/* ------------------------------------------------------------------------ */
2740/* Function:    nat_icmperrorlookup                                         */
2741/* Returns:     nat_t* - point to matching NAT structure                    */
2742/* Parameters:  fin(I) - pointer to packet information                      */
2743/*              dir(I) - direction of packet (in/out)                       */
2744/*                                                                          */
2745/* Check if the ICMP error message is related to an existing TCP, UDP or    */
2746/* ICMP query nat entry.  It is assumed that the packet is already of the   */
2747/* the required length.                                                     */
2748/* ------------------------------------------------------------------------ */
2749nat_t *
2750nat_icmperrorlookup(fr_info_t *fin, int dir)
2751{
2752	int flags = 0, type, minlen;
2753	icmphdr_t *icmp, *orgicmp;
2754	tcphdr_t *tcp = NULL;
2755	u_short data[2];
2756	nat_t *nat;
2757	ip_t *oip;
2758	u_int p;
2759
2760	icmp = fin->fin_dp;
2761	type = icmp->icmp_type;
2762	/*
2763	 * Does it at least have the return (basic) IP header ?
2764	 * Only a basic IP header (no options) should be with an ICMP error
2765	 * header.  Also, if it's not an error type, then return.
2766	 */
2767	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2768		return NULL;
2769
2770	/*
2771	 * Check packet size
2772	 */
2773	oip = (ip_t *)((char *)fin->fin_dp + 8);
2774	minlen = IP_HL(oip) << 2;
2775	if ((minlen < sizeof(ip_t)) ||
2776	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2777		return NULL;
2778	/*
2779	 * Is the buffer big enough for all of it ?  It's the size of the IP
2780	 * header claimed in the encapsulated part which is of concern.  It
2781	 * may be too big to be in this buffer but not so big that it's
2782	 * outside the ICMP packet, leading to TCP deref's causing problems.
2783	 * This is possible because we don't know how big oip_hl is when we
2784	 * do the pullup early in fr_check() and thus can't gaurantee it is
2785	 * all here now.
2786	 */
2787#ifdef  _KERNEL
2788	{
2789	mb_t *m;
2790
2791	m = fin->fin_m;
2792# if defined(MENTAT)
2793	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2794		return NULL;
2795# else
2796	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2797	    (char *)fin->fin_ip + M_LEN(m))
2798		return NULL;
2799# endif
2800	}
2801#endif
2802
2803	if (fin->fin_daddr != oip->ip_src.s_addr)
2804		return NULL;
2805
2806	p = oip->ip_p;
2807	if (p == IPPROTO_TCP)
2808		flags = IPN_TCP;
2809	else if (p == IPPROTO_UDP)
2810		flags = IPN_UDP;
2811	else if (p == IPPROTO_ICMP) {
2812		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2813
2814		/* see if this is related to an ICMP query */
2815		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2816			data[0] = fin->fin_data[0];
2817			data[1] = fin->fin_data[1];
2818			fin->fin_data[0] = 0;
2819			fin->fin_data[1] = orgicmp->icmp_id;
2820
2821			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2822			/*
2823			 * NOTE : dir refers to the direction of the original
2824			 *        ip packet. By definition the icmp error
2825			 *        message flows in the opposite direction.
2826			 */
2827			if (dir == NAT_INBOUND)
2828				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2829						   oip->ip_src);
2830			else
2831				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2832						    oip->ip_src);
2833			fin->fin_data[0] = data[0];
2834			fin->fin_data[1] = data[1];
2835			return nat;
2836		}
2837	}
2838
2839	if (flags & IPN_TCPUDP) {
2840		minlen += 8;		/* + 64bits of data to get ports */
2841		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2842			return NULL;
2843
2844		data[0] = fin->fin_data[0];
2845		data[1] = fin->fin_data[1];
2846		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2847		fin->fin_data[0] = ntohs(tcp->th_dport);
2848		fin->fin_data[1] = ntohs(tcp->th_sport);
2849
2850		if (dir == NAT_INBOUND) {
2851			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2852					   oip->ip_src);
2853		} else {
2854			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2855					    oip->ip_src);
2856		}
2857		fin->fin_data[0] = data[0];
2858		fin->fin_data[1] = data[1];
2859		return nat;
2860	}
2861	if (dir == NAT_INBOUND)
2862		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2863	else
2864		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2865}
2866
2867
2868/* ------------------------------------------------------------------------ */
2869/* Function:    nat_icmperror                                               */
2870/* Returns:     nat_t* - point to matching NAT structure                    */
2871/* Parameters:  fin(I)    - pointer to packet information                   */
2872/*              nflags(I) - NAT flags for this packet                       */
2873/*              dir(I)    - direction of packet (in/out)                    */
2874/*                                                                          */
2875/* Fix up an ICMP packet which is an error message for an existing NAT      */
2876/* session.  This will correct both packet header data and checksums.       */
2877/*                                                                          */
2878/* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2879/* a NAT'd ICMP packet gets correctly recognised.                           */
2880/* ------------------------------------------------------------------------ */
nat_t *
nat_icmperror(fr_info_t *fin, u_int *nflags, int dir)
{
	u_32_t sum1, sum2, sumd, sumd2;
	struct in_addr a1, a2;
	int flags, dlen, odst;
	icmphdr_t *icmp;
	u_short *csump;
	tcphdr_t *tcp;
	nat_t *nat;
	ip_t *oip;
	void *dp;

	/*
	 * Packets flagged FI_SHORT or FI_FRAGBODY are never translated here.
	 */
	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
		return NULL;
	/*
	 * nat_icmperrorlookup() will return NULL for `defective' packets.
	 * Anything that is not IPv4 is rejected before the lookup is tried.
	 */
	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
		return NULL;

	tcp = NULL;
	csump = NULL;
	flags = 0;
	sumd2 = 0;
	*nflags = IPN_ICMPERR;
	icmp = fin->fin_dp;
	/*
	 * oip is the IP header embedded in the ICMP error (the "offending"
	 * packet); dp points at whatever follows that embedded header.
	 */
	oip = (ip_t *)&icmp->icmp_ip;
	dp = (((char *)oip) + (IP_HL(oip) << 2));
	if (oip->ip_p == IPPROTO_TCP) {
		tcp = (tcphdr_t *)dp;
		csump = (u_short *)&tcp->th_sum;
		flags = IPN_TCP;
	} else if (oip->ip_p == IPPROTO_UDP) {
		udphdr_t *udp;

		/*
		 * tcp is aliased onto the UDP header too, so that the port
		 * rewriting code below (which goes through tcp->th_sport and
		 * tcp->th_dport) serves both protocols.
		 */
		udp = (udphdr_t *)dp;
		tcp = (tcphdr_t *)dp;
		csump = (u_short *)&udp->uh_sum;
		flags = IPN_UDP;
	} else if (oip->ip_p == IPPROTO_ICMP)
		flags = IPN_ICMPQUERY;
	/*
	 * dlen is fin_plen less the distance from the start of the outer IP
	 * header to dp, i.e. how much data is present from dp onwards
	 * (assuming fin_plen is the total packet length - TODO confirm).
	 * It gates every access made through dp below.
	 */
	dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);

	/*
	 * Need to adjust ICMP header to include the real IP#'s and
	 * port #'s.  Only apply a checksum change relative to the
	 * IP address change as it will be modified again in fr_checknatout
	 * for both address and port.  Two checksum changes are
	 * necessary for the two header address changes.  Be careful
	 * to only modify the checksum once for the port # and twice
	 * for the IP#.
	 */

	/*
	 * Step 1
	 * Fix the IP addresses in the offending IP packet. You also need
	 * to adjust the IP header checksum of that offending IP packet.
	 *
	 * Normally, you would expect that the ICMP checksum of the
	 * ICMP error message needs to be adjusted as well for the
	 * IP address change in oip.
	 * However, this is a NOP, because the ICMP checksum is
	 * calculated over the complete ICMP packet, which includes the
	 * changed oip IP addresses and oip->ip_sum. However, these
	 * two changes cancel each other out (if the delta for
	 * the IP address is x, then the delta for ip_sum is minus x),
	 * so no change in the icmp_cksum is necessary.
	 *
	 * Inbound ICMP
	 * ------------
	 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
	 * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
	 * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
	 *
	 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
	 * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
	 * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
	 *
	 * Outbound ICMP
	 * -------------
	 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
	 * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
	 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
	 *
	 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
	 * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
	 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
	 *
	 */
	/*
	 * odst == 1: the embedded destination is nat_oip, so the embedded
	 * source (address, and later port/id) is the side that is rewritten,
	 * using nat_inip.  Otherwise the embedded destination is rewritten
	 * using nat_outip.  a1 is the new address and a2 the old one, both
	 * in host byte order.
	 */
	odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
	if (odst == 1) {
		a1.s_addr = ntohl(nat->nat_inip.s_addr);
		a2.s_addr = ntohl(oip->ip_src.s_addr);
		oip->ip_src.s_addr = htonl(a1.s_addr);
	} else {
		a1.s_addr = ntohl(nat->nat_outip.s_addr);
		a2.s_addr = ntohl(oip->ip_dst.s_addr);
		oip->ip_dst.s_addr = htonl(a1.s_addr);
	}

	/*
	 * Address delta (old - new).  When the subtraction wraps (new > old)
	 * one is taken off before complementing.  The embedded IP header
	 * checksum is only touched if the address really changed.
	 */
	sumd = a2.s_addr - a1.s_addr;
	if (sumd != 0) {
		if (a1.s_addr > a2.s_addr)
			sumd--;
		sumd = ~sumd;

		fix_datacksum(&oip->ip_sum, sumd);
	}

	/* Remember the address-only delta; sumd goes on to include ports. */
	sumd2 = sumd;
	sum1 = 0;
	sum2 = 0;

	/*
	 * Fix UDP pseudo header checksum to compensate for the
	 * IP address change.
	 */
	/* dlen >= 4: both 16-bit port fields must be present behind dp. */
	if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
		/*
		 * Step 2 :
		 * For offending TCP/UDP IP packets, translate the ports as
		 * well, based on the NAT specification. Of course such
		 * a change may be reflected in the ICMP checksum as well.
		 *
		 * Since the port fields are part of the TCP/UDP checksum
		 * of the offending IP packet, you need to adjust that checksum
		 * as well... except that the change in the port numbers should
		 * be offset by the checksum change.  However, the TCP/UDP
		 * checksum will also need to change if there has been an
		 * IP address change.
		 */
		/* sum1 is the new port and sum2 the old one (host order). */
		if (odst == 1) {
			sum1 = ntohs(nat->nat_inport);
			sum2 = ntohs(tcp->th_sport);

			tcp->th_sport = htons(sum1);
		} else {
			sum1 = ntohs(nat->nat_outport);
			sum2 = ntohs(tcp->th_dport);

			tcp->th_dport = htons(sum1);
		}

		sumd += sum1 - sum2;
		if (sumd != 0 || sumd2 != 0) {
			/*
			 * At this point, sumd is the delta to apply to the
			 * TCP/UDP header, given the changes in both the IP
			 * address and the ports and sumd2 is the delta to
			 * apply to the ICMP header, given the IP address
			 * change delta that may need to be applied to the
			 * TCP/UDP checksum instead.
			 *
			 * If we change both the IP and TCP/UDP checksums
			 * then the ICMP checksum changes by the address
			 * delta applied to the TCP/UDP checksum.  If we
			 * do not change the TCP/UDP checksum then we
			 * apply the delta in ports to the ICMP checksum.
			 */
			if (oip->ip_p == IPPROTO_UDP) {
				/*
				 * The UDP checksum is only patched when the
				 * whole 8 byte header is present and the
				 * stored checksum is non-zero.
				 */
				if ((dlen >= 8) && (*csump != 0)) {
					fix_datacksum(csump, sumd);
				} else {
					sumd2 = sum1 - sum2;
					if (sum2 > sum1)
						sumd2--;
				}
			} else if (oip->ip_p == IPPROTO_TCP) {
				/*
				 * The TCP checksum is only patched when at
				 * least 18 bytes are present behind dp.
				 */
				if (dlen >= 18) {
					fix_datacksum(csump, sumd);
				} else {
					sumd2 = sum2 - sum1;
					if (sum1 > sum2)
						sumd2--;
				}
			}

			if (sumd2 != 0) {
				ipnat_t *np;

				np = nat->nat_ptr;
				/* Fold sumd2 down to 16 bits. */
				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);

				/*
				 * fix_outcksum() is used only for an outbound,
				 * non-reversed packet on a redirect rule where
				 * the embedded destination was rewritten;
				 * every other case uses fix_incksum().
				 */
				if ((odst == 0) && (dir == NAT_OUTBOUND) &&
				    (fin->fin_rev == 0) && (np != NULL) &&
				    (np->in_redir & NAT_REDIRECT)) {
					fix_outcksum(fin, &icmp->icmp_cksum,
						     sumd2);
				} else {
					fix_incksum(fin, &icmp->icmp_cksum,
						    sumd2);
				}
			}
		}
	} else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
		icmphdr_t *orgicmp;

		/*
		 * XXX - what if this is bogus hl and we go off the end ?
		 * In this case, nat_icmperrorlookup() will have returned NULL.
		 */
		orgicmp = (icmphdr_t *)dp;

		if (odst == 1) {
			if (orgicmp->icmp_id != nat->nat_inport) {

				/*
				 * Fix ICMP checksum (of the offending ICMP
				 * query packet) to compensate the change
				 * in the ICMP id of the offending ICMP
				 * packet.
				 *
				 * Since you modify orgicmp->icmp_id with
				 * a delta (say x) and you compensate that
				 * in orgicmp->icmp_cksum with a delta
				 * minus x, you don't have to adjust the
				 * overall icmp->icmp_cksum
				 */
				sum1 = ntohs(orgicmp->icmp_id);
				sum2 = ntohs(nat->nat_inport);
				CALC_SUMD(sum1, sum2, sumd);
				orgicmp->icmp_id = nat->nat_inport;
				fix_datacksum(&orgicmp->icmp_cksum, sumd);
			}
		} /* nat_dir == NAT_INBOUND is impossible for icmp queries */
	}
	return nat;
}
3112
3113
3114/*
 * NB: these lookups don't lock access to the list, it is assumed that it has
 * already been done!
3117 */
3118
3119/* ------------------------------------------------------------------------ */
3120/* Function:    nat_inlookup                                                */
3121/* Returns:     nat_t* - NULL == no match,                                  */
3122/*                       else pointer to matching NAT entry                 */
3123/* Parameters:  fin(I)    - pointer to packet information                   */
3124/*              flags(I)  - NAT flags for this packet                       */
3125/*              p(I)      - protocol for this packet                        */
3126/*              src(I)    - source IP address                               */
3127/*              mapdst(I) - destination IP address                          */
3128/*                                                                          */
3129/* Lookup a nat entry based on the mapped destination ip address/port and   */
3130/* real source address/port.  We use this lookup when receiving a packet,   */
3131/* we're looking for a table entry, based on the destination address.       */
3132/*                                                                          */
3133/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3134/*                                                                          */
3135/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3136/*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3137/*                                                                          */
3138/* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3139/*            the packet is of said protocol                                */
3140/* ------------------------------------------------------------------------ */
nat_t *
nat_inlookup(fr_info_t *fin, u_int flags, u_int p, struct in_addr src,
	struct in_addr mapdst)
{
	u_short sport, dport;
	grehdr_t *gre;		/* NOTE(review): set to NULL, never used */
	ipnat_t *ipn;
	u_int sflags;
	nat_t *nat;
	int nflags;
	u_32_t dst;
	void *ifp;
	u_int hv;

	ifp = fin->fin_ifp;
	sport = 0;
	dport = 0;
	gre = NULL;
	dst = mapdst.s_addr;
	sflags = flags & NAT_TCPUDPICMP;

	/*
	 * Work out the port values used for hashing and matching.  TCP/UDP
	 * take both from fin_data[] via htons(); for ICMP the single value
	 * in fin_data[1] is used as is, as the source side for ICMP errors
	 * and the destination side otherwise.
	 */
	switch (p)
	{
	case IPPROTO_TCP :
	case IPPROTO_UDP :
		sport = htons(fin->fin_data[0]);
		dport = htons(fin->fin_data[1]);
		break;
	case IPPROTO_ICMP :
		if (flags & IPN_ICMPERR)
			sport = fin->fin_data[1];
		else
			dport = fin->fin_data[1];
		break;
	default :
		break;
	}


	/* Caller asked for a wildcard port match only: skip the exact pass. */
	if ((flags & SI_WILDP) != 0)
		goto find_in_wild_ports;

	/*
	 * Exact lookup in table [1], hashed on the mapped destination
	 * address/port and the source address/port.
	 */
	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
	hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
	nat = nat_table[1][hv];
	for (; nat; nat = nat->nat_hnext[1]) {
		/* An entry bound to an interface only matches that one. */
		if (nat->nat_ifps[0] != NULL) {
			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
				continue;
		}

		nflags = nat->nat_flags;

		/*
		 * Addresses must match and so must the protocol; when no
		 * protocol was given (p == 0) the TCP/UDP/ICMP flag bits
		 * are compared instead.
		 */
		if (nat->nat_oip.s_addr == src.s_addr &&
		    nat->nat_outip.s_addr == dst &&
		    (((p == 0) &&
		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
		     || (p == nat->nat_p))) {
			switch (p)
			{
#if 0
			case IPPROTO_GRE :
				if (nat->nat_call[1] != fin->fin_data[0])
					continue;
				break;
#endif
			case IPPROTO_ICMP :
				if ((flags & IPN_ICMPERR) != 0) {
					if (nat->nat_outport != sport)
						continue;
				} else {
					if (nat->nat_outport != dport)
						continue;
				}
				break;
			case IPPROTO_TCP :
			case IPPROTO_UDP :
				if (nat->nat_oport != sport)
					continue;
				if (nat->nat_outport != dport)
					continue;
				break;
			default :
				break;
			}

			/*
			 * If the entry has a rule and proxy session state
			 * attached, appr_match() must return 0 for it to
			 * be accepted.
			 */
			ipn = nat->nat_ptr;
			if ((ipn != NULL) && (nat->nat_aps != NULL))
				if (appr_match(fin, nat) != 0)
					continue;
			/* First use: remember the interface it was seen on. */
			if ((nat->nat_ifps[0] == NULL) && (ifp != NULL))
				nat->nat_ifps[0] = ifp;
			return nat;
		}
	}

	/*
	 * So if we didn't find it but there are wildcard members in the hash
	 * table, go back and look for them.  We do this search and update here
	 * because it is modifying the NAT table and we want to do this only
	 * for the first packet that matches.  The exception, of course, is
	 * for "dummy" (FI_IGNORE) lookups.
	 */
find_in_wild_ports:
	/* Wildcards only apply to TCP/UDP and only when a search is asked. */
	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
		return NULL;
	if (nat_stats.ns_wilds == 0)
		return NULL;

	/*
	 * Drop the lock held on entry and take ipf_nat for writing, since
	 * a match below rewrites the entry and moves it in the hash tables.
	 */
	RWLOCK_EXIT(&ipf_nat);

	/* Wildcard entries are hashed with zero in place of the ports. */
	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
	hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);

	WRITE_ENTER(&ipf_nat);

	nat = nat_table[1][hv];
	for (; nat; nat = nat->nat_hnext[1]) {
		if (nat->nat_ifps[0] != NULL) {
			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
				continue;
		}

		if (nat->nat_p != fin->fin_p)
			continue;
		if (nat->nat_oip.s_addr != src.s_addr ||
		    nat->nat_outip.s_addr != dst)
			continue;

		nflags = nat->nat_flags;
		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
			continue;

		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
			       NAT_INBOUND) == 1) {
			/* Dummy lookup: report the entry but change nothing. */
			if ((fin->fin_flx & FI_IGNORE) != 0)
				break;
			/*
			 * A clonable entry stays as it is and a copy made by
			 * fr_natclone() is filled in; otherwise this entry
			 * stops being a wildcard and the count is dropped.
			 */
			if ((nflags & SI_CLONE) != 0) {
				nat = fr_natclone(fin, nat);
				if (nat == NULL)
					break;
			} else {
				MUTEX_ENTER(&ipf_nat_new);
				nat_stats.ns_wilds--;
				MUTEX_EXIT(&ipf_nat_new);
			}
			if ((nat->nat_ifps[0] == NULL) && (ifp != NULL))
				nat->nat_ifps[0] = ifp;
			/* Pin the ports down and rehash with them included. */
			nat->nat_oport = sport;
			nat->nat_outport = dport;
			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
			nat_tabmove(nat);
			break;
		}
	}

	/* Hand the lock back in downgraded form before returning. */
	MUTEX_DOWNGRADE(&ipf_nat);

	/* NULL here if the loop ran off the end or the clone failed. */
	return nat;
}
3301
3302
3303/* ------------------------------------------------------------------------ */
3304/* Function:    nat_tabmove                                                 */
3305/* Returns:     Nil                                                         */
3306/* Parameters:  nat(I) - pointer to NAT structure                           */
3307/* Write Lock:  ipf_nat                                                     */
3308/*                                                                          */
3309/* This function is only called for TCP/UDP NAT table entries where the     */
3310/* original was placed in the table without hashing on the ports and we now */
3311/* want to include hashing on port numbers.                                 */
3312/* ------------------------------------------------------------------------ */
3313static void
3314nat_tabmove(nat_t *nat)
3315{
3316	nat_t **natp;
3317	u_int hv;
3318
3319	if (nat->nat_flags & SI_CLONE)
3320		return;
3321
3322	/*
3323	 * Remove the NAT entry from the old location
3324	 */
3325	if (nat->nat_hnext[0])
3326		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3327	*nat->nat_phnext[0] = nat->nat_hnext[0];
3328	nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3329
3330	if (nat->nat_hnext[1])
3331		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3332	*nat->nat_phnext[1] = nat->nat_hnext[1];
3333	nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3334
3335	/*
3336	 * Add into the NAT table in the new position
3337	 */
3338	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3339	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3340			 ipf_nattable_sz);
3341	nat->nat_hv[0] = hv;
3342	natp = &nat_table[0][hv];
3343	if (*natp)
3344		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3345	nat->nat_phnext[0] = natp;
3346	nat->nat_hnext[0] = *natp;
3347	*natp = nat;
3348	nat_stats.ns_bucketlen[0][hv]++;
3349
3350	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3351	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3352			 ipf_nattable_sz);
3353	nat->nat_hv[1] = hv;
3354	natp = &nat_table[1][hv];
3355	if (*natp)
3356		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3357	nat->nat_phnext[1] = natp;
3358	nat->nat_hnext[1] = *natp;
3359	*natp = nat;
3360	nat_stats.ns_bucketlen[1][hv]++;
3361}
3362
3363
3364/* ------------------------------------------------------------------------ */
3365/* Function:    nat_outlookup                                               */
3366/* Returns:     nat_t* - NULL == no match,                                  */
3367/*                       else pointer to matching NAT entry                 */
3368/* Parameters:  fin(I)   - pointer to packet information                    */
3369/*              flags(I) - NAT flags for this packet                        */
3370/*              p(I)     - protocol for this packet                         */
3371/*              src(I)   - source IP address                                */
3372/*              dst(I)   - destination IP address                           */
3373/*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3374/*                                                                          */
3375/* Lookup a nat entry based on the source 'real' ip address/port and        */
3376/* destination address/port.  We use this lookup when sending a packet out, */
3377/* we're looking for a table entry, based on the source address.            */
3378/*                                                                          */
3379/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3380/*                                                                          */
3381/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3382/*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3383/*                                                                          */
3384/* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3385/*            the packet is of said protocol                                */
3386/* ------------------------------------------------------------------------ */
nat_t *
nat_outlookup(fr_info_t *fin, u_int flags, u_int p, struct in_addr src,
	struct in_addr dst)
{
	u_short sport, dport;
	u_int sflags;
	ipnat_t *ipn;
	u_32_t srcip;
	nat_t *nat;
	int nflags;
	void *ifp;
	u_int hv;

	ifp = fin->fin_ifp;
	srcip = src.s_addr;
	sflags = flags & IPN_TCPUDPICMP;
	sport = 0;
	dport = 0;

	/*
	 * Work out the port values used for hashing and matching.  TCP/UDP
	 * take both from fin_data[] via htons(); for ICMP the single value
	 * in fin_data[1] is used as is, as the source side for ICMP errors
	 * and the destination side otherwise.
	 */
	switch (p)
	{
	case IPPROTO_TCP :
	case IPPROTO_UDP :
		sport = htons(fin->fin_data[0]);
		dport = htons(fin->fin_data[1]);
		break;
	case IPPROTO_ICMP :
		if (flags & IPN_ICMPERR)
			sport = fin->fin_data[1];
		else
			dport = fin->fin_data[1];
		break;
	default :
		break;
	}

	/* Caller asked for a wildcard port match only: skip the exact pass. */
	if ((flags & SI_WILDP) != 0)
		goto find_out_wild_ports;

	/*
	 * Exact lookup in table [0], hashed on the real source address/port
	 * and the destination address/port.
	 */
	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
	nat = nat_table[0][hv];
	for (; nat; nat = nat->nat_hnext[0]) {
		/* An entry bound to an interface only matches that one. */
		if (nat->nat_ifps[1] != NULL) {
			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
				continue;
		}

		nflags = nat->nat_flags;

		/*
		 * Addresses must match and so must the protocol; when no
		 * protocol was given (p == 0) the TCP/UDP/ICMP flag bits
		 * are compared instead.
		 */
		if (nat->nat_inip.s_addr == srcip &&
		    nat->nat_oip.s_addr == dst.s_addr &&
		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
		     || (p == nat->nat_p))) {
			/* Only TCP and UDP get a port comparison here. */
			switch (p)
			{
#if 0
			case IPPROTO_GRE :
				if (nat->nat_call[1] != fin->fin_data[0])
					continue;
				break;
#endif
			case IPPROTO_TCP :
			case IPPROTO_UDP :
				if (nat->nat_oport != dport)
					continue;
				if (nat->nat_inport != sport)
					continue;
				break;
			default :
				break;
			}

			/*
			 * If the entry has a rule and proxy session state
			 * attached, appr_match() must return 0 for it to
			 * be accepted.
			 */
			ipn = nat->nat_ptr;
			if ((ipn != NULL) && (nat->nat_aps != NULL))
				if (appr_match(fin, nat) != 0)
					continue;
			/* First use: remember the interface it was seen on. */
			if ((nat->nat_ifps[1] == NULL) && (ifp != NULL))
				nat->nat_ifps[1] = ifp;
			return nat;
		}
	}

	/*
	 * So if we didn't find it but there are wildcard members in the hash
	 * table, go back and look for them.  We do this search and update here
	 * because it is modifying the NAT table and we want to do this only
	 * for the first packet that matches.  The exception, of course, is
	 * for "dummy" (FI_IGNORE) lookups.
	 */
find_out_wild_ports:
	/* Wildcards only apply to TCP/UDP and only when a search is asked. */
	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
		return NULL;
	if (nat_stats.ns_wilds == 0)
		return NULL;

	/*
	 * Drop the lock held on entry and take ipf_nat for writing, since
	 * a match below rewrites the entry and moves it in the hash tables.
	 */
	RWLOCK_EXIT(&ipf_nat);

	/* Wildcard entries are hashed with zero in place of the ports. */
	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
	hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);

	WRITE_ENTER(&ipf_nat);

	nat = nat_table[0][hv];
	for (; nat; nat = nat->nat_hnext[0]) {
		if (nat->nat_ifps[1] != NULL) {
			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
				continue;
		}

		if (nat->nat_p != fin->fin_p)
			continue;
		if ((nat->nat_inip.s_addr != srcip) ||
		    (nat->nat_oip.s_addr != dst.s_addr))
			continue;

		nflags = nat->nat_flags;
		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
			continue;

		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
			       NAT_OUTBOUND) == 1) {
			/* Dummy lookup: report the entry but change nothing. */
			if ((fin->fin_flx & FI_IGNORE) != 0)
				break;
			/*
			 * A clonable entry stays as it is and a copy made by
			 * fr_natclone() is filled in; otherwise this entry
			 * stops being a wildcard and the count is dropped.
			 */
			if ((nflags & SI_CLONE) != 0) {
				nat = fr_natclone(fin, nat);
				if (nat == NULL)
					break;
			} else {
				MUTEX_ENTER(&ipf_nat_new);
				nat_stats.ns_wilds--;
				MUTEX_EXIT(&ipf_nat_new);
			}
			if ((nat->nat_ifps[1] == NULL) && (ifp != NULL))
				nat->nat_ifps[1] = ifp;
			/*
			 * Pin the ports down; an unset (zero) outside port
			 * defaults to the source port.  Then rehash with the
			 * ports included.
			 */
			nat->nat_inport = sport;
			nat->nat_oport = dport;
			if (nat->nat_outport == 0)
				nat->nat_outport = sport;
			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
			nat_tabmove(nat);
			break;
		}
	}

	/* Hand the lock back in downgraded form before returning. */
	MUTEX_DOWNGRADE(&ipf_nat);

	/* NULL here if the loop ran off the end or the clone failed. */
	return nat;
}
3536
3537
3538/* ------------------------------------------------------------------------ */
3539/* Function:    nat_lookupredir                                             */
3540/* Returns:     nat_t* - NULL == no match,                                  */
3541/*                       else pointer to matching NAT entry                 */
3542/* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3543/*                      entry for.                                          */
3544/*                                                                          */
3545/* Lookup the NAT tables to search for a matching redirect                  */
3546/* The contents of natlookup_t should imitate those found in a packet that  */
3547/* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3548/* We can do the lookup in one of two ways, imitating an inbound or         */
3549/* outbound  packet.  By default we assume outbound, unless IPN_IN is set.  */
3550/* For IN, the fields are set as follows:                                   */
3551/*     nl_real* = source information                                        */
3552/*     nl_out* = destination information (translated)                       */
3553/* For an out packet, the fields are set like this:                         */
3554/*     nl_in* = source information (untranslated)                           */
3555/*     nl_out* = destination information (translated)                       */
3556/* ------------------------------------------------------------------------ */
3557nat_t *
3558nat_lookupredir(natlookup_t *np)
3559{
3560	fr_info_t fi;
3561	nat_t *nat;
3562
3563	bzero((char *)&fi, sizeof(fi));
3564	if (np->nl_flags & IPN_IN) {
3565		fi.fin_data[0] = ntohs(np->nl_realport);
3566		fi.fin_data[1] = ntohs(np->nl_outport);
3567	} else {
3568		fi.fin_data[0] = ntohs(np->nl_inport);
3569		fi.fin_data[1] = ntohs(np->nl_outport);
3570	}
3571	if (np->nl_flags & IPN_TCP)
3572		fi.fin_p = IPPROTO_TCP;
3573	else if (np->nl_flags & IPN_UDP)
3574		fi.fin_p = IPPROTO_UDP;
3575	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3576		fi.fin_p = IPPROTO_ICMP;
3577
3578	/*
3579	 * We can do two sorts of lookups:
3580	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3581	 * - default: we have the `in' and `out' address, look for `real'.
3582	 */
3583	if (np->nl_flags & IPN_IN) {
3584		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3585					np->nl_realip, np->nl_outip))) {
3586			np->nl_inip = nat->nat_inip;
3587			np->nl_inport = nat->nat_inport;
3588		}
3589	} else {
3590		/*
3591		 * If nl_inip is non null, this is a lookup based on the real
3592		 * ip address. Else, we use the fake.
3593		 */
3594		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3595					 np->nl_inip, np->nl_outip))) {
3596
3597			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3598				fr_info_t fin;
3599				bzero((char *)&fin, sizeof(fin));
3600				fin.fin_p = nat->nat_p;
3601				fin.fin_data[0] = ntohs(nat->nat_outport);
3602				fin.fin_data[1] = ntohs(nat->nat_oport);
3603				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3604						 nat->nat_outip,
3605						 nat->nat_oip) != NULL) {
3606					np->nl_flags &= ~IPN_FINDFORWARD;
3607				}
3608			}
3609
3610			np->nl_realip = nat->nat_outip;
3611			np->nl_realport = nat->nat_outport;
3612		}
3613 	}
3614
3615	return nat;
3616}
3617
3618
3619/* ------------------------------------------------------------------------ */
3620/* Function:    nat_match                                                   */
3621/* Returns:     int - 0 == no match, 1 == match                             */
3622/* Parameters:  fin(I)   - pointer to packet information                    */
3623/*              np(I)    - pointer to NAT rule                              */
3624/*                                                                          */
3625/* Pull the matching of a packet against a NAT rule out of that complex     */
3626/* loop inside fr_checknatin() and lay it out properly in its own function. */
3627/* ------------------------------------------------------------------------ */
3628static int
3629nat_match(fr_info_t *fin, ipnat_t *np)
3630{
3631	frtuc_t *ft;
3632
3633	if (fin->fin_v != 4)
3634		return 0;
3635
3636	if (np->in_p && fin->fin_p != np->in_p)
3637		return 0;
3638
3639	if (fin->fin_out) {
3640		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3641			return 0;
3642		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3643		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3644			return 0;
3645		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3646		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3647			return 0;
3648	} else {
3649		if (!(np->in_redir & NAT_REDIRECT))
3650			return 0;
3651		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3652		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3653			return 0;
3654		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3655		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3656			return 0;
3657	}
3658
3659	ft = &np->in_tuc;
3660	if (!(fin->fin_flx & FI_TCPUDP) ||
3661	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3662		if (ft->ftu_scmp || ft->ftu_dcmp)
3663			return 0;
3664		return 1;
3665	}
3666
3667	return fr_tcpudpchk(fin, ft);
3668}
3669
3670
3671/* ------------------------------------------------------------------------ */
3672/* Function:    nat_update                                                  */
3673/* Returns:     Nil                                                         */
/* Parameters:  fin(I) - pointer to packet information                     */
/*              nat(I) - pointer to NAT structure                          */
3676/* Locks:       nat_lock                                                    */
3677/*                                                                          */
3678/* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3679/* called with fin_rev updated - i.e. after calling nat_proto().            */
3680/*                                                                          */
3681/* This must be called *after* nat_proto() because we need fin_rev set.     */
3682/* ------------------------------------------------------------------------ */
3683void
3684nat_update(fr_info_t *fin, nat_t *nat)
3685{
3686	ipftq_t *ifq, *ifq2;
3687	ipftqent_t *tqe;
3688	ipnat_t *np = nat->nat_ptr;
3689
3690	tqe = &nat->nat_tqe;
3691	ifq = tqe->tqe_ifq;
3692
3693	/*
3694	 * We allow over-riding of NAT timeouts from NAT rules, even for
3695	 * TCP, however, if it is TCP and there is no rule timeout set,
3696	 * then do not update the timeout here.
3697	 */
3698	if (np != NULL)
3699		ifq2 = np->in_tqehead[fin->fin_rev];
3700	else
3701		ifq2 = NULL;
3702
3703	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3704		u_32_t end, ack;
3705		u_char tcpflags;
3706		tcphdr_t *tcp;
3707		int dsize;
3708
3709		tcp = fin->fin_dp;
3710		tcpflags = tcp->th_flags;
3711		dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
3712			((tcpflags & TH_SYN) ? 1 : 0) +
3713			((tcpflags & TH_FIN) ? 1 : 0);
3714
3715		ack = ntohl(tcp->th_ack);
3716		end = ntohl(tcp->th_seq) + dsize;
3717
3718		if (SEQ_GT(ack, nat->nat_seqnext[1 - fin->fin_rev]))
3719			nat->nat_seqnext[1 - fin->fin_rev] = ack;
3720
3721		if (nat->nat_seqnext[fin->fin_rev] == 0)
3722			nat->nat_seqnext[fin->fin_rev] = end;
3723
3724		(void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3725	} else {
3726		if (ifq2 == NULL) {
3727			if (nat->nat_p == IPPROTO_UDP)
3728				ifq2 = &nat_udptq;
3729			else if (nat->nat_p == IPPROTO_ICMP)
3730				ifq2 = &nat_icmptq;
3731			else
3732				ifq2 = &nat_iptq;
3733		}
3734
3735		fr_movequeue(tqe, ifq, ifq2);
3736	}
3737}
3738
3739
3740/* ------------------------------------------------------------------------ */
3741/* Function:    fr_checknatout                                              */
3742/* Returns:     int - -1 == packet failed NAT checks so block it,           */
3743/*                     0 == no packet translation occurred,                 */
3744/*                     1 == packet was successfully translated.             */
3745/* Parameters:  fin(I)   - pointer to packet information                    */
3746/*              passp(I) - pointer to filtering result flags                */
3747/*                                                                          */
/* Check to see if an outgoing packet should be changed.  ICMP packets are  */
/* first checked to see if they match an existing entry (if an error),      */
/* otherwise a search of the current NAT table is made.  If neither results */
/* in a match then a search for a matching NAT rule is made.  Create a new  */
/* NAT entry if we matched a NAT rule.  Lastly, actually change the         */
3753/* packet header(s) as required.                                            */
3754/* ------------------------------------------------------------------------ */
3755int
3756fr_checknatout(fr_info_t *fin, u_32_t *passp)
3757{
3758	ipnat_t *np = NULL, *npnext;
3759	struct ifnet *ifp, *sifp;
3760	icmphdr_t *icmp = NULL;
3761	tcphdr_t *tcp = NULL;
3762	int rval, natfailed;
3763	u_int nflags = 0;
3764	u_32_t ipa, iph;
3765	int natadd = 1;
3766	frentry_t *fr;
3767	nat_t *nat;
3768
3769	if (fr_nat_lock != 0)
3770		return 0;
3771	if (nat_stats.ns_rules == 0 && nat_instances == NULL)
3772		return 0;
3773
3774	natfailed = 0;
3775	fr = fin->fin_fr;
3776	sifp = fin->fin_ifp;
3777	if (fr != NULL) {
3778		ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3779		if ((ifp != NULL) && (ifp != (void *)-1))
3780			fin->fin_ifp = ifp;
3781	}
3782	ifp = fin->fin_ifp;
3783
3784	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3785		switch (fin->fin_p)
3786		{
3787		case IPPROTO_TCP :
3788			nflags = IPN_TCP;
3789			break;
3790		case IPPROTO_UDP :
3791			nflags = IPN_UDP;
3792			break;
3793		case IPPROTO_ICMP :
3794			icmp = fin->fin_dp;
3795
3796			/*
3797			 * This is an incoming packet, so the destination is
3798			 * the icmp_id and the source port equals 0
3799			 */
3800			if (nat_icmpquerytype4(icmp->icmp_type))
3801				nflags = IPN_ICMPQUERY;
3802			break;
3803		default :
3804			break;
3805		}
3806
3807		if ((nflags & IPN_TCPUDP))
3808			tcp = fin->fin_dp;
3809	}
3810
3811	ipa = fin->fin_saddr;
3812
3813	READ_ENTER(&ipf_nat);
3814
3815	if (((fin->fin_flx & FI_ICMPERR) != 0) &&
3816	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3817		/*EMPTY*/;
3818	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3819		natadd = 0;
3820	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3821				      fin->fin_src, fin->fin_dst))) {
3822		nflags = nat->nat_flags;
3823	} else if (fin->fin_off == 0) {
3824		u_32_t hv, msk, nmsk;
3825
3826		msk = 0xffffffff;
3827		nmsk = nat_masks;
3828		/*
3829		 * If there is no current entry in the nat table for this IP#,
3830		 * create one for it (if there is a matching rule).
3831		 */
3832maskloop:
3833		iph = ipa & htonl(msk);
3834		hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3835		for (np = nat_rules[hv]; np; np = npnext) {
3836			npnext = np->in_mnext;
3837			if (np->in_ifps[1] && (np->in_ifps[1] != ifp))
3838				continue;
3839			if (np->in_v != fin->fin_v)
3840				continue;
3841			if (np->in_p && (np->in_p != fin->fin_p))
3842				continue;
3843			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3844				continue;
3845			if (np->in_flags & IPN_FILTER) {
3846				if (!nat_match(fin, np))
3847					continue;
3848			} else if ((ipa & np->in_inmsk) != np->in_inip)
3849				continue;
3850
3851			if ((fr != NULL) &&
3852			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3853				continue;
3854
3855			if (*np->in_plabel != '\0') {
3856				if (((np->in_flags & IPN_FILTER) == 0) &&
3857				    (np->in_dport != tcp->th_dport))
3858					continue;
3859				if (appr_ok(fin, tcp, np) == 0)
3860					continue;
3861			}
3862
3863			MUTEX_ENTER(&ipf_nat_new);
3864			nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND);
3865			MUTEX_EXIT(&ipf_nat_new);
3866			if (nat != NULL) {
3867				natfailed = 0;
3868				break;
3869			}
3870			natfailed = -1;
3871		}
3872		if ((np == NULL) && (nmsk != 0)) {
3873			while (nmsk) {
3874				msk <<= 1;
3875				if (nmsk & 0x80000000)
3876					break;
3877				nmsk <<= 1;
3878			}
3879			if (nmsk != 0) {
3880				nmsk <<= 1;
3881				goto maskloop;
3882			}
3883		}
3884	}
3885
3886	if (nat != NULL) {
3887		rval = fr_natout(fin, nat, natadd, nflags);
3888		if (rval == 1) {
3889			MUTEX_ENTER(&nat->nat_lock);
3890			nat_update(fin, nat);
3891			nat->nat_bytes[1] += fin->fin_plen;
3892			nat->nat_pkts[1]++;
3893			fin->fin_pktnum = nat->nat_pkts[1];
3894			MUTEX_EXIT(&nat->nat_lock);
3895		}
3896	} else
3897		rval = natfailed;
3898	RWLOCK_EXIT(&ipf_nat);
3899
3900	if (rval == -1) {
3901		if (passp != NULL)
3902			*passp = FR_BLOCK;
3903		fin->fin_flx |= FI_BADNAT;
3904	}
3905	fin->fin_ifp = sifp;
3906	return rval;
3907}
3908
3909/* ------------------------------------------------------------------------ */
3910/* Function:    fr_natout                                                   */
3911/* Returns:     int - -1 == packet failed NAT checks so block it,           */
3912/*                     1 == packet was successfully translated.             */
3913/* Parameters:  fin(I)    - pointer to packet information                   */
3914/*              nat(I)    - pointer to NAT structure                        */
3915/*              natadd(I) - flag indicating if it is safe to add frag cache */
3916/*              nflags(I) - NAT flags set for this packet                   */
3917/*                                                                          */
3918/* Translate a packet coming "out" on an interface.                         */
3919/* ------------------------------------------------------------------------ */
3920int
3921fr_natout(fr_info_t *fin, nat_t *nat, int natadd, u_32_t nflags)
3922{
3923	icmphdr_t *icmp;
3924	tcphdr_t *tcp;
3925	ipnat_t *np;
3926	int i;
3927
3928	tcp = NULL;
3929	icmp = NULL;
3930	np = nat->nat_ptr;
3931
3932	if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3933		(void) fr_nat_newfrag(fin, 0, nat);
3934
3935	/*
3936	 * Fix up checksums, not by recalculating them, but
3937	 * simply computing adjustments.
3938	 * This is only done for STREAMS based IP implementations where the
3939	 * checksum has already been calculated by IP.  In all other cases,
3940	 * IPFilter is called before the checksum needs calculating so there
3941	 * is no call to modify whatever is in the header now.
3942	 */
3943	if (fin->fin_v == 4) {
3944		if (nflags == IPN_ICMPERR) {
3945			u_32_t s1, s2, sumd;
3946
3947			s1 = LONG_SUM(ntohl(fin->fin_saddr));
3948			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3949			CALC_SUMD(s1, s2, sumd);
3950			fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3951		}
3952#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3953    defined(linux) || defined(BRIDGE_IPF)
3954		else {
3955			/*
3956			 * Strictly speaking, this isn't necessary on BSD
3957			 * kernels because they do checksum calculation after
3958			 * this code has run BUT if ipfilter is being used
3959			 * to do NAT as a bridge, that code doesn't exist.
3960			 */
3961			if (nat->nat_dir == NAT_OUTBOUND)
3962				fix_outcksum(fin, &fin->fin_ip->ip_sum,
3963					     nat->nat_ipsumd);
3964			else
3965				fix_incksum(fin, &fin->fin_ip->ip_sum,
3966					    nat->nat_ipsumd);
3967		}
3968#endif
3969	}
3970
3971	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3972		u_short *csump;
3973
3974		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3975			tcp = fin->fin_dp;
3976
3977			tcp->th_sport = nat->nat_outport;
3978			fin->fin_data[0] = ntohs(nat->nat_outport);
3979		}
3980
3981		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3982			icmp = fin->fin_dp;
3983			icmp->icmp_id = nat->nat_outport;
3984		}
3985
3986		csump = nat_proto(fin, nat, nflags);
3987
3988		/*
3989		 * The above comments do not hold for layer 4 (or higher)
3990		 * checksums...
3991		 */
3992		if (csump != NULL) {
3993			if (nat->nat_dir == NAT_OUTBOUND)
3994				fix_outcksum(fin, csump, nat->nat_sumd[1]);
3995			else
3996				fix_incksum(fin, csump, nat->nat_sumd[1]);
3997		}
3998	}
3999
4000	fin->fin_ip->ip_src = nat->nat_outip;
4001#ifdef	IPFILTER_SYNC
4002	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4003#endif
4004	/* ------------------------------------------------------------- */
4005	/* A few quick notes:						 */
4006	/*	Following are test conditions prior to calling the 	 */
4007	/*	appr_check routine.					 */
4008	/*								 */
4009	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4010	/*	with a redirect rule, we attempt to match the packet's	 */
4011	/*	source port against in_dport, otherwise	we'd compare the */
4012	/*	packet's destination.			 		 */
4013	/* ------------------------------------------------------------- */
4014	if ((np != NULL) && (np->in_apr != NULL)) {
4015		i = appr_check(fin, nat);
4016		if (i == 0)
4017			i = 1;
4018	} else
4019		i = 1;
4020	ATOMIC_INCL(nat_stats.ns_mapped[1]);
4021	fin->fin_flx |= FI_NATED;
4022	return i;
4023}
4024
4025
/* ------------------------------------------------------------------------ */
/* Function:    fr_checknatin                                               */
/* Returns:     int - -1 == packet failed NAT checks so block it,           */
/*                     0 == no packet translation occurred,                 */
/*                     1 == packet was successfully translated.             */
/* Parameters:  fin(I)   - pointer to packet information                    */
/*              passp(O) - pointer to filtering result flags; FR_BLOCK is   */
/*                         stored there when -1 is returned (may be NULL)   */
/* Locks:       takes ipf_nat (READ) around the lookup and translation      */
/*                                                                          */
/* Check to see if an incoming packet should be changed.  ICMP packets are  */
/* first checked to see if they match an existing entry (if an error),      */
/* otherwise a search of the current NAT table is made.  If neither results */
/* in a match then a search for a matching NAT rule is made.  Create a new  */
/* NAT entry if we matched a NAT rule.  Lastly, actually change the         */
/* packet header(s) as required.                                            */
/* ------------------------------------------------------------------------ */
int
fr_checknatin(fr_info_t *fin, u_32_t *passp)
{
	ipnat_t *np, *npnext;
	u_int nflags, natadd;
	int rval, natfailed;
	struct ifnet *ifp;
	struct in_addr in;
	icmphdr_t *icmp;
	tcphdr_t *tcp;
	u_short dport;
	nat_t *nat;
	u_32_t iph;

	/*
	 * Nothing to do while fr_nat_lock is set, or when there are neither
	 * rules nor active sessions.
	 */
	if (fr_nat_lock != 0)
		return 0;
	if (nat_stats.ns_rules == 0 && nat_instances == NULL)
		return 0;

	tcp = NULL;
	icmp = NULL;
	dport = 0;
	natadd = 1;
	nflags = 0;
	natfailed = 0;
	ifp = fin->fin_ifp;

	/*
	 * Work out the protocol flags and the "destination port" used for
	 * rule matching.  This is only attempted when the packet is not
	 * flagged short and has a fragment offset of zero.
	 */
	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
		switch (fin->fin_p)
		{
		case IPPROTO_TCP :
			nflags = IPN_TCP;
			break;
		case IPPROTO_UDP :
			nflags = IPN_UDP;
			break;
		case IPPROTO_ICMP :
			icmp = fin->fin_dp;

			/*
			 * This is an incoming packet, so the destination is
			 * the icmp_id and the source port equals 0
			 */
			if (nat_icmpquerytype4(icmp->icmp_type)) {
				nflags = IPN_ICMPQUERY;
				dport = icmp->icmp_id;
			} break;
		default :
			break;
		}

		if ((nflags & IPN_TCPUDP)) {
			tcp = fin->fin_dp;
			dport = tcp->th_dport;
		}
	}

	in = fin->fin_dst;

	READ_ENTER(&ipf_nat);

	/*
	 * Look for an existing session, in this order: ICMP error match,
	 * known fragment, NAT table lookup.  Only if all three fail (and
	 * the fragment offset is zero) are the rdr rules searched.
	 */
	if (((fin->fin_flx & FI_ICMPERR) != 0) &&
	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
		/*EMPTY*/;
	else if ((fin->fin_flx & FI_FRAG) &&
		 (nat = fr_nat_knownfrag(fin)))
		natadd = 0;	/* matched via the frag cache: do not re-add */
	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
				     fin->fin_src, in))) {
		nflags = nat->nat_flags;
	} else if (fin->fin_off == 0) {
		u_32_t hv, msk, rmsk;

		msk = 0xffffffff;
		rmsk = rdr_masks;
		/*
		 * If there is no current entry in the nat table for this IP#,
		 * create one for it (if there is a matching rule).
		 */
maskloop:
		/* Hash the destination address under the current mask. */
		iph = in.s_addr & htonl(msk);
		hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
		for (np = rdr_rules[hv]; np; np = npnext) {
			/* Saved now; re-checked under ipf_nat_new below. */
			npnext = np->in_rnext;
			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
				continue;
			if (np->in_v != fin->fin_v)
				continue;
			if (np->in_p && (np->in_p != fin->fin_p))
				continue;
			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
				continue;
			if (np->in_flags & IPN_FILTER) {
				if (!nat_match(fin, np))
					continue;
			} else {
				if ((in.s_addr & np->in_outmsk) != np->in_outip)
					continue;
				/* dport must lie within [in_pmin, in_pmax] */
				if (np->in_pmin &&
				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
				     (ntohs(dport) < ntohs(np->in_pmin))))
					continue;
			}

			/* A rule naming a proxy must pass appr_ok(). */
			if (*np->in_plabel != '\0') {
				if (!appr_ok(fin, tcp, np)) {
					continue;
				}
			}

			/*
			 * If we've matched a round-robin rule but it has
			 * moved in the list since we got it, start over as
			 * this is now no longer correct.
			 */
			MUTEX_ENTER(&ipf_nat_new);
			if ((npnext != np->in_rnext) &&
			    (np->in_flags & IPN_ROUNDR)) {
				MUTEX_EXIT(&ipf_nat_new);
				goto maskloop;
			}
			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
			MUTEX_EXIT(&ipf_nat_new);
			if (nat != NULL) {
				natfailed = 0;
				break;
			}
			/*
			 * A rule matched but no session could be created.
			 * Keep searching; if nothing later succeeds the
			 * packet is reported as failed (-1).
			 */
			natfailed = -1;
		}

		/*
		 * No rule matched in this bucket.  Shift msk left once for
		 * every bit of rmsk examined until a set top bit is found,
		 * then retry with that shorter mask.  The search ends once
		 * rmsk has no bits left.
		 * NOTE(review): rdr_masks looks like a bitmap of the netmask
		 * lengths in use by rdr rules - confirm against where it is
		 * maintained.
		 */
		if ((np == NULL) && (rmsk != 0)) {
			while (rmsk) {
				msk <<= 1;
				if (rmsk & 0x80000000)
					break;
				rmsk <<= 1;
			}
			if (rmsk != 0) {
				rmsk <<= 1;
				goto maskloop;
			}
		}
	}

	if (nat != NULL) {
		rval = fr_natin(fin, nat, natadd, nflags);
		if (rval == 1) {
			/* Per-session inbound counters, under nat_lock. */
			MUTEX_ENTER(&nat->nat_lock);
			nat_update(fin, nat);
			nat->nat_bytes[0] += fin->fin_plen;
			nat->nat_pkts[0]++;
			fin->fin_pktnum = nat->nat_pkts[0];
			MUTEX_EXIT(&nat->nat_lock);
		}
	} else
		rval = natfailed;
	RWLOCK_EXIT(&ipf_nat);

	/* On failure, tell the filter to block and flag the packet. */
	if (rval == -1) {
		if (passp != NULL)
			*passp = FR_BLOCK;
		fin->fin_flx |= FI_BADNAT;
	}
	return rval;
}
4206
4207
4208/* ------------------------------------------------------------------------ */
4209/* Function:    fr_natin                                                    */
4210/* Returns:     int - -1 == packet failed NAT checks so block it,           */
4211/*                     1 == packet was successfully translated.             */
4212/* Parameters:  fin(I)    - pointer to packet information                   */
4213/*              nat(I)    - pointer to NAT structure                        */
4214/*              natadd(I) - flag indicating if it is safe to add frag cache */
4215/*              nflags(I) - NAT flags set for this packet                   */
4216/* Locks Held:  ipf_nat (READ)                                              */
4217/*                                                                          */
4218/* Translate a packet coming "in" on an interface.                          */
4219/* ------------------------------------------------------------------------ */
4220int
4221fr_natin(fr_info_t *fin, nat_t *nat, int natadd, u_32_t nflags)
4222{
4223	icmphdr_t *icmp;
4224	tcphdr_t *tcp;
4225	ipnat_t *np;
4226	int i;
4227
4228	tcp = NULL;
4229	np = nat->nat_ptr;
4230	fin->fin_fr = nat->nat_fr;
4231
4232	if (np != NULL) {
4233		if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4234			(void) fr_nat_newfrag(fin, 0, nat);
4235
4236	/* ------------------------------------------------------------- */
4237	/* A few quick notes:						 */
4238	/*	Following are test conditions prior to calling the 	 */
4239	/*	appr_check routine.					 */
4240	/*								 */
4241	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4242	/*	with a map rule, we attempt to match the packet's	 */
4243	/*	source port against in_dport, otherwise	we'd compare the */
4244	/*	packet's destination.			 		 */
4245	/* ------------------------------------------------------------- */
4246		if (np->in_apr != NULL) {
4247			i = appr_check(fin, nat);
4248			if (i == -1) {
4249				return -1;
4250			}
4251		}
4252	}
4253
4254#ifdef	IPFILTER_SYNC
4255	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4256#endif
4257
4258	fin->fin_ip->ip_dst = nat->nat_inip;
4259	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4260	if (nflags & IPN_TCPUDP)
4261		tcp = fin->fin_dp;
4262
4263	/*
4264	 * Fix up checksums, not by recalculating them, but
4265	 * simply computing adjustments.
4266	 * Why only do this for some platforms on inbound packets ?
4267	 * Because for those that it is done, IP processing is yet to happen
4268	 * and so the IPv4 header checksum has not yet been evaluated.
4269	 * Perhaps it should always be done for the benefit of things like
4270	 * fast forwarding (so that it doesn't need to be recomputed) but with
4271	 * header checksum offloading, perhaps it is a moot point.
4272	 */
4273#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4274     defined(__osf__) || defined(linux)
4275	if (nat->nat_dir == NAT_OUTBOUND)
4276		fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4277	else
4278		fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4279#endif
4280
4281	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4282		u_short *csump;
4283
4284		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4285			tcp->th_dport = nat->nat_inport;
4286			fin->fin_data[1] = ntohs(nat->nat_inport);
4287		}
4288
4289
4290		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4291			icmp = fin->fin_dp;
4292
4293			icmp->icmp_id = nat->nat_inport;
4294		}
4295
4296		csump = nat_proto(fin, nat, nflags);
4297
4298		/*
4299		 * The above comments do not hold for layer 4 (or higher)
4300		 * checksums...
4301		 */
4302		if (csump != NULL) {
4303			if (nat->nat_dir == NAT_OUTBOUND)
4304				fix_incksum(fin, csump, nat->nat_sumd[0]);
4305			else
4306				fix_outcksum(fin, csump, nat->nat_sumd[0]);
4307		}
4308	}
4309	ATOMIC_INCL(nat_stats.ns_mapped[0]);
4310	fin->fin_flx |= FI_NATED;
4311	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4312		fin->fin_nattag = &np->in_tag;
4313	return 1;
4314}
4315
4316
4317/* ------------------------------------------------------------------------ */
4318/* Function:    nat_proto                                                   */
4319/* Returns:     u_short* - pointer to transport header checksum to update,  */
4320/*                         NULL if the transport protocol is not recognised */
4321/*                         as needing a checksum update.                    */
4322/* Parameters:  fin(I)    - pointer to packet information                   */
4323/*              nat(I)    - pointer to NAT structure                        */
4324/*              nflags(I) - NAT flags set for this packet                   */
4325/*                                                                          */
4326/* Return the pointer to the checksum field for each protocol so understood.*/
4327/* If support for making other changes to a protocol header is required,    */
4328/* that is not strictly 'address' translation, such as clamping the MSS in  */
4329/* TCP down to a specific value, then do it from here.                      */
4330/* ------------------------------------------------------------------------ */
4331u_short *
4332nat_proto(fr_info_t *fin, nat_t *nat, u_int nflags)
4333{
4334	icmphdr_t *icmp;
4335	u_short *csump;
4336	tcphdr_t *tcp;
4337	udphdr_t *udp;
4338
4339	csump = NULL;
4340	if (fin->fin_out == 0) {
4341		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4342	} else {
4343		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4344	}
4345
4346	switch (fin->fin_p)
4347	{
4348	case IPPROTO_TCP :
4349		tcp = fin->fin_dp;
4350
4351		csump = &tcp->th_sum;
4352
4353		/*
4354		 * Do a MSS CLAMPING on a SYN packet,
4355		 * only deal IPv4 for now.
4356		 */
4357		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4358			nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
4359
4360		break;
4361
4362	case IPPROTO_UDP :
4363		udp = fin->fin_dp;
4364
4365		if (udp->uh_sum)
4366			csump = &udp->uh_sum;
4367		break;
4368
4369	case IPPROTO_ICMP :
4370		icmp = fin->fin_dp;
4371
4372		if ((nflags & IPN_ICMPQUERY) != 0) {
4373			if (icmp->icmp_cksum != 0)
4374				csump = &icmp->icmp_cksum;
4375		}
4376		break;
4377	}
4378	return csump;
4379}
4380
4381
4382/* ------------------------------------------------------------------------ */
4383/* Function:    fr_natunload                                                */
4384/* Returns:     Nil                                                         */
4385/* Parameters:  Nil                                                         */
4386/*                                                                          */
4387/* Free all memory used by NAT structures allocated at runtime.             */
4388/* ------------------------------------------------------------------------ */
4389void
4390fr_natunload(void)
4391{
4392	ipftq_t *ifq, *ifqnext;
4393
4394	(void) nat_clearlist();
4395	(void) nat_flushtable();
4396
4397	/*
4398	 * Proxy timeout queues are not cleaned here because although they
4399	 * exist on the NAT list, appr_unload is called after fr_natunload
4400	 * and the proxies actually are responsible for them being created.
4401	 * Should the proxy timeouts have their own list?  There's no real
4402	 * justification as this is the only complication.
4403	 */
4404	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4405		ifqnext = ifq->ifq_next;
4406		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4407		    (fr_deletetimeoutqueue(ifq) == 0))
4408			fr_freetimeoutqueue(ifq);
4409	}
4410
4411	if (nat_table[0] != NULL) {
4412		KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4413		nat_table[0] = NULL;
4414	}
4415	if (nat_table[1] != NULL) {
4416		KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4417		nat_table[1] = NULL;
4418	}
4419	if (nat_rules != NULL) {
4420		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4421		nat_rules = NULL;
4422	}
4423	if (rdr_rules != NULL) {
4424		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4425		rdr_rules = NULL;
4426	}
4427	if (ipf_hm_maptable != NULL) {
4428		KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4429		ipf_hm_maptable = NULL;
4430	}
4431	if (nat_stats.ns_bucketlen[0] != NULL) {
4432		KFREES(nat_stats.ns_bucketlen[0],
4433		       sizeof(u_long *) * ipf_nattable_sz);
4434		nat_stats.ns_bucketlen[0] = NULL;
4435	}
4436	if (nat_stats.ns_bucketlen[1] != NULL) {
4437		KFREES(nat_stats.ns_bucketlen[1],
4438		       sizeof(u_long *) * ipf_nattable_sz);
4439		nat_stats.ns_bucketlen[1] = NULL;
4440	}
4441
4442	if (fr_nat_maxbucket_reset == 1)
4443		fr_nat_maxbucket = 0;
4444
4445	if (fr_nat_init == 1) {
4446		fr_nat_init = 0;
4447		fr_sttab_destroy(nat_tqb);
4448
4449		RW_DESTROY(&ipf_natfrag);
4450		RW_DESTROY(&ipf_nat);
4451
4452		MUTEX_DESTROY(&ipf_nat_new);
4453		MUTEX_DESTROY(&ipf_natio);
4454
4455		MUTEX_DESTROY(&nat_udptq.ifq_lock);
4456		MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4457		MUTEX_DESTROY(&nat_iptq.ifq_lock);
4458	}
4459}
4460
4461
4462/* ------------------------------------------------------------------------ */
4463/* Function:    fr_natexpire                                                */
4464/* Returns:     Nil                                                         */
4465/* Parameters:  Nil                                                         */
4466/*                                                                          */
4467/* Check all of the timeout queues for entries at the top which need to be  */
4468/* expired.                                                                 */
4469/* ------------------------------------------------------------------------ */
4470void
4471fr_natexpire(void)
4472{
4473	ipftq_t *ifq, *ifqnext;
4474	ipftqent_t *tqe, *tqn;
4475	int i;
4476	SPL_INT(s);
4477
4478	SPL_NET(s);
4479	WRITE_ENTER(&ipf_nat);
4480	for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4481		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4482			if (tqe->tqe_die > fr_ticks)
4483				break;
4484			tqn = tqe->tqe_next;
4485			nat_delete(tqe->tqe_parent, NL_EXPIRE);
4486		}
4487	}
4488
4489	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4490		ifqnext = ifq->ifq_next;
4491
4492		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4493			if (tqe->tqe_die > fr_ticks)
4494				break;
4495			tqn = tqe->tqe_next;
4496			nat_delete(tqe->tqe_parent, NL_EXPIRE);
4497		}
4498	}
4499
4500	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4501		ifqnext = ifq->ifq_next;
4502
4503		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4504		    (ifq->ifq_ref == 0)) {
4505			fr_freetimeoutqueue(ifq);
4506		}
4507	}
4508
4509	if (fr_nat_doflush != 0) {
4510		nat_extraflush(2);
4511		fr_nat_doflush = 0;
4512	}
4513
4514	RWLOCK_EXIT(&ipf_nat);
4515	SPL_X(s);
4516}
4517
4518
4519/* ------------------------------------------------------------------------ */
4520/* Function:    fr_natsync                                                  */
4521/* Returns:     Nil                                                         */
4522/* Parameters:  ifp(I) - pointer to network interface                       */
4523/*                                                                          */
4524/* Walk through all of the currently active NAT sessions, looking for those */
4525/* which need to have their translated address updated.                     */
4526/* ------------------------------------------------------------------------ */
4527void
4528fr_natsync(void *ifp)
4529{
4530	u_32_t sum1, sum2, sumd;
4531	struct in_addr in;
4532	ipnat_t *n;
4533	nat_t *nat;
4534	void *ifp2;
4535	SPL_INT(s);
4536
4537	if (fr_running <= 0)
4538		return;
4539
4540	/*
4541	 * Change IP addresses for NAT sessions for any protocol except TCP
4542	 * since it will break the TCP connection anyway.  The only rules
4543	 * which will get changed are those which are "map ... -> 0/32",
4544	 * where the rule specifies the address is taken from the interface.
4545	 */
4546	SPL_NET(s);
4547	WRITE_ENTER(&ipf_nat);
4548
4549	if (fr_running <= 0) {
4550		RWLOCK_EXIT(&ipf_nat);
4551		return;
4552	}
4553
4554	for (nat = nat_instances; nat; nat = nat->nat_next) {
4555		if ((nat->nat_flags & IPN_TCP) != 0)
4556			continue;
4557		n = nat->nat_ptr;
4558		if ((n == NULL) ||
4559		    (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4560			continue;
4561		if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4562		     (ifp == nat->nat_ifps[1]))) {
4563			nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4564			if (nat->nat_ifnames[1][0] != '\0') {
4565				nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4566							  4);
4567			} else
4568				nat->nat_ifps[1] = nat->nat_ifps[0];
4569			ifp2 = nat->nat_ifps[0];
4570			if (ifp2 == NULL)
4571				continue;
4572
4573			/*
4574			 * Change the map-to address to be the same as the
4575			 * new one.
4576			 */
4577			sum1 = nat->nat_outip.s_addr;
4578			if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4579				nat->nat_outip = in;
4580			sum2 = nat->nat_outip.s_addr;
4581
4582			if (sum1 == sum2)
4583				continue;
4584			/*
4585			 * Readjust the checksum adjustment to take into
4586			 * account the new IP#.
4587			 */
4588			CALC_SUMD(sum1, sum2, sumd);
4589			/* XXX - dont change for TCP when solaris does
4590			 * hardware checksumming.
4591			 */
4592			sumd += nat->nat_sumd[0];
4593			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4594			nat->nat_sumd[1] = nat->nat_sumd[0];
4595		}
4596	}
4597
4598	for (n = nat_list; (n != NULL); n = n->in_next) {
4599		if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4600			n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4601		if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4602			n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4603	}
4604	RWLOCK_EXIT(&ipf_nat);
4605	SPL_X(s);
4606}
4607
4608
/* ------------------------------------------------------------------------ */
/* Function:    nat_icmpquerytype4                                          */
/* Returns:     int - 1 == type is a query or reply, 0 == anything else     */
/* Parameters:  icmptype(I) - ICMP type number                              */
/*                                                                          */
/* Decide whether an ICMP type belongs to one of the query/reply pairs      */
/* (echo, timestamp, information, address mask) that NAT can track by id.   */
/* ------------------------------------------------------------------------ */
static int
nat_icmpquerytype4(int icmptype)
{
	int isquery;

	/*
	 * The query and its reply both have to match on the NAT rule.  The
	 * NAT structure does not record the ICMP type, and one structure
	 * serves every type sharing the same src, dest and id, so replies
	 * are counted as queries too.  Odd as that sounds, it is how the
	 * IPv4 specification defines them.
	 *
	 * Router advertisement/solicitation is currently unsupported: it
	 * would require rewriting the ICMP data section.
	 */
	switch (icmptype)
	{
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
	case ICMP_TSTAMP:
	case ICMP_TSTAMPREPLY:
	case ICMP_IREQ:
	case ICMP_IREQREPLY:
	case ICMP_MASKREQ:
	case ICMP_MASKREPLY:
		isquery = 1;
		break;
	default:
		isquery = 0;
		break;
	}

	return isquery;
}
4649
4650
4651/* ------------------------------------------------------------------------ */
4652/* Function:    nat_log                                                     */
4653/* Returns:     Nil                                                         */
4654/* Parameters:  nat(I)  - pointer to NAT structure                          */
4655/*              type(I) - type of log entry to create                       */
4656/*                                                                          */
4657/* Creates a NAT log entry.                                                 */
4658/* ------------------------------------------------------------------------ */
4659void
4660nat_log(struct nat *nat, u_int type)
4661{
4662#ifdef	IPFILTER_LOG
4663# ifndef LARGE_NAT
4664	struct ipnat *np;
4665	int rulen;
4666# endif
4667	struct natlog natl;
4668	void *items[1];
4669	size_t sizes[1];
4670	int types[1];
4671
4672	natl.nl_inip = nat->nat_inip;
4673	natl.nl_outip = nat->nat_outip;
4674	natl.nl_origip = nat->nat_oip;
4675	natl.nl_bytes[0] = nat->nat_bytes[0];
4676	natl.nl_bytes[1] = nat->nat_bytes[1];
4677	natl.nl_pkts[0] = nat->nat_pkts[0];
4678	natl.nl_pkts[1] = nat->nat_pkts[1];
4679	natl.nl_origport = nat->nat_oport;
4680	natl.nl_inport = nat->nat_inport;
4681	natl.nl_outport = nat->nat_outport;
4682	natl.nl_p = nat->nat_p;
4683	natl.nl_type = type;
4684	natl.nl_rule = -1;
4685# ifndef LARGE_NAT
4686	if (nat->nat_ptr != NULL) {
4687		for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4688			if (np == nat->nat_ptr) {
4689				natl.nl_rule = rulen;
4690				break;
4691			}
4692	}
4693# endif
4694	items[0] = &natl;
4695	sizes[0] = sizeof(natl);
4696	types[0] = 0;
4697
4698	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4699#endif
4700}
4701
4702
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*                                                                          */
/* OpenBSD compatibility hook: pass the interface on to frsync() so that    */
/* IPFilter's references to it get updated.                                 */
/* ------------------------------------------------------------------------ */
void
nat_ifdetach(void *ifp)
{
	frsync(ifp);
}
#endif
4719
4720
4721/* ------------------------------------------------------------------------ */
4722/* Function:    fr_ipnatderef                                               */
4723/* Returns:     Nil                                                         */
4724/* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4725/* Write Locks: ipf_nat                                                     */
4726/*                                                                          */
4727/* ------------------------------------------------------------------------ */
4728void
4729fr_ipnatderef(ipnat_t **inp)
4730{
4731	ipnat_t *in;
4732
4733	in = *inp;
4734	*inp = NULL;
4735	in->in_space++;
4736	in->in_use--;
4737	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4738		if (in->in_apr)
4739			appr_free(in->in_apr);
4740		MUTEX_DESTROY(&in->in_lock);
4741		KFREE(in);
4742		nat_stats.ns_rules--;
4743#if SOLARIS && !defined(_INET_IP_STACK_H)
4744		if (nat_stats.ns_rules == 0)
4745			pfil_delayed_copy = 1;
4746#endif
4747	}
4748}
4749
4750
4751/* ------------------------------------------------------------------------ */
4752/* Function:    fr_natderef                                                 */
4753/* Returns:     Nil                                                         */
4754/* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4755/*                                                                          */
4756/* Decrement the reference counter for this NAT table entry and free it if  */
4757/* there are no more things using it.                                       */
4758/*                                                                          */
4759/* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4760/* structure *because* it only gets called on paths _after_ nat_ref has been*/
4761/* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4762/* because nat_delete() will do that and send nat_ref to -1.                */
4763/*                                                                          */
4764/* Holding the lock on nat_lock is required to serialise nat_delete() being */
4765/* called from a NAT flush ioctl with a deref happening because of a packet.*/
4766/* ------------------------------------------------------------------------ */
4767void
4768fr_natderef(nat_t **natp)
4769{
4770	nat_t *nat;
4771
4772	nat = *natp;
4773	*natp = NULL;
4774
4775	MUTEX_ENTER(&nat->nat_lock);
4776	if (nat->nat_ref > 1) {
4777		nat->nat_ref--;
4778		MUTEX_EXIT(&nat->nat_lock);
4779		return;
4780	}
4781	MUTEX_EXIT(&nat->nat_lock);
4782
4783	WRITE_ENTER(&ipf_nat);
4784	nat_delete(nat, NL_EXPIRE);
4785	RWLOCK_EXIT(&ipf_nat);
4786}
4787
4788
4789/* ------------------------------------------------------------------------ */
4790/* Function:    fr_natclone                                                 */
4791/* Returns:     ipstate_t* - NULL == cloning failed,                        */
4792/*                           else pointer to new state structure            */
4793/* Parameters:  fin(I) - pointer to packet information                      */
4794/*              is(I)  - pointer to master state structure                  */
4795/* Write Lock:  ipf_nat                                                     */
4796/*                                                                          */
4797/* Create a "duplcate" state table entry from the master.                   */
4798/* ------------------------------------------------------------------------ */
4799static nat_t *
4800fr_natclone(fr_info_t *fin, nat_t *nat)
4801{
4802	frentry_t *fr;
4803	nat_t *clone;
4804	ipnat_t *np;
4805
4806	KMALLOC(clone, nat_t *);
4807	if (clone == NULL)
4808		return NULL;
4809	bcopy((char *)nat, (char *)clone, sizeof(*clone));
4810
4811	MUTEX_NUKE(&clone->nat_lock);
4812
4813	clone->nat_aps = NULL;
4814	/*
4815	 * Initialize all these so that nat_delete() doesn't cause a crash.
4816	 */
4817	clone->nat_tqe.tqe_pnext = NULL;
4818	clone->nat_tqe.tqe_next = NULL;
4819	clone->nat_tqe.tqe_ifq = NULL;
4820	clone->nat_tqe.tqe_parent = clone;
4821
4822	clone->nat_flags &= ~SI_CLONE;
4823	clone->nat_flags |= SI_CLONED;
4824
4825	if (clone->nat_hm)
4826		clone->nat_hm->hm_ref++;
4827
4828	if (nat_insert(clone, fin->fin_rev) == -1) {
4829		KFREE(clone);
4830		return NULL;
4831	}
4832	np = clone->nat_ptr;
4833	if (np != NULL) {
4834		if (nat_logging)
4835			nat_log(clone, (u_int)np->in_redir);
4836		np->in_use++;
4837	}
4838	fr = clone->nat_fr;
4839	if (fr != NULL) {
4840		MUTEX_ENTER(&fr->fr_lock);
4841		fr->fr_ref++;
4842		MUTEX_EXIT(&fr->fr_lock);
4843	}
4844
4845	/*
4846	 * Because the clone is created outside the normal loop of things and
4847	 * TCP has special needs in terms of state, initialise the timeout
4848	 * state of the new NAT from here.
4849	 */
4850	if (clone->nat_p == IPPROTO_TCP) {
4851		(void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4852				  clone->nat_flags);
4853	}
4854#ifdef	IPFILTER_SYNC
4855	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4856#endif
4857	if (nat_logging)
4858		nat_log(clone, NL_CLONE);
4859	return clone;
4860}
4861
4862
4863/* ------------------------------------------------------------------------ */
4864/* Function:   nat_wildok                                                   */
4865/* Returns:    int - 1 == packet's ports match wildcards                    */
4866/*                   0 == packet's ports don't match wildcards              */
4867/* Parameters: nat(I)   - NAT entry                                         */
4868/*             sport(I) - source port                                       */
4869/*             dport(I) - destination port                                  */
4870/*             flags(I) - wildcard flags                                    */
4871/*             dir(I)   - packet direction                                  */
4872/*                                                                          */
4873/* Use NAT entry and packet direction to determine which combination of     */
4874/* wildcard flags should be used.                                           */
4875/* ------------------------------------------------------------------------ */
4876static int
4877nat_wildok(nat_t *nat, int sport, int dport, int flags, int dir)
4878{
4879	/*
4880	 * When called by       dir is set to
4881	 * nat_inlookup         NAT_INBOUND (0)
4882	 * nat_outlookup        NAT_OUTBOUND (1)
4883	 *
4884	 * We simply combine the packet's direction in dir with the original
4885	 * "intended" direction of that NAT entry in nat->nat_dir to decide
4886	 * which combination of wildcard flags to allow.
4887	 */
4888
4889	switch ((dir << 1) | nat->nat_dir)
4890	{
4891	case 3: /* outbound packet / outbound entry */
4892		if (((nat->nat_inport == sport) ||
4893		    (flags & SI_W_SPORT)) &&
4894		    ((nat->nat_oport == dport) ||
4895		    (flags & SI_W_DPORT)))
4896			return 1;
4897		break;
4898	case 2: /* outbound packet / inbound entry */
4899		if (((nat->nat_outport == sport) ||
4900		    (flags & SI_W_DPORT)) &&
4901		    ((nat->nat_oport == dport) ||
4902		    (flags & SI_W_SPORT)))
4903			return 1;
4904		break;
4905	case 1: /* inbound packet / outbound entry */
4906		if (((nat->nat_oport == sport) ||
4907		    (flags & SI_W_DPORT)) &&
4908		    ((nat->nat_outport == dport) ||
4909		    (flags & SI_W_SPORT)))
4910			return 1;
4911		break;
4912	case 0: /* inbound packet / inbound entry */
4913		if (((nat->nat_oport == sport) ||
4914		    (flags & SI_W_SPORT)) &&
4915		    ((nat->nat_outport == dport) ||
4916		    (flags & SI_W_DPORT)))
4917			return 1;
4918		break;
4919	default:
4920		break;
4921	}
4922
4923	return(0);
4924}
4925
4926
4927/* ------------------------------------------------------------------------ */
4928/* Function:    nat_mssclamp                                                */
4929/* Returns:     Nil                                                         */
4930/* Parameters:  tcp(I)    - pointer to TCP header                           */
4931/*              maxmss(I) - value to clamp the TCP MSS to                   */
4932/*              fin(I)    - pointer to packet information                   */
4933/*              csump(I)  - pointer to TCP checksum                         */
4934/*                                                                          */
4935/* Check for MSS option and clamp it if necessary.  If found and changed,   */
4936/* then the TCP header checksum will be updated to reflect the change in    */
4937/* the MSS.                                                                 */
4938/* ------------------------------------------------------------------------ */
4939static void
4940nat_mssclamp(tcphdr_t *tcp, u_32_t maxmss, fr_info_t *fin, u_short *csump)
4941{
4942	u_char *cp, *ep, opt;
4943	int hlen, advance;
4944	u_32_t mss, sumd;
4945
4946	hlen = TCP_OFF(tcp) << 2;
4947	if (hlen > sizeof(*tcp)) {
4948		cp = (u_char *)tcp + sizeof(*tcp);
4949		ep = (u_char *)tcp + hlen;
4950
4951		while (cp < ep) {
4952			opt = cp[0];
4953			if (opt == TCPOPT_EOL)
4954				break;
4955			else if (opt == TCPOPT_NOP) {
4956				cp++;
4957				continue;
4958			}
4959
4960			if (cp + 1 >= ep)
4961				break;
4962			advance = cp[1];
4963			if ((cp + advance > ep) || (advance <= 0))
4964				break;
4965			switch (opt)
4966			{
4967			case TCPOPT_MAXSEG:
4968				if (advance != 4)
4969					break;
4970				mss = cp[2] * 256 + cp[3];
4971				if (mss > maxmss) {
4972					cp[2] = maxmss / 256;
4973					cp[3] = maxmss & 0xff;
4974					CALC_SUMD(mss, maxmss, sumd);
4975					fix_outcksum(fin, csump, sumd);
4976				}
4977				break;
4978			default:
4979				/* ignore unknown options */
4980				break;
4981			}
4982
4983			cp += advance;
4984		}
4985	}
4986}
4987
4988
4989/* ------------------------------------------------------------------------ */
4990/* Function:    fr_setnatqueue                                              */
4991/* Returns:     Nil                                                         */
4992/* Parameters:  nat(I)- pointer to NAT structure                            */
4993/*              rev(I) - forward(0) or reverse(1) direction                 */
4994/* Locks:       ipf_nat (read or write)                                     */
4995/*                                                                          */
4996/* Put the NAT entry on its default queue entry, using rev as a helped in   */
4997/* determining which queue it should be placed on.                          */
4998/* ------------------------------------------------------------------------ */
4999void
5000fr_setnatqueue(nat_t *nat, int rev)
5001{
5002	ipftq_t *oifq, *nifq;
5003
5004	if (nat->nat_ptr != NULL)
5005		nifq = nat->nat_ptr->in_tqehead[rev];
5006	else
5007		nifq = NULL;
5008
5009	if (nifq == NULL) {
5010		switch (nat->nat_p)
5011		{
5012		case IPPROTO_UDP :
5013			nifq = &nat_udptq;
5014			break;
5015		case IPPROTO_ICMP :
5016			nifq = &nat_icmptq;
5017			break;
5018		case IPPROTO_TCP :
5019			nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
5020			break;
5021		default :
5022			nifq = &nat_iptq;
5023			break;
5024		}
5025	}
5026
5027	oifq = nat->nat_tqe.tqe_ifq;
5028	/*
5029	 * If it's currently on a timeout queue, move it from one queue to
5030	 * another, else put it on the end of the newly determined queue.
5031	 */
5032	if (oifq != NULL)
5033		fr_movequeue(&nat->nat_tqe, oifq, nifq);
5034	else
5035		fr_queueappend(&nat->nat_tqe, nifq, nat);
5036	return;
5037}
5038
5039
5040/* ------------------------------------------------------------------------ */
5041/* Function:    nat_getnext                                                 */
5042/* Returns:     int - 0 == ok, else error                                   */
5043/* Parameters:  t(I)   - pointer to ipftoken structure                      */
5044/*              itp(I) - pointer to ipfgeniter_t structure                  */
5045/*                                                                          */
5046/* Fetch the next nat/ipnat structure pointer from the linked list and      */
5047/* copy it out to the storage space pointed to by itp.  The next item       */
5048/* in the list to look at is put back in the ipftoken struture.             */
5049/* ------------------------------------------------------------------------ */
5050static int
5051nat_getnext(ipftoken_t *t, ipfgeniter_t *itp, ipfobj_t *obj)
5052{
5053	hostmap_t *hm = NULL, *nexthm = NULL, zerohm;
5054	ipnat_t *ipn = NULL, *nextipnat = NULL, zeroipn;
5055	nat_t *nat = NULL, *nextnat = NULL, zeronat;
5056	int error = 0, count;
5057	char *dst;
5058
5059	if (itp->igi_nitems < 1)
5060		return ENOSPC;
5061
5062	READ_ENTER(&ipf_nat);
5063
5064	/*
5065	 * Get "previous" entry from the token and find the next entry.
5066	 */
5067	switch (itp->igi_type)
5068	{
5069	case IPFGENITER_HOSTMAP :
5070		hm = t->ipt_data;
5071		if (hm == NULL) {
5072			nexthm = ipf_hm_maplist;
5073		} else {
5074			nexthm = hm->hm_next;
5075		}
5076		break;
5077
5078	case IPFGENITER_IPNAT :
5079		ipn = t->ipt_data;
5080		if (ipn == NULL) {
5081			nextipnat = nat_list;
5082		} else {
5083			nextipnat = ipn->in_next;
5084		}
5085		break;
5086
5087	case IPFGENITER_NAT :
5088		nat = t->ipt_data;
5089		if (nat == NULL) {
5090			nextnat = nat_instances;
5091		} else {
5092			nextnat = nat->nat_next;
5093		}
5094		break;
5095
5096	default :
5097		RWLOCK_EXIT(&ipf_nat);
5098		return EINVAL;
5099	}
5100
5101	dst = itp->igi_data;
5102	for (count = itp->igi_nitems; count > 0; count--) {
5103		/*
5104		 * If we found an entry, add a reference and update the token.
5105		 * Otherwise, zero out data to be returned and NULL out token.
5106		 */
5107		switch (itp->igi_type)
5108		{
5109		case IPFGENITER_HOSTMAP :
5110			if (nexthm != NULL) {
5111				ATOMIC_INC32(nexthm->hm_ref);
5112				t->ipt_data = nexthm;
5113			} else {
5114				bzero(&zerohm, sizeof(zerohm));
5115				nexthm = &zerohm;
5116				t->ipt_data = NULL;
5117			}
5118			break;
5119
5120		case IPFGENITER_IPNAT :
5121			if (nextipnat != NULL) {
5122				ATOMIC_INC32(nextipnat->in_use);
5123				t->ipt_data = nextipnat;
5124			} else {
5125				bzero(&zeroipn, sizeof(zeroipn));
5126				nextipnat = &zeroipn;
5127				t->ipt_data = NULL;
5128			}
5129			break;
5130
5131		case IPFGENITER_NAT :
5132			if (nextnat != NULL) {
5133				MUTEX_ENTER(&nextnat->nat_lock);
5134				nextnat->nat_ref++;
5135				MUTEX_EXIT(&nextnat->nat_lock);
5136				t->ipt_data = nextnat;
5137			} else {
5138				bzero(&zeronat, sizeof(zeronat));
5139				nextnat = &zeronat;
5140				t->ipt_data = NULL;
5141			}
5142			break;
5143		}
5144
5145		/*
5146		 * Now that we have ref, it's save to give up lock.
5147		 */
5148		RWLOCK_EXIT(&ipf_nat);
5149
5150		/*
5151		 * Copy out data and clean up references and token as needed.
5152		 */
5153		switch (itp->igi_type)
5154		{
5155		case IPFGENITER_HOSTMAP :
5156			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5157			if (error != 0)
5158				error = EFAULT;
5159			if (hm != NULL) {
5160				WRITE_ENTER(&ipf_nat);
5161				fr_hostmapdel(&hm);
5162				RWLOCK_EXIT(&ipf_nat);
5163			}
5164			if (t->ipt_data != NULL) {
5165				if (nexthm->hm_next == NULL) {
5166					t->ipt_data = NULL;
5167					break;
5168				}
5169				dst += sizeof(*nexthm);
5170				hm = nexthm;
5171				nexthm = nexthm->hm_next;
5172			}
5173			break;
5174
5175		case IPFGENITER_IPNAT :
5176			obj->ipfo_size = sizeof(ipnat_t);
5177			obj->ipfo_ptr = dst;
5178			obj->ipfo_type = IPFOBJ_IPNAT;
5179			error = fr_outobjk(obj, nextipnat);
5180			if (error != 0)
5181				error = EFAULT;
5182			if (ipn != NULL) {
5183				WRITE_ENTER(&ipf_nat);
5184				fr_ipnatderef(&ipn);
5185				RWLOCK_EXIT(&ipf_nat);
5186			}
5187			if (t->ipt_data != NULL) {
5188				if (nextipnat->in_next == NULL) {
5189					t->ipt_data = NULL;
5190					break;
5191				}
5192				dst += sizeof(*nextipnat);
5193				ipn = nextipnat;
5194				nextipnat = nextipnat->in_next;
5195			}
5196			break;
5197
5198		case IPFGENITER_NAT :
5199			obj->ipfo_size = sizeof(nat_t);
5200			obj->ipfo_ptr = dst;
5201			obj->ipfo_type = IPFOBJ_NAT;
5202			error = fr_outobjk(obj, nextnat);
5203			if (error != 0)
5204				error = EFAULT;
5205			if (nat != NULL) {
5206				fr_natderef(&nat);
5207			}
5208			if (t->ipt_data != NULL) {
5209				if (nextnat->nat_next == NULL) {
5210					t->ipt_data = NULL;
5211					break;
5212				}
5213				dst += sizeof(*nextnat);
5214				nat = nextnat;
5215				nextnat = nextnat->nat_next;
5216			}
5217			break;
5218		}
5219
5220		if ((count == 1) || (error != 0))
5221			break;
5222
5223		READ_ENTER(&ipf_nat);
5224	}
5225
5226	return error;
5227}
5228
5229
5230/* ------------------------------------------------------------------------ */
5231/* Function:    nat_iterator                                                */
5232/* Returns:     int - 0 == ok, else error                                   */
5233/* Parameters:  token(I) - pointer to ipftoken structure                    */
5234/*              itp(I) - pointer to ipfgeniter_t structure                  */
5235/*                                                                          */
5236/* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5237/* generic structure to iterate through a list.  There are three different  */
5238/* linked lists of NAT related information to go through: NAT rules, active */
5239/* NAT mappings and the NAT fragment cache.                                 */
5240/* ------------------------------------------------------------------------ */
5241static int
5242nat_iterator(ipftoken_t *token, ipfgeniter_t *itp, ipfobj_t *obj)
5243{
5244	int error;
5245
5246	if (itp->igi_data == NULL)
5247		return EFAULT;
5248
5249	token->ipt_subtype = itp->igi_type;
5250
5251	switch (itp->igi_type)
5252	{
5253	case IPFGENITER_HOSTMAP :
5254	case IPFGENITER_IPNAT :
5255	case IPFGENITER_NAT :
5256		error = nat_getnext(token, itp, obj);
5257		break;
5258
5259	case IPFGENITER_NATFRAG :
5260#ifdef USE_MUTEXES
5261		error = fr_nextfrag(token, itp, &ipfr_natlist,
5262				    &ipfr_nattail, &ipf_natfrag);
5263#else
5264		error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5265#endif
5266		break;
5267	default :
5268		error = EINVAL;
5269		break;
5270	}
5271
5272	return error;
5273}
5274
5275
5276/* ------------------------------------------------------------------------ */
5277/* Function:    nat_extraflush                                              */
5278/* Returns:     int - 0 == success, -1 == failure                           */
5279/* Parameters:  which(I) - how to flush the active NAT table                */
5280/* Write Locks: ipf_nat                                                     */
5281/*                                                                          */
5282/* Flush nat tables.  Three actions currently defined:                      */
5283/* which == 0 : flush all nat table entries                                 */
5284/* which == 1 : flush TCP connections which have started to close but are   */
5285/*	      stuck for some reason.                                        */
5286/* which == 2 : flush TCP connections which have been idle for a long time, */
5287/*	      starting at > 4 days idle and working back in successive half-*/
5288/*	      days to at most 12 hours old.  If this fails to free enough   */
5289/*            slots then work backwards in half hour slots to 30 minutes.   */
5290/*            If that too fails, then work backwards in 30 second intervals */
5291/*            for the last 30 minutes to at worst 30 seconds idle.          */
5292/* ------------------------------------------------------------------------ */
5293static int
5294nat_extraflush(int which)
5295{
5296	ipftq_t *ifq, *ifqnext;
5297	nat_t *nat, **natp;
5298	ipftqent_t *tqn;
5299	int removed;
5300	SPL_INT(s);
5301
5302	removed = 0;
5303
5304	SPL_NET(s);
5305
5306	switch (which)
5307	{
5308	case 0 :
5309		/*
5310		 * Style 0 flush removes everything...
5311		 */
5312		for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5313			nat_delete(nat, NL_FLUSH);
5314			removed++;
5315		}
5316		break;
5317
5318	case 1 :
5319		/*
5320		 * Since we're only interested in things that are closing,
5321		 * we can start with the appropriate timeout queue.
5322		 */
5323		for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5324		     ifq = ifq->ifq_next) {
5325
5326			for (tqn = ifq->ifq_head; tqn != NULL; ) {
5327				nat = tqn->tqe_parent;
5328				tqn = tqn->tqe_next;
5329				if (nat->nat_p != IPPROTO_TCP)
5330					break;
5331				nat_delete(nat, NL_EXPIRE);
5332				removed++;
5333			}
5334		}
5335
5336		/*
5337		 * Also need to look through the user defined queues.
5338		 */
5339		for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5340			ifqnext = ifq->ifq_next;
5341			for (tqn = ifq->ifq_head; tqn != NULL; ) {
5342				nat = tqn->tqe_parent;
5343				tqn = tqn->tqe_next;
5344				if (nat->nat_p != IPPROTO_TCP)
5345					continue;
5346
5347				if ((nat->nat_tcpstate[0] >
5348				     IPF_TCPS_ESTABLISHED) &&
5349				    (nat->nat_tcpstate[1] >
5350				     IPF_TCPS_ESTABLISHED)) {
5351					nat_delete(nat, NL_EXPIRE);
5352					removed++;
5353				}
5354			}
5355		}
5356		break;
5357
5358		/*
5359		 * Args 5-11 correspond to flushing those particular states
5360		 * for TCP connections.
5361		 */
5362	case IPF_TCPS_CLOSE_WAIT :
5363	case IPF_TCPS_FIN_WAIT_1 :
5364	case IPF_TCPS_CLOSING :
5365	case IPF_TCPS_LAST_ACK :
5366	case IPF_TCPS_FIN_WAIT_2 :
5367	case IPF_TCPS_TIME_WAIT :
5368	case IPF_TCPS_CLOSED :
5369		tqn = nat_tqb[which].ifq_head;
5370		while (tqn != NULL) {
5371			nat = tqn->tqe_parent;
5372			tqn = tqn->tqe_next;
5373			nat_delete(nat, NL_FLUSH);
5374			removed++;
5375		}
5376		break;
5377
5378	default :
5379		if (which < 30)
5380			break;
5381
5382		/*
5383		 * Take a large arbitrary number to mean the number of seconds
5384		 * for which which consider to be the maximum value we'll allow
5385		 * the expiration to be.
5386		 */
5387		which = IPF_TTLVAL(which);
5388		for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5389			if (fr_ticks - nat->nat_touched > which) {
5390				nat_delete(nat, NL_FLUSH);
5391				removed++;
5392			} else
5393				natp = &nat->nat_next;
5394		}
5395		break;
5396	}
5397
5398	if (which != 2) {
5399		SPL_X(s);
5400		return removed;
5401	}
5402
5403	/*
5404	 * Asked to remove inactive entries because the table is full.
5405	 */
5406	if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
5407		nat_last_force_flush = fr_ticks;
5408		removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5409	}
5410
5411	SPL_X(s);
5412	return removed;
5413}
5414
5415
5416/* ------------------------------------------------------------------------ */
5417/* Function:    nat_flush_entry                                             */
5418/* Returns:     0 - always succeeds                                         */
5419/* Parameters:  entry(I) - pointer to NAT entry                             */
5420/* Write Locks: ipf_nat                                                     */
5421/*                                                                          */
5422/* This function is a stepping stone between ipf_queueflush() and           */
5423/* nat_dlete().  It is used so we can provide a uniform interface via the   */
5424/* ipf_queueflush() function.  Since the nat_delete() function returns void */
5425/* we translate that to mean it always succeeds in deleting something.      */
5426/* ------------------------------------------------------------------------ */
5427static int
5428nat_flush_entry(void *entry)
5429{
5430	nat_delete(entry, NL_FLUSH);
5431	return 0;
5432}
5433
5434
5435/* ------------------------------------------------------------------------ */
5436/* Function:    nat_gettable                                                */
5437/* Returns:     int     - 0 = success, else error                           */
5438/* Parameters:  data(I) - pointer to ioctl data                             */
5439/*                                                                          */
5440/* This function handles ioctl requests for tables of nat information.      */
5441/* At present the only table it deals with is the hash bucket statistics.   */
5442/* ------------------------------------------------------------------------ */
5443static int
5444nat_gettable(char *data)
5445{
5446	ipftable_t table;
5447	int error;
5448
5449	error = fr_inobj(data, NULL, &table, IPFOBJ_GTABLE);
5450	if (error != 0)
5451		return error;
5452
5453	switch (table.ita_type)
5454	{
5455	case IPFTABLE_BUCKETS_NATIN :
5456		error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table,
5457				ipf_nattable_sz * sizeof(u_long));
5458		break;
5459
5460	case IPFTABLE_BUCKETS_NATOUT :
5461		error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table,
5462				ipf_nattable_sz * sizeof(u_long));
5463		break;
5464
5465	default :
5466		return EINVAL;
5467	}
5468
5469	if (error != 0) {
5470		error = EFAULT;
5471	}
5472	return error;
5473}
5474
5475
5476/* ------------------------------------------------------------------------ */
5477/* Function:    nat_uncreate                                                */
5478/* Returns:     Nil                                                         */
5479/* Parameters:  fin(I) - pointer to packet information                      */
5480/*                                                                          */
5481/* This function is used to remove a NAT entry from the NAT table when we   */
5482/* decide that the create was actually in error. It is thus assumed that    */
5483/* fin_flx will have both FI_NATED and FI_NATNEW set. Because we're dealing */
5484/* with the translated packet (not the original), we have to reverse the    */
5485/* lookup. Although doing the lookup is expensive (relatively speaking), it */
5486/* is not anticipated that this will be a frequent occurance for normal     */
5487/* traffic patterns.                                                        */
5488/* ------------------------------------------------------------------------ */
5489void
5490nat_uncreate(fr_info_t *fin)
5491{
5492	int nflags;
5493	nat_t *nat;
5494
5495	switch (fin->fin_p)
5496	{
5497	case IPPROTO_TCP :
5498		nflags = IPN_TCP;
5499		break;
5500	case IPPROTO_UDP :
5501		nflags = IPN_UDP;
5502		break;
5503	default :
5504		nflags = 0;
5505		break;
5506	}
5507
5508	WRITE_ENTER(&ipf_nat);
5509
5510	if (fin->fin_out == 0) {
5511		nat = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
5512				    fin->fin_dst, fin->fin_src);
5513	} else {
5514		nat = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
5515				   fin->fin_src, fin->fin_dst);
5516	}
5517
5518	if (nat != NULL) {
5519		nat_stats.ns_uncreate[fin->fin_out][0]++;
5520		nat_delete(nat, NL_DESTROY);
5521	} else {
5522		nat_stats.ns_uncreate[fin->fin_out][1]++;
5523	}
5524
5525	RWLOCK_EXIT(&ipf_nat);
5526}
5527