ip_fw_nat.c revision 176669
1/*-
2 * Copyright (c) 2008 Paolo Pisati
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/netinet/ip_fw_nat.c 176669 2008-02-29 22:27:19Z piso $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/condvar.h>
33#include <sys/eventhandler.h>
34#include <sys/malloc.h>
35#include <sys/mbuf.h>
36#include <sys/kernel.h>
37#include <sys/lock.h>
38#include <sys/jail.h>
39#include <sys/module.h>
40#include <sys/priv.h>
41#include <sys/proc.h>
42#include <sys/rwlock.h>
43#include <sys/socket.h>
44#include <sys/socketvar.h>
45#include <sys/sysctl.h>
46#include <sys/syslog.h>
47#include <sys/ucred.h>
48
49#include <netinet/libalias/alias.h>
50#include <netinet/libalias/alias_local.h>
51
52#define	IPFW_INTERNAL	/* Access to protected data structures in ip_fw.h. */
53
54#include <net/if.h>
55#include <netinet/in.h>
56#include <netinet/ip.h>
57#include <netinet/ip_var.h>
58#include <netinet/ip_icmp.h>
59#include <netinet/ip_fw.h>
60#include <netinet/tcp.h>
61#include <netinet/tcp_timer.h>
62#include <netinet/tcp_var.h>
63#include <netinet/tcpip.h>
64#include <netinet/udp.h>
65#include <netinet/udp_var.h>
66
67#include <machine/in_cksum.h>	/* XXX for in_cksum */
68
69MALLOC_DECLARE(M_IPFW);
70
71extern struct ip_fw_chain layer3_chain;
72
73static eventhandler_tag ifaddr_event_tag;
74
75extern ipfw_nat_t *ipfw_nat_ptr;
76extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
77extern ipfw_nat_cfg_t *ipfw_nat_del_ptr;
78extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
79extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
80
81static void
82ifaddr_change(void *arg __unused, struct ifnet *ifp)
83{
84	struct cfg_nat *ptr;
85	struct ifaddr *ifa;
86
87	IPFW_WLOCK(&layer3_chain);
88	/* Check every nat entry... */
89	LIST_FOREACH(ptr, &layer3_chain.nat, _next) {
90		/* ...using nic 'ifp->if_xname' as dynamic alias address. */
91		if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) == 0) {
92			mtx_lock(&ifp->if_addr_mtx);
93			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
94				if (ifa->ifa_addr == NULL)
95					continue;
96				if (ifa->ifa_addr->sa_family != AF_INET)
97					continue;
98				ptr->ip = ((struct sockaddr_in *)
99				    (ifa->ifa_addr))->sin_addr;
100				LibAliasSetAddress(ptr->lib, ptr->ip);
101			}
102			mtx_unlock(&ifp->if_addr_mtx);
103		}
104	}
105	IPFW_WUNLOCK(&layer3_chain);
106}
107
108static void
109flush_nat_ptrs(const int i)
110{
111	struct ip_fw *rule;
112
113	IPFW_WLOCK_ASSERT(&layer3_chain);
114	for (rule = layer3_chain.rules; rule; rule = rule->next) {
115		ipfw_insn_nat *cmd = (ipfw_insn_nat *)ACTION_PTR(rule);
116		if (cmd->o.opcode != O_NAT)
117			continue;
118		if (cmd->nat != NULL && cmd->nat->id == i)
119			cmd->nat = NULL;
120	}
121}
122
/*
 * List helpers.  The *_NAT variants touch the chain-wide nat list and
 * therefore assert that the chain write lock is held; the REDIR/SPOOL
 * variants only link an entry at the head of the given list.
 */

/* Insert nat instance 'p' at the head of nat list 'b'. */
#define HOOK_NAT(b, p) do {				\
		IPFW_WLOCK_ASSERT(&layer3_chain);	\
		LIST_INSERT_HEAD(b, p, _next);		\
	} while (0)

/* Remove nat instance 'p' from the list it is linked on. */
#define UNHOOK_NAT(p) do {				\
		IPFW_WLOCK_ASSERT(&layer3_chain);	\
		LIST_REMOVE(p, _next);			\
	} while (0)

/* Insert redirect entry 'p' at the head of redirect list 'b'. */
#define HOOK_REDIR(b, p)	LIST_INSERT_HEAD(b, p, _next)

/* Insert spool entry 'p' at the head of spool list 'b'. */
#define HOOK_SPOOL(b, p)	LIST_INSERT_HEAD(b, p, _next)
140
141static void
142del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head)
143{
144	struct cfg_redir *r, *tmp_r;
145	struct cfg_spool *s, *tmp_s;
146	int i, num;
147
148	LIST_FOREACH_SAFE(r, head, _next, tmp_r) {
149		num = 1; /* Number of alias_link to delete. */
150		switch (r->mode) {
151		case REDIR_PORT:
152			num = r->pport_cnt;
153			/* FALLTHROUGH */
154		case REDIR_ADDR:
155		case REDIR_PROTO:
156			/* Delete all libalias redirect entry. */
157			for (i = 0; i < num; i++)
158				LibAliasRedirectDelete(n->lib, r->alink[i]);
159			/* Del spool cfg if any. */
160			LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) {
161				LIST_REMOVE(s, _next);
162				free(s, M_IPFW);
163			}
164			free(r->alink, M_IPFW);
165			LIST_REMOVE(r, _next);
166			free(r, M_IPFW);
167			break;
168		default:
169			printf("unknown redirect mode: %u\n", r->mode);
170			/* XXX - panic?!?!? */
171			break;
172		}
173	}
174}
175
176static int
177add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
178{
179	struct cfg_redir *r, *ser_r;
180	struct cfg_spool *s, *ser_s;
181	int cnt, off, i;
182	char *panic_err;
183
184	for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) {
185		ser_r = (struct cfg_redir *)&buf[off];
186		r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
187		memcpy(r, ser_r, SOF_REDIR);
188		LIST_INIT(&r->spool_chain);
189		off += SOF_REDIR;
190		r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt,
191		    M_IPFW, M_WAITOK | M_ZERO);
192		switch (r->mode) {
193		case REDIR_ADDR:
194			r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr,
195			    r->paddr);
196			break;
197		case REDIR_PORT:
198			for (i = 0 ; i < r->pport_cnt; i++) {
199				/* If remotePort is all ports, set it to 0. */
200				u_short remotePortCopy = r->rport + i;
201				if (r->rport_cnt == 1 && r->rport == 0)
202					remotePortCopy = 0;
203				r->alink[i] = LibAliasRedirectPort(ptr->lib,
204				    r->laddr, htons(r->lport + i), r->raddr,
205				    htons(remotePortCopy), r->paddr,
206				    htons(r->pport + i), r->proto);
207				if (r->alink[i] == NULL) {
208					r->alink[0] = NULL;
209					break;
210				}
211			}
212			break;
213		case REDIR_PROTO:
214			r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr,
215			    r->raddr, r->paddr, r->proto);
216			break;
217		default:
218			printf("unknown redirect mode: %u\n", r->mode);
219			break;
220		}
221		if (r->alink[0] == NULL) {
222			panic_err = "LibAliasRedirect* returned NULL";
223			goto bad;
224		} else /* LSNAT handling. */
225			for (i = 0; i < r->spool_cnt; i++) {
226				ser_s = (struct cfg_spool *)&buf[off];
227				s = malloc(SOF_REDIR, M_IPFW,
228				    M_WAITOK | M_ZERO);
229				memcpy(s, ser_s, SOF_SPOOL);
230				LibAliasAddServer(ptr->lib, r->alink[0],
231				    s->addr, htons(s->port));
232				off += SOF_SPOOL;
233				/* Hook spool entry. */
234				HOOK_SPOOL(&r->spool_chain, s);
235			}
236		/* And finally hook this redir entry. */
237		HOOK_REDIR(&ptr->redir_chain, r);
238	}
239	return (1);
240bad:
241	/* something really bad happened: panic! */
242	panic("%s\n", panic_err);
243}
244
245static int
246ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
247{
248	struct mbuf *mcl;
249	struct ip *ip;
250	/* XXX - libalias duct tape */
251	int ldt, retval;
252	char *c;
253
254	ldt = 0;
255	retval = 0;
256	if ((mcl = m_megapullup(m, m->m_pkthdr.len)) ==
257	    NULL)
258		goto badnat;
259	ip = mtod(mcl, struct ip *);
260	if (args->eh == NULL) {
261		ip->ip_len = htons(ip->ip_len);
262		ip->ip_off = htons(ip->ip_off);
263	}
264
265	/*
266	 * XXX - Libalias checksum offload 'duct tape':
267	 *
268	 * locally generated packets have only
269	 * pseudo-header checksum calculated
270	 * and libalias will screw it[1], so
271	 * mark them for later fix.  Moreover
272	 * there are cases when libalias
273	 * modify tcp packet data[2], mark it
274	 * for later fix too.
275	 *
276	 * [1] libalias was never meant to run
277	 * in kernel, so it doesn't have any
278	 * knowledge about checksum
279	 * offloading, and it expects a packet
280	 * with a full internet
281	 * checksum. Unfortunately, packets
282	 * generated locally will have just the
283	 * pseudo header calculated, and when
284	 * libalias tries to adjust the
285	 * checksum it will actually screw it.
286	 *
287	 * [2] when libalias modify tcp's data
288	 * content, full TCP checksum has to
289	 * be recomputed: the problem is that
290	 * libalias doesn't have any idea
291	 * about checksum offloading To
292	 * workaround this, we do not do
293	 * checksumming in LibAlias, but only
294	 * mark the packets in th_x2 field. If
295	 * we receive a marked packet, we
296	 * calculate correct checksum for it
297	 * aware of offloading.  Why such a
298	 * terrible hack instead of
299	 * recalculating checksum for each
300	 * packet?  Because the previous
301	 * checksum was not checked!
302	 * Recalculating checksums for EVERY
303	 * packet will hide ALL transmission
304	 * errors. Yes, marked packets still
305	 * suffer from this problem. But,
306	 * sigh, natd(8) has this problem,
307	 * too.
308	 *
309	 * TODO: -make libalias mbuf aware (so
310	 * it can handle delayed checksum and tso)
311	 */
312
313	if (mcl->m_pkthdr.rcvif == NULL &&
314	    mcl->m_pkthdr.csum_flags &
315	    CSUM_DELAY_DATA)
316		ldt = 1;
317
318	c = mtod(mcl, char *);
319	if (args->oif == NULL)
320		retval = LibAliasIn(t->lib, c,
321				    MCLBYTES);
322	else
323		retval = LibAliasOut(t->lib, c,
324				     MCLBYTES);
325	if (retval != PKT_ALIAS_OK) {
326		/* XXX - should i add some logging? */
327		m_free(mcl);
328	badnat:
329		args->m = NULL;
330		return (IP_FW_DENY);
331	}
332	mcl->m_pkthdr.len = mcl->m_len =
333	    ntohs(ip->ip_len);
334
335	/*
336	 * XXX - libalias checksum offload
337	 * 'duct tape' (see above)
338	 */
339
340	if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
341	    ip->ip_p == IPPROTO_TCP) {
342		struct tcphdr 	*th;
343
344		th = (struct tcphdr *)(ip + 1);
345		if (th->th_x2)
346			ldt = 1;
347	}
348
349	if (ldt) {
350		struct tcphdr 	*th;
351		struct udphdr 	*uh;
352		u_short cksum;
353
354		ip->ip_len = ntohs(ip->ip_len);
355		cksum = in_pseudo(
356		    ip->ip_src.s_addr,
357		    ip->ip_dst.s_addr,
358		    htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2))
359		);
360
361		switch (ip->ip_p) {
362		case IPPROTO_TCP:
363			th = (struct tcphdr *)(ip + 1);
364			/*
365			 * Maybe it was set in
366			 * libalias...
367			 */
368			th->th_x2 = 0;
369			th->th_sum = cksum;
370			mcl->m_pkthdr.csum_data =
371			    offsetof(struct tcphdr, th_sum);
372			break;
373		case IPPROTO_UDP:
374			uh = (struct udphdr *)(ip + 1);
375			uh->uh_sum = cksum;
376			mcl->m_pkthdr.csum_data =
377			    offsetof(struct udphdr, uh_sum);
378			break;
379		}
380		/*
381		 * No hw checksum offloading: do it
382		 * by ourself.
383		 */
384		if ((mcl->m_pkthdr.csum_flags &
385		     CSUM_DELAY_DATA) == 0) {
386			in_delayed_cksum(mcl);
387			mcl->m_pkthdr.csum_flags &=
388			    ~CSUM_DELAY_DATA;
389		}
390		ip->ip_len = htons(ip->ip_len);
391	}
392
393	if (args->eh == NULL) {
394		ip->ip_len = ntohs(ip->ip_len);
395		ip->ip_off = ntohs(ip->ip_off);
396	}
397
398	args->m = mcl;
399	return (IP_FW_NAT);
400}
401
402static int
403ipfw_nat_cfg(struct sockopt *sopt)
404{
405	struct cfg_nat *ptr, *ser_n;
406	char *buf;
407
408	buf = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO);
409	sooptcopyin(sopt, buf, NAT_BUF_LEN,
410	    sizeof(struct cfg_nat));
411	ser_n = (struct cfg_nat *)buf;
412
413	/*
414	 * Find/create nat rule.
415	 */
416	IPFW_WLOCK(&layer3_chain);
417	LOOKUP_NAT(layer3_chain, ser_n->id, ptr);
418	if (ptr == NULL) {
419		/* New rule: allocate and init new instance. */
420		ptr = malloc(sizeof(struct cfg_nat),
421		    M_IPFW, M_NOWAIT | M_ZERO);
422		if (ptr == NULL) {
423			IPFW_WUNLOCK(&layer3_chain);
424			free(buf, M_IPFW);
425			return (ENOSPC);
426		}
427		ptr->lib = LibAliasInit(NULL);
428		if (ptr->lib == NULL) {
429			IPFW_WUNLOCK(&layer3_chain);
430			free(ptr, M_IPFW);
431			free(buf, M_IPFW);
432			return (EINVAL);
433		}
434		LIST_INIT(&ptr->redir_chain);
435	} else {
436		/* Entry already present: temporarly unhook it. */
437		UNHOOK_NAT(ptr);
438		flush_nat_ptrs(ser_n->id);
439	}
440	IPFW_WUNLOCK(&layer3_chain);
441
442	/*
443	 * Basic nat configuration.
444	 */
445	ptr->id = ser_n->id;
446	/*
447	 * XXX - what if this rule doesn't nat any ip and just
448	 * redirect?
449	 * do we set aliasaddress to 0.0.0.0?
450	 */
451	ptr->ip = ser_n->ip;
452	ptr->redir_cnt = ser_n->redir_cnt;
453	ptr->mode = ser_n->mode;
454	LibAliasSetMode(ptr->lib, ser_n->mode, ser_n->mode);
455	LibAliasSetAddress(ptr->lib, ptr->ip);
456	memcpy(ptr->if_name, ser_n->if_name, IF_NAMESIZE);
457
458	/*
459	 * Redir and LSNAT configuration.
460	 */
461	/* Delete old cfgs. */
462	del_redir_spool_cfg(ptr, &ptr->redir_chain);
463	/* Add new entries. */
464	add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr);
465	free(buf, M_IPFW);
466	IPFW_WLOCK(&layer3_chain);
467	HOOK_NAT(&layer3_chain.nat, ptr);
468	IPFW_WUNLOCK(&layer3_chain);
469	return (0);
470}
471
472static int
473ipfw_nat_del(struct sockopt *sopt)
474{
475	struct cfg_nat *ptr;
476	int i;
477
478	sooptcopyin(sopt, &i, sizeof i, sizeof i);
479	IPFW_WLOCK(&layer3_chain);
480	LOOKUP_NAT(layer3_chain, i, ptr);
481	if (ptr == NULL) {
482		IPFW_WUNLOCK(&layer3_chain);
483		return (EINVAL);
484	}
485	UNHOOK_NAT(ptr);
486	flush_nat_ptrs(i);
487	IPFW_WUNLOCK(&layer3_chain);
488	del_redir_spool_cfg(ptr, &ptr->redir_chain);
489	LibAliasUninit(ptr->lib);
490	free(ptr, M_IPFW);
491	return (0);
492}
493
494static int
495ipfw_nat_get_cfg(struct sockopt *sopt)
496{
497	uint8_t *data;
498	struct cfg_nat *n;
499	struct cfg_redir *r;
500	struct cfg_spool *s;
501	int nat_cnt, off;
502
503	nat_cnt = 0;
504	off = sizeof(nat_cnt);
505
506	data = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO);
507	IPFW_RLOCK(&layer3_chain);
508	/* Serialize all the data. */
509	LIST_FOREACH(n, &layer3_chain.nat, _next) {
510		nat_cnt++;
511		if (off + SOF_NAT < NAT_BUF_LEN) {
512			bcopy(n, &data[off], SOF_NAT);
513			off += SOF_NAT;
514			LIST_FOREACH(r, &n->redir_chain, _next) {
515				if (off + SOF_REDIR < NAT_BUF_LEN) {
516					bcopy(r, &data[off],
517					    SOF_REDIR);
518					off += SOF_REDIR;
519					LIST_FOREACH(s, &r->spool_chain,
520					    _next) {
521						if (off + SOF_SPOOL <
522						    NAT_BUF_LEN) {
523							bcopy(s, &data[off],
524							    SOF_SPOOL);
525							off += SOF_SPOOL;
526						} else
527							goto nospace;
528					}
529				} else
530					goto nospace;
531			}
532		} else
533			goto nospace;
534	}
535	bcopy(&nat_cnt, data, sizeof(nat_cnt));
536	IPFW_RUNLOCK(&layer3_chain);
537	sooptcopyout(sopt, data, NAT_BUF_LEN);
538	free(data, M_IPFW);
539	return (0);
540nospace:
541	IPFW_RUNLOCK(&layer3_chain);
542	printf("serialized data buffer not big enough:"
543	    "please increase NAT_BUF_LEN\n");
544	free(data, M_IPFW);
545	return (ENOSPC);
546}
547
548static int
549ipfw_nat_get_log(struct sockopt *sopt)
550{
551	uint8_t *data;
552	struct cfg_nat *ptr;
553	int i, size, cnt, sof;
554
555	data = NULL;
556	sof = LIBALIAS_BUF_SIZE;
557	cnt = 0;
558
559	IPFW_RLOCK(&layer3_chain);
560	size = i = 0;
561	LIST_FOREACH(ptr, &layer3_chain.nat, _next) {
562		if (ptr->lib->logDesc == NULL)
563			continue;
564		cnt++;
565		size = cnt * (sof + sizeof(int));
566		data = realloc(data, size, M_IPFW, M_NOWAIT | M_ZERO);
567		if (data == NULL) {
568			IPFW_RUNLOCK(&layer3_chain);
569			return (ENOSPC);
570		}
571		bcopy(&ptr->id, &data[i], sizeof(int));
572		i += sizeof(int);
573		bcopy(ptr->lib->logDesc, &data[i], sof);
574		i += sof;
575	}
576	IPFW_RUNLOCK(&layer3_chain);
577	sooptcopyout(sopt, data, size);
578	free(data, M_IPFW);
579	return(0);
580}
581
582static void
583ipfw_nat_init(void)
584{
585
586	IPFW_WLOCK(&layer3_chain);
587	/* init ipfw hooks */
588	ipfw_nat_ptr = ipfw_nat;
589	ipfw_nat_cfg_ptr = ipfw_nat_cfg;
590	ipfw_nat_del_ptr = ipfw_nat_del;
591	ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
592	ipfw_nat_get_log_ptr = ipfw_nat_get_log;
593	IPFW_WUNLOCK(&layer3_chain);
594	ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change,
595	    NULL, EVENTHANDLER_PRI_ANY);
596}
597
598static void
599ipfw_nat_destroy(void)
600{
601	struct cfg_nat *ptr, *ptr_temp;
602
603	IPFW_WLOCK(&layer3_chain);
604	LIST_FOREACH_SAFE(ptr, &layer3_chain.nat, _next, ptr_temp) {
605		LIST_REMOVE(ptr, _next);
606		del_redir_spool_cfg(ptr, &ptr->redir_chain);
607		LibAliasUninit(ptr->lib);
608		free(ptr, M_IPFW);
609	}
610	EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag);
611	/* deregister ipfw_nat */
612	ipfw_nat_ptr = NULL;
613	IPFW_WUNLOCK(&layer3_chain);
614}
615
616static int
617ipfw_nat_modevent(module_t mod, int type, void *unused)
618{
619	int err = 0;
620
621	switch (type) {
622	case MOD_LOAD:
623		ipfw_nat_init();
624		break;
625
626	case MOD_UNLOAD:
627		ipfw_nat_destroy();
628		break;
629
630	default:
631		return EOPNOTSUPP;
632		break;
633	}
634	return err;
635}
636
637static moduledata_t ipfw_nat_mod = {
638	"ipfw_nat",
639	ipfw_nat_modevent,
640	0
641};
642
643DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
644MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
645MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2);
646MODULE_VERSION(ipfw_nat, 1);
647