ip_fw_nat.c revision 220914
1/*-
2 * Copyright (c) 2008 Paolo Pisati
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_nat.c 220914 2011-04-21 08:18:55Z glebius $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/eventhandler.h>
33#include <sys/malloc.h>
34#include <sys/kernel.h>
35#include <sys/lock.h>
36#include <sys/module.h>
37#include <sys/rwlock.h>
38
39#define        IPFW_INTERNAL   /* Access to protected data structures in ip_fw.h. */
40
41#include <netinet/libalias/alias.h>
42#include <netinet/libalias/alias_local.h>
43
44#include <net/if.h>
45#include <netinet/in.h>
46#include <netinet/ip.h>
47#include <netinet/ip_var.h>
48#include <netinet/ip_fw.h>
49#include <netinet/ipfw/ip_fw_private.h>
50#include <netinet/tcp.h>
51#include <netinet/udp.h>
52
53#include <machine/in_cksum.h>	/* XXX for in_cksum */
54
/*
 * Tag returned by EVENTHANDLER_REGISTER() for the ifaddr_event hook;
 * it is stored at init time and passed to EVENTHANDLER_DEREGISTER() when
 * the module is torn down.
 */
static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag);
#define	V_ifaddr_event_tag	VNET(ifaddr_event_tag)
57
58static void
59ifaddr_change(void *arg __unused, struct ifnet *ifp)
60{
61	struct cfg_nat *ptr;
62	struct ifaddr *ifa;
63	struct ip_fw_chain *chain;
64
65	chain = &V_layer3_chain;
66	IPFW_WLOCK(chain);
67	/* Check every nat entry... */
68	LIST_FOREACH(ptr, &chain->nat, _next) {
69		/* ...using nic 'ifp->if_xname' as dynamic alias address. */
70		if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0)
71			continue;
72		if_addr_rlock(ifp);
73		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
74			if (ifa->ifa_addr == NULL)
75				continue;
76			if (ifa->ifa_addr->sa_family != AF_INET)
77				continue;
78			ptr->ip = ((struct sockaddr_in *)
79			    (ifa->ifa_addr))->sin_addr;
80			LibAliasSetAddress(ptr->lib, ptr->ip);
81		}
82		if_addr_runlock(ifp);
83	}
84	IPFW_WUNLOCK(chain);
85}
86
87/*
88 * delete the pointers for nat entry ix, or all of them if ix < 0
89 */
90static void
91flush_nat_ptrs(struct ip_fw_chain *chain, const int ix)
92{
93	int i;
94	ipfw_insn_nat *cmd;
95
96	IPFW_WLOCK_ASSERT(chain);
97	for (i = 0; i < chain->n_rules; i++) {
98		cmd = (ipfw_insn_nat *)ACTION_PTR(chain->map[i]);
99		/* XXX skip log and the like ? */
100		if (cmd->o.opcode == O_NAT && cmd->nat != NULL &&
101			    (ix < 0 || cmd->nat->id == ix))
102			cmd->nat = NULL;
103	}
104}
105
106static void
107del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head)
108{
109	struct cfg_redir *r, *tmp_r;
110	struct cfg_spool *s, *tmp_s;
111	int i, num;
112
113	LIST_FOREACH_SAFE(r, head, _next, tmp_r) {
114		num = 1; /* Number of alias_link to delete. */
115		switch (r->mode) {
116		case REDIR_PORT:
117			num = r->pport_cnt;
118			/* FALLTHROUGH */
119		case REDIR_ADDR:
120		case REDIR_PROTO:
121			/* Delete all libalias redirect entry. */
122			for (i = 0; i < num; i++)
123				LibAliasRedirectDelete(n->lib, r->alink[i]);
124			/* Del spool cfg if any. */
125			LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) {
126				LIST_REMOVE(s, _next);
127				free(s, M_IPFW);
128			}
129			free(r->alink, M_IPFW);
130			LIST_REMOVE(r, _next);
131			free(r, M_IPFW);
132			break;
133		default:
134			printf("unknown redirect mode: %u\n", r->mode);
135			/* XXX - panic?!?!? */
136			break;
137		}
138	}
139}
140
141static void
142add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
143{
144	struct cfg_redir *r, *ser_r;
145	struct cfg_spool *s, *ser_s;
146	int cnt, off, i;
147
148	for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) {
149		ser_r = (struct cfg_redir *)&buf[off];
150		r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
151		memcpy(r, ser_r, SOF_REDIR);
152		LIST_INIT(&r->spool_chain);
153		off += SOF_REDIR;
154		r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt,
155		    M_IPFW, M_WAITOK | M_ZERO);
156		switch (r->mode) {
157		case REDIR_ADDR:
158			r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr,
159			    r->paddr);
160			break;
161		case REDIR_PORT:
162			for (i = 0 ; i < r->pport_cnt; i++) {
163				/* If remotePort is all ports, set it to 0. */
164				u_short remotePortCopy = r->rport + i;
165				if (r->rport_cnt == 1 && r->rport == 0)
166					remotePortCopy = 0;
167				r->alink[i] = LibAliasRedirectPort(ptr->lib,
168				    r->laddr, htons(r->lport + i), r->raddr,
169				    htons(remotePortCopy), r->paddr,
170				    htons(r->pport + i), r->proto);
171				if (r->alink[i] == NULL) {
172					r->alink[0] = NULL;
173					break;
174				}
175			}
176			break;
177		case REDIR_PROTO:
178			r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr,
179			    r->raddr, r->paddr, r->proto);
180			break;
181		default:
182			printf("unknown redirect mode: %u\n", r->mode);
183			break;
184		}
185		/* XXX perhaps return an error instead of panic ? */
186		if (r->alink[0] == NULL)
187			panic("LibAliasRedirect* returned NULL");
188		/* LSNAT handling. */
189		for (i = 0; i < r->spool_cnt; i++) {
190			ser_s = (struct cfg_spool *)&buf[off];
191			s = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
192			memcpy(s, ser_s, SOF_SPOOL);
193			LibAliasAddServer(ptr->lib, r->alink[0],
194			    s->addr, htons(s->port));
195			off += SOF_SPOOL;
196			/* Hook spool entry. */
197			LIST_INSERT_HEAD(&r->spool_chain, s, _next);
198		}
199		/* And finally hook this redir entry. */
200		LIST_INSERT_HEAD(&ptr->redir_chain, r, _next);
201	}
202}
203
204static int
205ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
206{
207	struct mbuf *mcl;
208	struct ip *ip;
209	/* XXX - libalias duct tape */
210	int ldt, retval;
211	char *c;
212
213	ldt = 0;
214	retval = 0;
215	mcl = m_megapullup(m, m->m_pkthdr.len);
216	if (mcl == NULL) {
217		args->m = NULL;
218		return (IP_FW_DENY);
219	}
220	ip = mtod(mcl, struct ip *);
221
222	/*
223	 * XXX - Libalias checksum offload 'duct tape':
224	 *
225	 * locally generated packets have only pseudo-header checksum
226	 * calculated and libalias will break it[1], so mark them for
227	 * later fix.  Moreover there are cases when libalias modifies
228	 * tcp packet data[2], mark them for later fix too.
229	 *
230	 * [1] libalias was never meant to run in kernel, so it does
231	 * not have any knowledge about checksum offloading, and
232	 * expects a packet with a full internet checksum.
233	 * Unfortunately, packets generated locally will have just the
234	 * pseudo header calculated, and when libalias tries to adjust
235	 * the checksum it will actually compute a wrong value.
236	 *
237	 * [2] when libalias modifies tcp's data content, full TCP
238	 * checksum has to be recomputed: the problem is that
239	 * libalias does not have any idea about checksum offloading.
240	 * To work around this, we do not do checksumming in LibAlias,
241	 * but only mark the packets in th_x2 field. If we receive a
242	 * marked packet, we calculate correct checksum for it
243	 * aware of offloading.  Why such a terrible hack instead of
244	 * recalculating checksum for each packet?
245	 * Because the previous checksum was not checked!
246	 * Recalculating checksums for EVERY packet will hide ALL
247	 * transmission errors. Yes, marked packets still suffer from
248	 * this problem. But, sigh, natd(8) has this problem, too.
249	 *
250	 * TODO: -make libalias mbuf aware (so
251	 * it can handle delayed checksum and tso)
252	 */
253
254	if (mcl->m_pkthdr.rcvif == NULL &&
255	    mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
256		ldt = 1;
257
258	c = mtod(mcl, char *);
259	if (args->oif == NULL)
260		retval = LibAliasIn(t->lib, c,
261			mcl->m_len + M_TRAILINGSPACE(mcl));
262	else
263		retval = LibAliasOut(t->lib, c,
264			mcl->m_len + M_TRAILINGSPACE(mcl));
265	if (retval == PKT_ALIAS_RESPOND) {
266		m->m_flags |= M_SKIP_FIREWALL;
267		retval = PKT_ALIAS_OK;
268	}
269	if (retval != PKT_ALIAS_OK &&
270	    retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) {
271		/* XXX - should i add some logging? */
272		m_free(mcl);
273		args->m = NULL;
274		return (IP_FW_DENY);
275	}
276	mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len);
277
278	/*
279	 * XXX - libalias checksum offload
280	 * 'duct tape' (see above)
281	 */
282
283	if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
284	    ip->ip_p == IPPROTO_TCP) {
285		struct tcphdr 	*th;
286
287		th = (struct tcphdr *)(ip + 1);
288		if (th->th_x2)
289			ldt = 1;
290	}
291
292	if (ldt) {
293		struct tcphdr 	*th;
294		struct udphdr 	*uh;
295		u_short cksum;
296
297		ip->ip_len = ntohs(ip->ip_len);
298		cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
299		    htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2)));
300
301		switch (ip->ip_p) {
302		case IPPROTO_TCP:
303			th = (struct tcphdr *)(ip + 1);
304			/*
305			 * Maybe it was set in
306			 * libalias...
307			 */
308			th->th_x2 = 0;
309			th->th_sum = cksum;
310			mcl->m_pkthdr.csum_data =
311			    offsetof(struct tcphdr, th_sum);
312			break;
313		case IPPROTO_UDP:
314			uh = (struct udphdr *)(ip + 1);
315			uh->uh_sum = cksum;
316			mcl->m_pkthdr.csum_data =
317			    offsetof(struct udphdr, uh_sum);
318			break;
319		}
320		/* No hw checksum offloading: do it ourselves */
321		if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) {
322			in_delayed_cksum(mcl);
323			mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
324		}
325		ip->ip_len = htons(ip->ip_len);
326	}
327	args->m = mcl;
328	return (IP_FW_NAT);
329}
330
331static struct cfg_nat *
332lookup_nat(struct nat_list *l, int nat_id)
333{
334	struct cfg_nat *res;
335
336	LIST_FOREACH(res, l, _next) {
337		if (res->id == nat_id)
338			break;
339	}
340	return res;
341}
342
343static int
344ipfw_nat_cfg(struct sockopt *sopt)
345{
346	struct cfg_nat *cfg, *ptr;
347	char *buf;
348	struct ip_fw_chain *chain = &V_layer3_chain;
349	size_t len;
350	int gencnt, error = 0;
351
352	len = sopt->sopt_valsize;
353	buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
354	if ((error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat))) != 0)
355		goto out;
356
357	cfg = (struct cfg_nat *)buf;
358	if (cfg->id < 0) {
359		error = EINVAL;
360		goto out;
361	}
362
363	/*
364	 * Find/create nat rule.
365	 */
366	IPFW_WLOCK(chain);
367	gencnt = chain->gencnt;
368	ptr = lookup_nat(&chain->nat, cfg->id);
369	if (ptr == NULL) {
370		IPFW_WUNLOCK(chain);
371		/* New rule: allocate and init new instance. */
372		ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_WAITOK | M_ZERO);
373		ptr->lib = LibAliasInit(NULL);
374		LIST_INIT(&ptr->redir_chain);
375	} else {
376		/* Entry already present: temporarily unhook it. */
377		LIST_REMOVE(ptr, _next);
378		flush_nat_ptrs(chain, cfg->id);
379		IPFW_WUNLOCK(chain);
380	}
381
382	/*
383	 * Basic nat configuration.
384	 */
385	ptr->id = cfg->id;
386	/*
387	 * XXX - what if this rule doesn't nat any ip and just
388	 * redirect?
389	 * do we set aliasaddress to 0.0.0.0?
390	 */
391	ptr->ip = cfg->ip;
392	ptr->redir_cnt = cfg->redir_cnt;
393	ptr->mode = cfg->mode;
394	LibAliasSetMode(ptr->lib, cfg->mode, cfg->mode);
395	LibAliasSetAddress(ptr->lib, ptr->ip);
396	memcpy(ptr->if_name, cfg->if_name, IF_NAMESIZE);
397
398	/*
399	 * Redir and LSNAT configuration.
400	 */
401	/* Delete old cfgs. */
402	del_redir_spool_cfg(ptr, &ptr->redir_chain);
403	/* Add new entries. */
404	add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr);
405
406	IPFW_WLOCK(chain);
407	/* Extra check to avoid race with another ipfw_nat_cfg() */
408	if (gencnt != chain->gencnt &&
409	    ((cfg = lookup_nat(&chain->nat, ptr->id)) != NULL))
410		LIST_REMOVE(cfg, _next);
411	LIST_INSERT_HEAD(&chain->nat, ptr, _next);
412	chain->gencnt++;
413	IPFW_WUNLOCK(chain);
414
415out:
416	free(buf, M_TEMP);
417	return (error);
418}
419
420static int
421ipfw_nat_del(struct sockopt *sopt)
422{
423	struct cfg_nat *ptr;
424	struct ip_fw_chain *chain = &V_layer3_chain;
425	int i;
426
427	sooptcopyin(sopt, &i, sizeof i, sizeof i);
428	/* XXX validate i */
429	IPFW_WLOCK(chain);
430	ptr = lookup_nat(&chain->nat, i);
431	if (ptr == NULL) {
432		IPFW_WUNLOCK(chain);
433		return (EINVAL);
434	}
435	LIST_REMOVE(ptr, _next);
436	flush_nat_ptrs(chain, i);
437	IPFW_WUNLOCK(chain);
438	del_redir_spool_cfg(ptr, &ptr->redir_chain);
439	LibAliasUninit(ptr->lib);
440	free(ptr, M_IPFW);
441	return (0);
442}
443
444static int
445ipfw_nat_get_cfg(struct sockopt *sopt)
446{
447	struct ip_fw_chain *chain = &V_layer3_chain;
448	struct cfg_nat *n;
449	struct cfg_redir *r;
450	struct cfg_spool *s;
451	char *data;
452	int gencnt, nat_cnt, len, error;
453
454	nat_cnt = 0;
455	len = sizeof(nat_cnt);
456
457	IPFW_RLOCK(chain);
458retry:
459	gencnt = chain->gencnt;
460	/* Estimate memory amount */
461	LIST_FOREACH(n, &chain->nat, _next) {
462		nat_cnt++;
463		len += sizeof(struct cfg_nat);
464		LIST_FOREACH(r, &n->redir_chain, _next) {
465			len += sizeof(struct cfg_redir);
466			LIST_FOREACH(s, &r->spool_chain, _next)
467				len += sizeof(struct cfg_spool);
468		}
469	}
470	IPFW_RUNLOCK(chain);
471
472	data = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
473	bcopy(&nat_cnt, data, sizeof(nat_cnt));
474
475	nat_cnt = 0;
476	len = sizeof(nat_cnt);
477
478	IPFW_RLOCK(chain);
479	if (gencnt != chain->gencnt) {
480		free(data, M_TEMP);
481		goto retry;
482	}
483	/* Serialize all the data. */
484	LIST_FOREACH(n, &chain->nat, _next) {
485		bcopy(n, &data[len], sizeof(struct cfg_nat));
486		len += sizeof(struct cfg_nat);
487		LIST_FOREACH(r, &n->redir_chain, _next) {
488			bcopy(r, &data[len], sizeof(struct cfg_redir));
489			len += sizeof(struct cfg_redir);
490			LIST_FOREACH(s, &r->spool_chain, _next) {
491				bcopy(s, &data[len], sizeof(struct cfg_spool));
492				len += sizeof(struct cfg_spool);
493			}
494		}
495	}
496	IPFW_RUNLOCK(chain);
497
498	error = sooptcopyout(sopt, data, len);
499	free(data, M_TEMP);
500
501	return (error);
502}
503
504static int
505ipfw_nat_get_log(struct sockopt *sopt)
506{
507	uint8_t *data;
508	struct cfg_nat *ptr;
509	int i, size;
510	struct ip_fw_chain *chain;
511
512	chain = &V_layer3_chain;
513
514	IPFW_RLOCK(chain);
515	/* one pass to count, one to copy the data */
516	i = 0;
517	LIST_FOREACH(ptr, &chain->nat, _next) {
518		if (ptr->lib->logDesc == NULL)
519			continue;
520		i++;
521	}
522	size = i * (LIBALIAS_BUF_SIZE + sizeof(int));
523	data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO);
524	if (data == NULL) {
525		IPFW_RUNLOCK(chain);
526		return (ENOSPC);
527	}
528	i = 0;
529	LIST_FOREACH(ptr, &chain->nat, _next) {
530		if (ptr->lib->logDesc == NULL)
531			continue;
532		bcopy(&ptr->id, &data[i], sizeof(int));
533		i += sizeof(int);
534		bcopy(ptr->lib->logDesc, &data[i], LIBALIAS_BUF_SIZE);
535		i += LIBALIAS_BUF_SIZE;
536	}
537	IPFW_RUNLOCK(chain);
538	sooptcopyout(sopt, data, size);
539	free(data, M_IPFW);
540	return(0);
541}
542
543static void
544ipfw_nat_init(void)
545{
546
547	IPFW_WLOCK(&V_layer3_chain);
548	/* init ipfw hooks */
549	ipfw_nat_ptr = ipfw_nat;
550	lookup_nat_ptr = lookup_nat;
551	ipfw_nat_cfg_ptr = ipfw_nat_cfg;
552	ipfw_nat_del_ptr = ipfw_nat_del;
553	ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
554	ipfw_nat_get_log_ptr = ipfw_nat_get_log;
555	IPFW_WUNLOCK(&V_layer3_chain);
556	V_ifaddr_event_tag = EVENTHANDLER_REGISTER(
557	    ifaddr_event, ifaddr_change,
558	    NULL, EVENTHANDLER_PRI_ANY);
559}
560
561static void
562ipfw_nat_destroy(void)
563{
564	struct cfg_nat *ptr, *ptr_temp;
565	struct ip_fw_chain *chain;
566
567	chain = &V_layer3_chain;
568	IPFW_WLOCK(chain);
569	LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
570		LIST_REMOVE(ptr, _next);
571		del_redir_spool_cfg(ptr, &ptr->redir_chain);
572		LibAliasUninit(ptr->lib);
573		free(ptr, M_IPFW);
574	}
575	EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag);
576	flush_nat_ptrs(chain, -1 /* flush all */);
577	/* deregister ipfw_nat */
578	ipfw_nat_ptr = NULL;
579	lookup_nat_ptr = NULL;
580	ipfw_nat_cfg_ptr = NULL;
581	ipfw_nat_del_ptr = NULL;
582	ipfw_nat_get_cfg_ptr = NULL;
583	ipfw_nat_get_log_ptr = NULL;
584	IPFW_WUNLOCK(chain);
585}
586
587static int
588ipfw_nat_modevent(module_t mod, int type, void *unused)
589{
590	int err = 0;
591
592	switch (type) {
593	case MOD_LOAD:
594		ipfw_nat_init();
595		break;
596
597	case MOD_UNLOAD:
598		ipfw_nat_destroy();
599		break;
600
601	default:
602		return EOPNOTSUPP;
603		break;
604	}
605	return err;
606}
607
/*
 * Module descriptor handed to DECLARE_MODULE() below: the module's string
 * name, ipfw_nat_modevent as its event handler, and a zero third member.
 */
static moduledata_t ipfw_nat_mod = {
	"ipfw_nat",
	ipfw_nat_modevent,
	0
};

/*
 * Register the module and declare that it depends on the libalias and ipfw
 * modules.  NOTE(review): the three numbers in each MODULE_DEPEND() are
 * presumably the minimum/preferred/maximum accepted versions -- confirm
 * against module documentation.
 */
DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2);
MODULE_VERSION(ipfw_nat, 1);
618/* end of file */
619