1/*
2 * (C) 2006-2012 by Pablo Neira Ayuso <pablo@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 *
18 * Part of this code has been sponsored by Vyatta Inc. <http://www.vyatta.com>
19 */
20
21#include "conntrackd.h"
22#include "netlink.h"
23#include "filter.h"
24#include "log.h"
25#include "alarm.h"
26#include "fds.h"
27#include "traffic_stats.h"
28#include "process.h"
29#include "origin.h"
30#include "date.h"
31#include "internal.h"
32
33#include <errno.h>
34#include <signal.h>
35#include <stdlib.h>
36#include <unistd.h>
37#include <sys/wait.h>
38#include <string.h>
39#include <time.h>
40#include <fcntl.h>
41
42void ctnl_kill(void)
43{
44	if (!(CONFIG(flags) & CTD_POLL))
45		nfct_close(STATE(event));
46
47	nfct_close(STATE(resync));
48	nfct_close(STATE(get));
49	origin_unregister(STATE(flush));
50	nfct_close(STATE(flush));
51
52	if (STATE(us_filter))
53		ct_filter_destroy(STATE(us_filter));
54	STATE(mode)->kill();
55
56	if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
57		nfct_close(STATE(dump));
58	}
59}
60
61static void local_flush_master(void)
62{
63	STATE(stats).nl_kernel_table_flush++;
64	dlog(LOG_NOTICE, "flushing kernel conntrack table");
65
66	/* fork a child process that performs the flush operation,
67	 * meanwhile the parent process handles events. */
68	if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL,
69			     NULL, NULL) == 0) {
70		nl_flush_conntrack_table_selective();
71		exit(EXIT_SUCCESS);
72	}
73}
74
75static void local_resync_master(void)
76{
77	if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
78		STATE(stats).nl_kernel_table_resync++;
79		dlog(LOG_NOTICE, "resync with master conntrack table");
80		nl_dump_conntrack_table(STATE(dump));
81	} else {
82		dlog(LOG_NOTICE, "resync is unsupported in this mode");
83	}
84}
85
86static void local_exp_flush_master(void)
87{
88	if (!(CONFIG(flags) & CTD_EXPECT))
89		return;
90
91	STATE(stats).nl_kernel_table_flush++;
92	dlog(LOG_NOTICE, "flushing kernel expect table");
93
94	/* fork a child process that performs the flush operation,
95	 * meanwhile the parent process handles events. */
96	if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL,
97			     NULL, NULL) == 0) {
98		nl_flush_expect_table(STATE(flush));
99		exit(EXIT_SUCCESS);
100	}
101}
102
103static void local_exp_resync_master(void)
104{
105	if (!(CONFIG(flags) & CTD_EXPECT))
106		return;
107
108	if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
109		STATE(stats).nl_kernel_table_resync++;
110		dlog(LOG_NOTICE, "resync with master expect table");
111		nl_dump_expect_table(STATE(dump));
112	} else {
113		dlog(LOG_NOTICE, "resync is unsupported in this mode");
114	}
115}
116
117int ctnl_local(int fd, int type, void *data)
118{
119	int ret = LOCAL_RET_OK;
120
121	switch(type) {
122	case CT_FLUSH_MASTER:
123		local_flush_master();
124		break;
125	case CT_RESYNC_MASTER:
126		local_resync_master();
127		break;
128	case EXP_FLUSH_MASTER:
129		local_exp_flush_master();
130		break;
131	case EXP_RESYNC_MASTER:
132		local_exp_resync_master();
133		break;
134	case ALL_FLUSH_MASTER:
135		local_flush_master();
136		local_exp_flush_master();
137		break;
138	case ALL_RESYNC_MASTER:
139		local_resync_master();
140		local_exp_resync_master();
141		break;
142	}
143
144	ret = STATE(mode)->local(fd, type, data);
145	if (ret == LOCAL_RET_ERROR) {
146		STATE(stats).local_unknown_request++;
147		return LOCAL_RET_ERROR;
148	}
149	return ret;
150}
151
152static void do_overrun_resync_alarm(struct alarm_block *a, void *data)
153{
154	nl_send_resync(STATE(resync));
155	STATE(stats).nl_kernel_table_resync++;
156}
157
158static void do_polling_alarm(struct alarm_block *a, void *data)
159{
160	if (STATE(mode)->internal->ct.purge)
161		STATE(mode)->internal->ct.purge();
162
163	if (STATE(mode)->internal->exp.purge)
164		STATE(mode)->internal->exp.purge();
165
166	nl_send_resync(STATE(resync));
167	if (CONFIG(flags) & CTD_EXPECT)
168		nl_send_expect_resync(STATE(resync));
169
170	add_alarm(&STATE(polling_alarm), CONFIG(poll_kernel_secs), 0);
171}
172
173static int event_handler(const struct nlmsghdr *nlh,
174			 enum nf_conntrack_msg_type type,
175			 struct nf_conntrack *ct,
176			 void *data)
177{
178	int origin_type;
179
180	STATE(stats).nl_events_received++;
181
182	/* skip user-space filtering if already do it in the kernel */
183	if (ct_filter_conntrack(ct, !CONFIG(filter_from_kernelspace))) {
184		STATE(stats).nl_events_filtered++;
185		goto out;
186	}
187
188	origin_type = origin_find(nlh);
189
190	switch(type) {
191	case NFCT_T_NEW:
192		STATE(mode)->internal->ct.new(ct, origin_type);
193		break;
194	case NFCT_T_UPDATE:
195		STATE(mode)->internal->ct.upd(ct, origin_type);
196		break;
197	case NFCT_T_DESTROY:
198		if (STATE(mode)->internal->ct.del(ct, origin_type))
199			update_traffic_stats(ct);
200		break;
201	default:
202		STATE(stats).nl_events_unknown_type++;
203		break;
204	}
205
206out:
207	/* we reset the iteration limiter in the main select loop. */
208	if (STATE(event_iterations_limit)-- <= 0)
209		return NFCT_CB_STOP;
210	else
211		return NFCT_CB_CONTINUE;
212}
213
214static int exp_event_handler(const struct nlmsghdr *nlh,
215			     enum nf_conntrack_msg_type type,
216			     struct nf_expect *exp,
217			     void *data)
218{
219	int origin_type;
220	const struct nf_conntrack *master =
221		nfexp_get_attr(exp, ATTR_EXP_MASTER);
222
223	STATE(stats).nl_events_received++;
224
225	if (!exp_filter_find(STATE(exp_filter), exp)) {
226		STATE(stats).nl_events_filtered++;
227		goto out;
228	}
229	if (ct_filter_conntrack(master, 1))
230		return NFCT_CB_CONTINUE;
231
232	origin_type = origin_find(nlh);
233
234	switch(type) {
235	case NFCT_T_NEW:
236		STATE(mode)->internal->exp.new(exp, origin_type);
237		break;
238	case NFCT_T_UPDATE:
239		STATE(mode)->internal->exp.upd(exp, origin_type);
240		break;
241	case NFCT_T_DESTROY:
242		STATE(mode)->internal->exp.del(exp, origin_type);
243		break;
244	default:
245		STATE(stats).nl_events_unknown_type++;
246		break;
247	}
248
249out:
250	/* we reset the iteration limiter in the main select loop. */
251	if (STATE(event_iterations_limit)-- <= 0)
252		return NFCT_CB_STOP;
253	else
254		return NFCT_CB_CONTINUE;
255}
256
257static int dump_handler(enum nf_conntrack_msg_type type,
258			struct nf_conntrack *ct,
259			void *data)
260{
261	if (ct_filter_conntrack(ct, 1))
262		return NFCT_CB_CONTINUE;
263
264	switch(type) {
265	case NFCT_T_UPDATE:
266		STATE(mode)->internal->ct.populate(ct);
267		break;
268	default:
269		STATE(stats).nl_dump_unknown_type++;
270		break;
271	}
272	return NFCT_CB_CONTINUE;
273}
274
275static int exp_dump_handler(enum nf_conntrack_msg_type type,
276			    struct nf_expect *exp, void *data)
277{
278	const struct nf_conntrack *master =
279		nfexp_get_attr(exp, ATTR_EXP_MASTER);
280
281	if (!exp_filter_find(STATE(exp_filter), exp))
282		return NFCT_CB_CONTINUE;
283
284	if (ct_filter_conntrack(master, 1))
285		return NFCT_CB_CONTINUE;
286
287	switch(type) {
288	case NFCT_T_UPDATE:
289		STATE(mode)->internal->exp.populate(exp);
290		break;
291	default:
292		STATE(stats).nl_dump_unknown_type++;
293		break;
294	}
295	return NFCT_CB_CONTINUE;
296}
297
298static int get_handler(enum nf_conntrack_msg_type type,
299		       struct nf_conntrack *ct,
300		       void *data)
301{
302	if (ct_filter_conntrack(ct, 1))
303		return NFCT_CB_CONTINUE;
304
305	STATE(get_retval) = 1;
306	return NFCT_CB_CONTINUE;
307}
308
309static int exp_get_handler(enum nf_conntrack_msg_type type,
310			   struct nf_expect *exp, void *data)
311{
312	const struct nf_conntrack *master =
313		nfexp_get_attr(exp, ATTR_EXP_MASTER);
314
315	if (!exp_filter_find(STATE(exp_filter), exp))
316		return NFCT_CB_CONTINUE;
317
318	if (ct_filter_conntrack(master, 1))
319		return NFCT_CB_CONTINUE;
320
321	STATE(get_retval) = 1;
322	return NFCT_CB_CONTINUE;
323}
324
325/* we have received an event from ctnetlink */
326static void event_cb(void *data)
327{
328	int ret;
329
330	ret = nfct_catch(STATE(event));
331	/* reset event iteration limit counter */
332	STATE(event_iterations_limit) = CONFIG(event_iterations_limit);
333	if (ret == -1) {
334		switch(errno) {
335		case ENOBUFS:
336			/* We have hit ENOBUFS, it's likely that we are
337			 * losing events. Two possible situations may
338			 * trigger this error:
339			 *
340			 * 1) The netlink receiver buffer is too small:
341			 *    increasing the netlink buffer size should
342			 *    be enough. However, some event messages
343			 *    got lost. We have to resync ourselves
344			 *    with the kernel table conntrack table to
345			 *    resolve the inconsistency.
346			 *
347			 * 2) The receiver is too slow to process the
348			 *    netlink messages so that the queue gets
349			 *    full quickly. This generally happens
350			 *    if the system is under heavy workload
351			 *    (busy CPU). In this case, increasing the
352			 *    size of the netlink receiver buffer
353			 *    would not help anymore since we would
354			 *    be delaying the overrun. Moreover, we
355			 *    should avoid resynchronizations. We
356			 *    should do our best here and keep
357			 *    replicating as much states as possible.
358			 *    If workload lowers at some point,
359			 *    we resync ourselves.
360			 */
361			nl_resize_socket_buffer(STATE(event));
362			if (CONFIG(nl_overrun_resync) > 0 &&
363			    STATE(mode)->internal->flags & INTERNAL_F_RESYNC) {
364				add_alarm(&STATE(resync_alarm),
365					  CONFIG(nl_overrun_resync),0);
366			}
367			STATE(stats).nl_catch_event_failed++;
368			STATE(stats).nl_overrun++;
369			break;
370		case ENOENT:
371			/*
372			 * We received a message from another
373			 * netfilter subsystem that we are not
374			 * interested in. Just ignore it.
375			 */
376		break;
377		case EAGAIN:
378			/* No more events to receive, try later. */
379			break;
380		default:
381			STATE(stats).nl_catch_event_failed++;
382			break;
383		}
384	}
385}
386
387/* we previously requested a resync due to buffer overrun. */
388static void resync_cb(void *data)
389{
390	nfct_catch(STATE(resync));
391	if (STATE(mode)->internal->ct.purge)
392		STATE(mode)->internal->ct.purge();
393}
394
/* select-loop callback for the resync socket in polling mode: process
 * replies to the periodic dump requests sent by do_polling_alarm(). */
static void poll_cb(void *data)
{
	nfct_catch(STATE(resync));
}
399
400int ctnl_init(void)
401{
402	if (CONFIG(flags) & CTD_STATS_MODE)
403		STATE(mode) = &stats_mode;
404	else if (CONFIG(flags) & CTD_SYNC_MODE)
405		STATE(mode) = &sync_mode;
406	else {
407		fprintf(stderr, "WARNING: No running mode specified. "
408				"Defaulting to statistics mode.\n");
409		CONFIG(flags) |= CTD_STATS_MODE;
410		STATE(mode) = &stats_mode;
411	}
412
413	/* Initialization */
414	if (STATE(mode)->init() == -1) {
415		dlog(LOG_ERR, "initialization failed");
416		return -1;
417	}
418
419	/* resynchronize (like 'dump' socket) but it also purges old entries */
420	STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0);
421	if (STATE(resync)== NULL) {
422		dlog(LOG_ERR, "can't open netlink handler: %s",
423		     strerror(errno));
424		dlog(LOG_ERR, "no ctnetlink kernel support?");
425		return -1;
426	}
427	nfct_callback_register(STATE(resync),
428			       NFCT_T_ALL,
429			       STATE(mode)->internal->ct.resync,
430			       NULL);
431	if (CONFIG(flags) & CTD_POLL) {
432		register_fd(nfct_fd(STATE(resync)), poll_cb,
433				NULL, STATE(fds));
434	} else {
435		register_fd(nfct_fd(STATE(resync)), resync_cb,
436				NULL, STATE(fds));
437	}
438	fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK);
439
440	if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
441		STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0);
442		if (STATE(dump) == NULL) {
443			dlog(LOG_ERR, "can't open netlink handler: %s",
444			     strerror(errno));
445			dlog(LOG_ERR, "no ctnetlink kernel support?");
446			return -1;
447		}
448		nfct_callback_register(STATE(dump), NFCT_T_ALL,
449				       dump_handler, NULL);
450
451		if (CONFIG(flags) & CTD_EXPECT) {
452			nfexp_callback_register(STATE(dump), NFCT_T_ALL,
453						exp_dump_handler, NULL);
454		}
455
456		if (nl_dump_conntrack_table(STATE(dump)) == -1) {
457			dlog(LOG_ERR, "can't get kernel conntrack table");
458			return -1;
459		}
460
461		if (CONFIG(flags) & CTD_EXPECT) {
462			if (nl_dump_expect_table(STATE(dump)) == -1) {
463				dlog(LOG_ERR, "can't get kernel "
464					      "expect table");
465				return -1;
466			}
467		}
468	}
469
470	STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0);
471	if (STATE(get) == NULL) {
472		dlog(LOG_ERR, "can't open netlink handler: %s",
473		     strerror(errno));
474		dlog(LOG_ERR, "no ctnetlink kernel support?");
475		return -1;
476	}
477	nfct_callback_register(STATE(get), NFCT_T_ALL, get_handler, NULL);
478
479	if (CONFIG(flags) & CTD_EXPECT) {
480		nfexp_callback_register(STATE(get), NFCT_T_ALL,
481					exp_get_handler, NULL);
482	}
483
484	STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0);
485	if (STATE(flush) == NULL) {
486		dlog(LOG_ERR, "cannot open flusher handler");
487		return -1;
488	}
489	/* register this handler as the origin of a flush operation */
490	origin_register(STATE(flush), CTD_ORIGIN_FLUSH);
491
492	if (CONFIG(flags) & CTD_POLL) {
493		init_alarm(&STATE(polling_alarm), NULL, do_polling_alarm);
494		add_alarm(&STATE(polling_alarm), CONFIG(poll_kernel_secs), 0);
495		dlog(LOG_NOTICE, "running in polling mode");
496	} else {
497		init_alarm(&STATE(resync_alarm), NULL, do_overrun_resync_alarm);
498		/*
499		 * The last nfct handler that we register is the event handler.
500		 * The reason to do this is that we may receive events while
501		 * populating the internal cache. Thus, we hit ENOBUFS
502		 * prematurely. However, if we open the event handler before
503		 * populating the internal cache, we may still lose events
504		 * that have occured during the population.
505		 */
506		STATE(event) = nl_init_event_handler();
507		if (STATE(event) == NULL) {
508			dlog(LOG_ERR, "can't open netlink handler: %s",
509			     strerror(errno));
510			dlog(LOG_ERR, "no ctnetlink kernel support?");
511			return -1;
512		}
513		nfct_callback_register2(STATE(event), NFCT_T_ALL,
514				        event_handler, NULL);
515
516		if (CONFIG(flags) & CTD_EXPECT) {
517			nfexp_callback_register2(STATE(event), NFCT_T_ALL,
518						 exp_event_handler, NULL);
519		}
520		register_fd(nfct_fd(STATE(event)), event_cb, NULL, STATE(fds));
521	}
522
523	return 0;
524}
525