1/* 2 * (C) 2006-2012 by Pablo Neira Ayuso <pablo@netfilter.org> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 17 * 18 * Part of this code has been sponsored by Vyatta Inc. <http://www.vyatta.com> 19 */ 20 21#include "conntrackd.h" 22#include "netlink.h" 23#include "filter.h" 24#include "log.h" 25#include "alarm.h" 26#include "fds.h" 27#include "traffic_stats.h" 28#include "process.h" 29#include "origin.h" 30#include "date.h" 31#include "internal.h" 32 33#include <errno.h> 34#include <signal.h> 35#include <stdlib.h> 36#include <unistd.h> 37#include <sys/wait.h> 38#include <string.h> 39#include <time.h> 40#include <fcntl.h> 41 42void ctnl_kill(void) 43{ 44 if (!(CONFIG(flags) & CTD_POLL)) 45 nfct_close(STATE(event)); 46 47 nfct_close(STATE(resync)); 48 nfct_close(STATE(get)); 49 origin_unregister(STATE(flush)); 50 nfct_close(STATE(flush)); 51 52 if (STATE(us_filter)) 53 ct_filter_destroy(STATE(us_filter)); 54 STATE(mode)->kill(); 55 56 if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { 57 nfct_close(STATE(dump)); 58 } 59} 60 61static void local_flush_master(void) 62{ 63 STATE(stats).nl_kernel_table_flush++; 64 dlog(LOG_NOTICE, "flushing kernel conntrack table"); 65 66 /* fork a child process that performs the flush operation, 67 * meanwhile the parent process handles events. */ 68 if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL, 69 NULL, NULL) == 0) { 70 nl_flush_conntrack_table_selective(); 71 exit(EXIT_SUCCESS); 72 } 73} 74 75static void local_resync_master(void) 76{ 77 if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { 78 STATE(stats).nl_kernel_table_resync++; 79 dlog(LOG_NOTICE, "resync with master conntrack table"); 80 nl_dump_conntrack_table(STATE(dump)); 81 } else { 82 dlog(LOG_NOTICE, "resync is unsupported in this mode"); 83 } 84} 85 86static void local_exp_flush_master(void) 87{ 88 if (!(CONFIG(flags) & CTD_EXPECT)) 89 return; 90 91 STATE(stats).nl_kernel_table_flush++; 92 dlog(LOG_NOTICE, "flushing kernel expect table"); 93 94 /* fork a child process that performs the flush operation, 95 * meanwhile the parent process handles events. */ 96 if (fork_process_new(CTD_PROC_FLUSH, CTD_PROC_F_EXCL, 97 NULL, NULL) == 0) { 98 nl_flush_expect_table(STATE(flush)); 99 exit(EXIT_SUCCESS); 100 } 101} 102 103static void local_exp_resync_master(void) 104{ 105 if (!(CONFIG(flags) & CTD_EXPECT)) 106 return; 107 108 if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { 109 STATE(stats).nl_kernel_table_resync++; 110 dlog(LOG_NOTICE, "resync with master expect table"); 111 nl_dump_expect_table(STATE(dump)); 112 } else { 113 dlog(LOG_NOTICE, "resync is unsupported in this mode"); 114 } 115} 116 117int ctnl_local(int fd, int type, void *data) 118{ 119 int ret = LOCAL_RET_OK; 120 121 switch(type) { 122 case CT_FLUSH_MASTER: 123 local_flush_master(); 124 break; 125 case CT_RESYNC_MASTER: 126 local_resync_master(); 127 break; 128 case EXP_FLUSH_MASTER: 129 local_exp_flush_master(); 130 break; 131 case EXP_RESYNC_MASTER: 132 local_exp_resync_master(); 133 break; 134 case ALL_FLUSH_MASTER: 135 local_flush_master(); 136 local_exp_flush_master(); 137 break; 138 case ALL_RESYNC_MASTER: 139 local_resync_master(); 140 local_exp_resync_master(); 141 break; 142 } 143 144 ret = STATE(mode)->local(fd, type, data); 145 if (ret == LOCAL_RET_ERROR) { 146 STATE(stats).local_unknown_request++; 147 return LOCAL_RET_ERROR; 148 } 149 return ret; 150} 151 152static void do_overrun_resync_alarm(struct alarm_block *a, void *data) 153{ 154 nl_send_resync(STATE(resync)); 155 STATE(stats).nl_kernel_table_resync++; 156} 157 158static void do_polling_alarm(struct alarm_block *a, void *data) 159{ 160 if (STATE(mode)->internal->ct.purge) 161 STATE(mode)->internal->ct.purge(); 162 163 if (STATE(mode)->internal->exp.purge) 164 STATE(mode)->internal->exp.purge(); 165 166 nl_send_resync(STATE(resync)); 167 if (CONFIG(flags) & CTD_EXPECT) 168 nl_send_expect_resync(STATE(resync)); 169 170 add_alarm(&STATE(polling_alarm), CONFIG(poll_kernel_secs), 0); 171} 172 173static int event_handler(const struct nlmsghdr *nlh, 174 enum nf_conntrack_msg_type type, 175 struct nf_conntrack *ct, 176 void *data) 177{ 178 int origin_type; 179 180 STATE(stats).nl_events_received++; 181 182 /* skip user-space filtering if already do it in the kernel */ 183 if (ct_filter_conntrack(ct, !CONFIG(filter_from_kernelspace))) { 184 STATE(stats).nl_events_filtered++; 185 goto out; 186 } 187 188 origin_type = origin_find(nlh); 189 190 switch(type) { 191 case NFCT_T_NEW: 192 STATE(mode)->internal->ct.new(ct, origin_type); 193 break; 194 case NFCT_T_UPDATE: 195 STATE(mode)->internal->ct.upd(ct, origin_type); 196 break; 197 case NFCT_T_DESTROY: 198 if (STATE(mode)->internal->ct.del(ct, origin_type)) 199 update_traffic_stats(ct); 200 break; 201 default: 202 STATE(stats).nl_events_unknown_type++; 203 break; 204 } 205 206out: 207 /* we reset the iteration limiter in the main select loop. */ 208 if (STATE(event_iterations_limit)-- <= 0) 209 return NFCT_CB_STOP; 210 else 211 return NFCT_CB_CONTINUE; 212} 213 214static int exp_event_handler(const struct nlmsghdr *nlh, 215 enum nf_conntrack_msg_type type, 216 struct nf_expect *exp, 217 void *data) 218{ 219 int origin_type; 220 const struct nf_conntrack *master = 221 nfexp_get_attr(exp, ATTR_EXP_MASTER); 222 223 STATE(stats).nl_events_received++; 224 225 if (!exp_filter_find(STATE(exp_filter), exp)) { 226 STATE(stats).nl_events_filtered++; 227 goto out; 228 } 229 if (ct_filter_conntrack(master, 1)) 230 return NFCT_CB_CONTINUE; 231 232 origin_type = origin_find(nlh); 233 234 switch(type) { 235 case NFCT_T_NEW: 236 STATE(mode)->internal->exp.new(exp, origin_type); 237 break; 238 case NFCT_T_UPDATE: 239 STATE(mode)->internal->exp.upd(exp, origin_type); 240 break; 241 case NFCT_T_DESTROY: 242 STATE(mode)->internal->exp.del(exp, origin_type); 243 break; 244 default: 245 STATE(stats).nl_events_unknown_type++; 246 break; 247 } 248 249out: 250 /* we reset the iteration limiter in the main select loop. */ 251 if (STATE(event_iterations_limit)-- <= 0) 252 return NFCT_CB_STOP; 253 else 254 return NFCT_CB_CONTINUE; 255} 256 257static int dump_handler(enum nf_conntrack_msg_type type, 258 struct nf_conntrack *ct, 259 void *data) 260{ 261 if (ct_filter_conntrack(ct, 1)) 262 return NFCT_CB_CONTINUE; 263 264 switch(type) { 265 case NFCT_T_UPDATE: 266 STATE(mode)->internal->ct.populate(ct); 267 break; 268 default: 269 STATE(stats).nl_dump_unknown_type++; 270 break; 271 } 272 return NFCT_CB_CONTINUE; 273} 274 275static int exp_dump_handler(enum nf_conntrack_msg_type type, 276 struct nf_expect *exp, void *data) 277{ 278 const struct nf_conntrack *master = 279 nfexp_get_attr(exp, ATTR_EXP_MASTER); 280 281 if (!exp_filter_find(STATE(exp_filter), exp)) 282 return NFCT_CB_CONTINUE; 283 284 if (ct_filter_conntrack(master, 1)) 285 return NFCT_CB_CONTINUE; 286 287 switch(type) { 288 case NFCT_T_UPDATE: 289 STATE(mode)->internal->exp.populate(exp); 290 break; 291 default: 292 STATE(stats).nl_dump_unknown_type++; 293 break; 294 } 295 return NFCT_CB_CONTINUE; 296} 297 298static int get_handler(enum nf_conntrack_msg_type type, 299 struct nf_conntrack *ct, 300 void *data) 301{ 302 if (ct_filter_conntrack(ct, 1)) 303 return NFCT_CB_CONTINUE; 304 305 STATE(get_retval) = 1; 306 return NFCT_CB_CONTINUE; 307} 308 309static int exp_get_handler(enum nf_conntrack_msg_type type, 310 struct nf_expect *exp, void *data) 311{ 312 const struct nf_conntrack *master = 313 nfexp_get_attr(exp, ATTR_EXP_MASTER); 314 315 if (!exp_filter_find(STATE(exp_filter), exp)) 316 return NFCT_CB_CONTINUE; 317 318 if (ct_filter_conntrack(master, 1)) 319 return NFCT_CB_CONTINUE; 320 321 STATE(get_retval) = 1; 322 return NFCT_CB_CONTINUE; 323} 324 325/* we have received an event from ctnetlink */ 326static void event_cb(void *data) 327{ 328 int ret; 329 330 ret = nfct_catch(STATE(event)); 331 /* reset event iteration limit counter */ 332 STATE(event_iterations_limit) = CONFIG(event_iterations_limit); 333 if (ret == -1) { 334 switch(errno) { 335 case ENOBUFS: 336 /* We have hit ENOBUFS, it's likely that we are 337 * losing events. Two possible situations may 338 * trigger this error: 339 * 340 * 1) The netlink receiver buffer is too small: 341 * increasing the netlink buffer size should 342 * be enough. However, some event messages 343 * got lost. We have to resync ourselves 344 * with the kernel table conntrack table to 345 * resolve the inconsistency. 346 * 347 * 2) The receiver is too slow to process the 348 * netlink messages so that the queue gets 349 * full quickly. This generally happens 350 * if the system is under heavy workload 351 * (busy CPU). In this case, increasing the 352 * size of the netlink receiver buffer 353 * would not help anymore since we would 354 * be delaying the overrun. Moreover, we 355 * should avoid resynchronizations. We 356 * should do our best here and keep 357 * replicating as much states as possible. 358 * If workload lowers at some point, 359 * we resync ourselves. 360 */ 361 nl_resize_socket_buffer(STATE(event)); 362 if (CONFIG(nl_overrun_resync) > 0 && 363 STATE(mode)->internal->flags & INTERNAL_F_RESYNC) { 364 add_alarm(&STATE(resync_alarm), 365 CONFIG(nl_overrun_resync),0); 366 } 367 STATE(stats).nl_catch_event_failed++; 368 STATE(stats).nl_overrun++; 369 break; 370 case ENOENT: 371 /* 372 * We received a message from another 373 * netfilter subsystem that we are not 374 * interested in. Just ignore it. 375 */ 376 break; 377 case EAGAIN: 378 /* No more events to receive, try later. */ 379 break; 380 default: 381 STATE(stats).nl_catch_event_failed++; 382 break; 383 } 384 } 385} 386 387/* we previously requested a resync due to buffer overrun. */ 388static void resync_cb(void *data) 389{ 390 nfct_catch(STATE(resync)); 391 if (STATE(mode)->internal->ct.purge) 392 STATE(mode)->internal->ct.purge(); 393} 394 395static void poll_cb(void *data) 396{ 397 nfct_catch(STATE(resync)); 398} 399 400int ctnl_init(void) 401{ 402 if (CONFIG(flags) & CTD_STATS_MODE) 403 STATE(mode) = &stats_mode; 404 else if (CONFIG(flags) & CTD_SYNC_MODE) 405 STATE(mode) = &sync_mode; 406 else { 407 fprintf(stderr, "WARNING: No running mode specified. " 408 "Defaulting to statistics mode.\n"); 409 CONFIG(flags) |= CTD_STATS_MODE; 410 STATE(mode) = &stats_mode; 411 } 412 413 /* Initialization */ 414 if (STATE(mode)->init() == -1) { 415 dlog(LOG_ERR, "initialization failed"); 416 return -1; 417 } 418 419 /* resynchronize (like 'dump' socket) but it also purges old entries */ 420 STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0); 421 if (STATE(resync)== NULL) { 422 dlog(LOG_ERR, "can't open netlink handler: %s", 423 strerror(errno)); 424 dlog(LOG_ERR, "no ctnetlink kernel support?"); 425 return -1; 426 } 427 nfct_callback_register(STATE(resync), 428 NFCT_T_ALL, 429 STATE(mode)->internal->ct.resync, 430 NULL); 431 if (CONFIG(flags) & CTD_POLL) { 432 register_fd(nfct_fd(STATE(resync)), poll_cb, 433 NULL, STATE(fds)); 434 } else { 435 register_fd(nfct_fd(STATE(resync)), resync_cb, 436 NULL, STATE(fds)); 437 } 438 fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK); 439 440 if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { 441 STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0); 442 if (STATE(dump) == NULL) { 443 dlog(LOG_ERR, "can't open netlink handler: %s", 444 strerror(errno)); 445 dlog(LOG_ERR, "no ctnetlink kernel support?"); 446 return -1; 447 } 448 nfct_callback_register(STATE(dump), NFCT_T_ALL, 449 dump_handler, NULL); 450 451 if (CONFIG(flags) & CTD_EXPECT) { 452 nfexp_callback_register(STATE(dump), NFCT_T_ALL, 453 exp_dump_handler, NULL); 454 } 455 456 if (nl_dump_conntrack_table(STATE(dump)) == -1) { 457 dlog(LOG_ERR, "can't get kernel conntrack table"); 458 return -1; 459 } 460 461 if (CONFIG(flags) & CTD_EXPECT) { 462 if (nl_dump_expect_table(STATE(dump)) == -1) { 463 dlog(LOG_ERR, "can't get kernel " 464 "expect table"); 465 return -1; 466 } 467 } 468 } 469 470 STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0); 471 if (STATE(get) == NULL) { 472 dlog(LOG_ERR, "can't open netlink handler: %s", 473 strerror(errno)); 474 dlog(LOG_ERR, "no ctnetlink kernel support?"); 475 return -1; 476 } 477 nfct_callback_register(STATE(get), NFCT_T_ALL, get_handler, NULL); 478 479 if (CONFIG(flags) & CTD_EXPECT) { 480 nfexp_callback_register(STATE(get), NFCT_T_ALL, 481 exp_get_handler, NULL); 482 } 483 484 STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0); 485 if (STATE(flush) == NULL) { 486 dlog(LOG_ERR, "cannot open flusher handler"); 487 return -1; 488 } 489 /* register this handler as the origin of a flush operation */ 490 origin_register(STATE(flush), CTD_ORIGIN_FLUSH); 491 492 if (CONFIG(flags) & CTD_POLL) { 493 init_alarm(&STATE(polling_alarm), NULL, do_polling_alarm); 494 add_alarm(&STATE(polling_alarm), CONFIG(poll_kernel_secs), 0); 495 dlog(LOG_NOTICE, "running in polling mode"); 496 } else { 497 init_alarm(&STATE(resync_alarm), NULL, do_overrun_resync_alarm); 498 /* 499 * The last nfct handler that we register is the event handler. 500 * The reason to do this is that we may receive events while 501 * populating the internal cache. Thus, we hit ENOBUFS 502 * prematurely. However, if we open the event handler before 503 * populating the internal cache, we may still lose events 504 * that have occured during the population. 505 */ 506 STATE(event) = nl_init_event_handler(); 507 if (STATE(event) == NULL) { 508 dlog(LOG_ERR, "can't open netlink handler: %s", 509 strerror(errno)); 510 dlog(LOG_ERR, "no ctnetlink kernel support?"); 511 return -1; 512 } 513 nfct_callback_register2(STATE(event), NFCT_T_ALL, 514 event_handler, NULL); 515 516 if (CONFIG(flags) & CTD_EXPECT) { 517 nfexp_callback_register2(STATE(event), NFCT_T_ALL, 518 exp_event_handler, NULL); 519 } 520 register_fd(nfct_fd(STATE(event)), event_cb, NULL, STATE(fds)); 521 } 522 523 return 0; 524} 525