1/* 2 * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org> 3 * (C) 2011 by Vyatta Inc. <http://www.vyatta.com> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18 */ 19 20#include "netlink.h" 21#include "conntrackd.h" 22#include "filter.h" 23#include "log.h" 24 25#include <string.h> 26#include <errno.h> 27#include <sys/types.h> 28#include <sys/socket.h> 29#include <sys/fcntl.h> 30#include <libnetfilter_conntrack/libnetfilter_conntrack_tcp.h> 31 32struct nfct_handle *nl_init_event_handler(void) 33{ 34 struct nfct_handle *h; 35 36 h = nfct_open(CONFIG(netlink).subsys_id, CONFIG(netlink).groups); 37 if (h == NULL) 38 return NULL; 39 40 if (CONFIG(netlink).events_reliable) { 41 int on = 1; 42 43 setsockopt(nfct_fd(h), SOL_NETLINK, 44 NETLINK_BROADCAST_SEND_ERROR, &on, sizeof(int)); 45 46 setsockopt(nfct_fd(h), SOL_NETLINK, 47 NETLINK_NO_ENOBUFS, &on, sizeof(int)); 48 49 dlog(LOG_NOTICE, "reliable ctnetlink event delivery " 50 "is ENABLED."); 51 } 52 53 if (STATE(filter)) { 54 if (CONFIG(filter_from_kernelspace)) { 55 if (nfct_filter_attach(nfct_fd(h), 56 STATE(filter)) == -1) { 57 dlog(LOG_ERR, "cannot set event filtering: %s", 58 strerror(errno)); 59 } 60 dlog(LOG_NOTICE, "using kernel-space event filtering"); 61 } else 62 dlog(LOG_NOTICE, "using user-space event filtering"); 63 64 nfct_filter_destroy(STATE(filter)); 65 } 66 67 fcntl(nfct_fd(h), F_SETFL, O_NONBLOCK); 68 69 /* set up socket buffer size */ 70 if (CONFIG(netlink_buffer_size) && 71 CONFIG(netlink_buffer_size) <= 72 CONFIG(netlink_buffer_size_max_grown)) { 73 /* we divide netlink_buffer_size by 2 here since value passed 74 to kernel gets doubled in SO_RCVBUF; see net/core/sock.c */ 75 CONFIG(netlink_buffer_size) = 76 nfnl_rcvbufsiz(nfct_nfnlh(h), CONFIG(netlink_buffer_size)/2); 77 } else { 78 dlog(LOG_NOTICE, "NetlinkBufferSize is either not set or " 79 "is greater than NetlinkBufferSizeMaxGrowth. " 80 "Using current system buffer size"); 81 82 socklen_t socklen = sizeof(unsigned int); 83 unsigned int read_size; 84 85 /* get current buffer size */ 86 getsockopt(nfct_fd(h), SOL_SOCKET, 87 SO_RCVBUF, &read_size, &socklen); 88 89 CONFIG(netlink_buffer_size) = read_size; 90 } 91 92 dlog(LOG_NOTICE, "netlink event socket buffer size has been set " 93 "to %u bytes", CONFIG(netlink_buffer_size)); 94 95 return h; 96} 97 98struct nlif_handle *nl_init_interface_handler(void) 99{ 100 struct nlif_handle *h; 101 h = nlif_open(); 102 if (h == NULL) 103 return NULL; 104 105 if (nlif_query(h) == -1) { 106 free(h); 107 return NULL; 108 } 109 fcntl(nlif_fd(h), F_SETFL, O_NONBLOCK); 110 111 return h; 112} 113 114static int warned = 0; 115 116void nl_resize_socket_buffer(struct nfct_handle *h) 117{ 118 unsigned int s = CONFIG(netlink_buffer_size); 119 120 /* already warned that we have reached the maximum buffer size */ 121 if (warned) 122 return; 123 124 /* since sock_setsockopt in net/core/sock.c doubles the size of socket 125 buffer passed to it using nfnl_rcvbufsiz, only call nfnl_rcvbufsiz 126 if new value is not greater than netlink_buffer_size_max_grown */ 127 if (s*2 > CONFIG(netlink_buffer_size_max_grown)) { 128 dlog(LOG_WARNING, 129 "netlink event socket buffer size cannot " 130 "be doubled further since it will exceed " 131 "NetlinkBufferSizeMaxGrowth. We are likely to " 132 "be losing events, this may lead to " 133 "unsynchronized replicas. Please, consider " 134 "increasing netlink socket buffer size via " 135 "NetlinkBufferSize and " 136 "NetlinkBufferSizeMaxGrowth clauses in " 137 "conntrackd.conf"); 138 warned = 1; 139 return; 140 } 141 142 CONFIG(netlink_buffer_size) = nfnl_rcvbufsiz(nfct_nfnlh(h), s); 143 144 /* notify the sysadmin */ 145 dlog(LOG_NOTICE, "netlink event socket buffer size has been doubled " 146 "to %u bytes", CONFIG(netlink_buffer_size)); 147} 148 149int nl_dump_conntrack_table(struct nfct_handle *h) 150{ 151 return nfct_query(h, NFCT_Q_DUMP, &CONFIG(family)); 152} 153 154static int 155nl_flush_selective_cb(enum nf_conntrack_msg_type type, 156 struct nf_conntrack *ct, void *data) 157{ 158 /* don't delete this conntrack, it's in the ignore filter */ 159 if (ct_filter_conntrack(ct, 1)) 160 return NFCT_CB_CONTINUE; 161 162 switch(type) { 163 case NFCT_T_UPDATE: 164 nl_destroy_conntrack(STATE(flush), ct); 165 break; 166 default: 167 STATE(stats).nl_dump_unknown_type++; 168 break; 169 } 170 return NFCT_CB_CONTINUE; 171} 172 173int nl_flush_conntrack_table_selective(void) 174{ 175 struct nfct_handle *h; 176 int ret; 177 178 h = nfct_open(CONNTRACK, 0); 179 if (h == NULL) { 180 dlog(LOG_ERR, "cannot open handle"); 181 return -1; 182 } 183 nfct_callback_register(h, NFCT_T_ALL, nl_flush_selective_cb, NULL); 184 185 ret = nfct_query(h, NFCT_Q_DUMP, &CONFIG(family)); 186 187 nfct_close(h); 188 189 return ret; 190} 191 192int nl_send_resync(struct nfct_handle *h) 193{ 194 int family = CONFIG(family); 195 return nfct_send(h, NFCT_Q_DUMP, &family); 196} 197 198/* if the handle has no callback, check for existence, otherwise, update */ 199int nl_get_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) 200{ 201 int ret = 1; 202 struct nf_conntrack *tmp; 203 204 tmp = nfct_new(); 205 if (tmp == NULL) 206 return -1; 207 208 /* use the original tuple to check if it is there */ 209 nfct_copy(tmp, ct, NFCT_CP_ORIG); 210 211 if (nfct_query(h, NFCT_Q_GET, tmp) == -1) 212 ret = (errno == ENOENT) ? 0 : -1; 213 214 nfct_destroy(tmp); 215 return ret; 216} 217 218int nl_create_conntrack(struct nfct_handle *h, 219 const struct nf_conntrack *orig, 220 int timeout) 221{ 222 int ret; 223 struct nf_conntrack *ct; 224 225 ct = nfct_clone(orig); 226 if (ct == NULL) 227 return -1; 228 229 if (timeout > 0) 230 nfct_set_attr_u32(ct, ATTR_TIMEOUT, timeout); 231 232 /* we hit error if we try to change the expected bit */ 233 if (nfct_attr_is_set(ct, ATTR_STATUS)) { 234 uint32_t status = nfct_get_attr_u32(ct, ATTR_STATUS); 235 status &= ~IPS_EXPECTED; 236 nfct_set_attr_u32(ct, ATTR_STATUS, status); 237 } 238 239 nfct_setobjopt(ct, NFCT_SOPT_SETUP_REPLY); 240 241 /* disable TCP window tracking for recovered connections if required */ 242 if (nfct_attr_is_set(ct, ATTR_TCP_STATE)) { 243 uint8_t flags = IP_CT_TCP_FLAG_SACK_PERM; 244 245 if (!CONFIG(sync).tcp_window_tracking) 246 flags |= IP_CT_TCP_FLAG_BE_LIBERAL; 247 else 248 flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; 249 250 /* FIXME: workaround, we should send TCP flags in updates */ 251 if (nfct_get_attr_u8(ct, ATTR_TCP_STATE) >= 252 TCP_CONNTRACK_TIME_WAIT) { 253 flags |= IP_CT_TCP_FLAG_CLOSE_INIT; 254 } 255 nfct_set_attr_u8(ct, ATTR_TCP_FLAGS_ORIG, flags); 256 nfct_set_attr_u8(ct, ATTR_TCP_MASK_ORIG, flags); 257 nfct_set_attr_u8(ct, ATTR_TCP_FLAGS_REPL, flags); 258 nfct_set_attr_u8(ct, ATTR_TCP_MASK_REPL, flags); 259 } 260 261 ret = nfct_query(h, NFCT_Q_CREATE, ct); 262 nfct_destroy(ct); 263 264 return ret; 265} 266 267int nl_update_conntrack(struct nfct_handle *h, 268 const struct nf_conntrack *orig, 269 int timeout) 270{ 271 int ret; 272 struct nf_conntrack *ct; 273 274 ct = nfct_clone(orig); 275 if (ct == NULL) 276 return -1; 277 278 if (timeout > 0) 279 nfct_set_attr_u32(ct, ATTR_TIMEOUT, timeout); 280 281 /* unset NAT info, otherwise we hit error */ 282 nfct_attr_unset(ct, ATTR_SNAT_IPV4); 283 nfct_attr_unset(ct, ATTR_DNAT_IPV4); 284 nfct_attr_unset(ct, ATTR_SNAT_PORT); 285 nfct_attr_unset(ct, ATTR_DNAT_PORT); 286 287 if (nfct_attr_is_set(ct, ATTR_STATUS)) { 288 uint32_t status = nfct_get_attr_u32(ct, ATTR_STATUS); 289 status &= ~IPS_NAT_MASK; 290 nfct_set_attr_u32(ct, ATTR_STATUS, status); 291 } 292 /* we have to unset the helper to avoid EBUSY in reset timers */ 293 if (nfct_attr_is_set(ct, ATTR_HELPER_NAME)) 294 nfct_attr_unset(ct, ATTR_HELPER_NAME); 295 296 /* we hit error if we try to update the master conntrack */ 297 if (ct_is_related(ct)) { 298 nfct_attr_unset(ct, ATTR_MASTER_L3PROTO); 299 nfct_attr_unset(ct, ATTR_MASTER_L4PROTO); 300 nfct_attr_unset(ct, ATTR_MASTER_IPV4_SRC); 301 nfct_attr_unset(ct, ATTR_MASTER_IPV4_DST); 302 nfct_attr_unset(ct, ATTR_MASTER_IPV6_SRC); 303 nfct_attr_unset(ct, ATTR_MASTER_IPV6_DST); 304 nfct_attr_unset(ct, ATTR_MASTER_PORT_SRC); 305 nfct_attr_unset(ct, ATTR_MASTER_PORT_DST); 306 } 307 308 /* disable TCP window tracking for recovered connections if required */ 309 if (nfct_attr_is_set(ct, ATTR_TCP_STATE)) { 310 uint8_t flags = IP_CT_TCP_FLAG_SACK_PERM; 311 312 if (!CONFIG(sync).tcp_window_tracking) 313 flags |= IP_CT_TCP_FLAG_BE_LIBERAL; 314 else 315 flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; 316 317 /* FIXME: workaround, we should send TCP flags in updates */ 318 if (nfct_get_attr_u8(ct, ATTR_TCP_STATE) >= 319 TCP_CONNTRACK_TIME_WAIT) { 320 flags |= IP_CT_TCP_FLAG_CLOSE_INIT; 321 } 322 nfct_set_attr_u8(ct, ATTR_TCP_FLAGS_ORIG, flags); 323 nfct_set_attr_u8(ct, ATTR_TCP_MASK_ORIG, flags); 324 nfct_set_attr_u8(ct, ATTR_TCP_FLAGS_REPL, flags); 325 nfct_set_attr_u8(ct, ATTR_TCP_MASK_REPL, flags); 326 } 327 328 ret = nfct_query(h, NFCT_Q_UPDATE, ct); 329 nfct_destroy(ct); 330 331 return ret; 332} 333 334int nl_destroy_conntrack(struct nfct_handle *h, const struct nf_conntrack *ct) 335{ 336 return nfct_query(h, NFCT_Q_DESTROY, ct); 337} 338 339int nl_create_expect(struct nfct_handle *h, const struct nf_expect *orig, 340 int timeout) 341{ 342 int ret; 343 struct nf_expect *exp; 344 345 exp = nfexp_clone(orig); 346 if (exp == NULL) 347 return -1; 348 349 if (timeout > 0) 350 nfexp_set_attr_u32(exp, ATTR_EXP_TIMEOUT, timeout); 351 352 ret = nfexp_query(h, NFCT_Q_CREATE, exp); 353 nfexp_destroy(exp); 354 355 return ret; 356} 357 358int nl_destroy_expect(struct nfct_handle *h, const struct nf_expect *exp) 359{ 360 return nfexp_query(h, NFCT_Q_DESTROY, exp); 361} 362 363/* if the handle has no callback, check for existence, otherwise, update */ 364int nl_get_expect(struct nfct_handle *h, const struct nf_expect *exp) 365{ 366 int ret = 1; 367 struct nf_expect *tmp; 368 369 /* XXX: we only need the expectation, not the mask and the master. */ 370 tmp = nfexp_clone(exp); 371 if (tmp == NULL) 372 return -1; 373 374 if (nfexp_query(h, NFCT_Q_GET, tmp) == -1) 375 ret = (errno == ENOENT) ? 0 : -1; 376 377 nfexp_destroy(tmp); 378 return ret; 379} 380 381int nl_dump_expect_table(struct nfct_handle *h) 382{ 383 return nfexp_query(h, NFCT_Q_DUMP, &CONFIG(family)); 384} 385 386int nl_flush_expect_table(struct nfct_handle *h) 387{ 388 return nfexp_query(h, NFCT_Q_FLUSH, &CONFIG(family)); 389} 390 391int nl_send_expect_resync(struct nfct_handle *h) 392{ 393 int family = CONFIG(family); 394 return nfexp_send(h, NFCT_Q_DUMP, &family); 395} 396