1/* 2 * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org> 3 * (C) 2011 by Vyatta Inc. <http://www.vyatta.com> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18 */ 19 20#include "cache.h" 21#include "hash.h" 22#include "log.h" 23#include "conntrackd.h" 24#include "netlink.h" 25#include "event.h" 26#include "jhash.h" 27#include "network.h" 28 29#include <errno.h> 30#include <string.h> 31#include <time.h> 32#include <libnetfilter_conntrack/libnetfilter_conntrack.h> 33 34static uint32_t 35cache_hash4_ct(const struct nf_conntrack *ct, const struct hashtable *table) 36{ 37 uint32_t a[4] = { 38 [0] = nfct_get_attr_u32(ct, ATTR_IPV4_SRC), 39 [1] = nfct_get_attr_u32(ct, ATTR_IPV4_DST), 40 [2] = nfct_get_attr_u8(ct, ATTR_L3PROTO) << 16 | 41 nfct_get_attr_u8(ct, ATTR_L4PROTO), 42 [3] = nfct_get_attr_u16(ct, ATTR_PORT_SRC) << 16 | 43 nfct_get_attr_u16(ct, ATTR_PORT_DST), 44 }; 45 46 /* 47 * Instead of returning hash % table->hashsize (implying a divide) 48 * we return the high 32 bits of the (hash * table->hashsize) that will 49 * give results between [0 and hashsize-1] and same hash distribution, 50 * but using a multiply, less expensive than a divide. See: 51 * http://www.mail-archive.com/netdev@vger.kernel.org/msg56623.html 52 */ 53 return ((uint64_t)jhash2(a, 4, 0) * table->hashsize) >> 32; 54} 55 56static uint32_t 57cache_hash6_ct(const struct nf_conntrack *ct, const struct hashtable *table) 58{ 59 uint32_t a[10]; 60 61 memcpy(&a[0], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); 62 memcpy(&a[4], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4); 63 a[8] = nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16 | 64 nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO); 65 a[9] = nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16 | 66 nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST); 67 68 return ((uint64_t)jhash2(a, 10, 0) * table->hashsize) >> 32; 69} 70 71static uint32_t 72cache_ct_hash(const void *data, const struct hashtable *table) 73{ 74 int ret = 0; 75 const struct nf_conntrack *ct = data; 76 77 switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) { 78 case AF_INET: 79 ret = cache_hash4_ct(ct, table); 80 break; 81 case AF_INET6: 82 ret = cache_hash6_ct(ct, table); 83 break; 84 default: 85 dlog(LOG_ERR, "unknown layer 3 proto in hash"); 86 break; 87 } 88 return ret; 89} 90 91static int cache_ct_cmp(const void *data1, const void *data2) 92{ 93 const struct cache_object *obj = data1; 94 const struct nf_conntrack *ct = data2; 95 96 return nfct_cmp(obj->ptr, ct, NFCT_CMP_ORIG) && 97 nfct_get_attr_u32(obj->ptr, ATTR_ID) == 98 nfct_get_attr_u32(ct, ATTR_ID); 99} 100 101static void *cache_ct_alloc(void) 102{ 103 return nfct_new(); 104} 105 106static void cache_ct_free(void *ptr) 107{ 108 nfct_destroy(ptr); 109} 110 111static void cache_ct_copy(void *dst, void *src, unsigned int flags) 112{ 113 nfct_copy(dst, src, flags); 114} 115 116static int cache_ct_dump_step(void *data1, void *n) 117{ 118 char buf[1024]; 119 int size; 120 struct __dump_container *container = data1; 121 struct cache_object *obj = n; 122 char *data = obj->data; 123 unsigned i; 124 125 /* 126 * XXX: Do not dump the entries that are scheduled to expire. 127 * These entries talk about already destroyed connections 128 * that we keep for some time just in case that we have to 129 * resent some lost messages. We do not show them to the 130 * user as he may think that the firewall replicas are not 131 * in sync. The branch below is a hack as it is quite 132 * specific and it breaks conntrackd modularity. Probably 133 * there's a nicer way to do this but until I come up with it... 134 */ 135 if (CONFIG(flags) & CTD_SYNC_FTFW && obj->status == C_OBJ_DEAD) 136 return 0; 137 138 /* do not show cached timeout, this may confuse users */ 139 if (nfct_attr_is_set(obj->ptr, ATTR_TIMEOUT)) 140 nfct_attr_unset(obj->ptr, ATTR_TIMEOUT); 141 142 memset(buf, 0, sizeof(buf)); 143 size = nfct_snprintf(buf, 144 sizeof(buf), 145 obj->ptr, 146 NFCT_T_UNKNOWN, 147 container->type, 148 0); 149 150 for (i = 0; i < obj->cache->num_features; i++) { 151 if (obj->cache->features[i]->dump) { 152 size += obj->cache->features[i]->dump(obj, 153 data, 154 buf+size, 155 container->type); 156 data += obj->cache->features[i]->size; 157 } 158 } 159 if (container->type != NFCT_O_XML) { 160 long tm = time(NULL); 161 size += sprintf(buf+size, " [active since %lds]", 162 tm - obj->lifetime); 163 } 164 size += sprintf(buf+size, "\n"); 165 if (send(container->fd, buf, size, 0) == -1) { 166 if (errno != EPIPE) 167 return -1; 168 } 169 170 return 0; 171} 172 173static void 174cache_ct_commit_step(struct __commit_container *tmp, struct cache_object *obj) 175{ 176 int ret, retry = 1, timeout; 177 struct nf_conntrack *ct = obj->ptr; 178 179 if (CONFIG(commit_timeout)) { 180 timeout = CONFIG(commit_timeout); 181 } else { 182 timeout = time(NULL) - obj->lastupdate; 183 if (timeout < 0) { 184 /* XXX: Arbitrarily set the timer to one minute, how 185 * can this happen? For example, an adjustment due to 186 * daylight-saving. Probably other situations can 187 * trigger this. */ 188 timeout = 60; 189 } 190 /* calculate an estimation of the current timeout */ 191 timeout = nfct_get_attr_u32(ct, ATTR_TIMEOUT) - timeout; 192 if (timeout < 0) { 193 timeout = 60; 194 } 195 } 196 197retry: 198 if (nl_create_conntrack(tmp->h, ct, timeout) == -1) { 199 if (errno == EEXIST && retry == 1) { 200 ret = nl_destroy_conntrack(tmp->h, ct); 201 if (ret == 0 || (ret == -1 && errno == ENOENT)) { 202 if (retry) { 203 retry = 0; 204 goto retry; 205 } 206 } 207 dlog(LOG_ERR, "commit-destroy: %s", strerror(errno)); 208 dlog_ct(STATE(log), ct, NFCT_O_PLAIN); 209 tmp->c->stats.commit_fail++; 210 } else { 211 dlog(LOG_ERR, "commit-create: %s", strerror(errno)); 212 dlog_ct(STATE(log), ct, NFCT_O_PLAIN); 213 tmp->c->stats.commit_fail++; 214 } 215 } else { 216 tmp->c->stats.commit_ok++; 217 } 218} 219 220static int cache_ct_commit_related(void *data, void *n) 221{ 222 struct cache_object *obj = n; 223 224 if (ct_is_related(obj->ptr)) 225 cache_ct_commit_step(data, obj); 226 227 /* keep iterating even if we have found errors */ 228 return 0; 229} 230 231static int cache_ct_commit_master(void *data, void *n) 232{ 233 struct cache_object *obj = n; 234 235 if (ct_is_related(obj->ptr)) 236 return 0; 237 238 cache_ct_commit_step(data, obj); 239 return 0; 240} 241 242static int cache_ct_commit(struct cache *c, struct nfct_handle *h, int clientfd) 243{ 244 unsigned int commit_ok, commit_fail; 245 struct __commit_container tmp = { 246 .h = h, 247 .c = c, 248 }; 249 struct timeval commit_stop, res; 250 251 /* we already have one commit in progress, skip this. The clientfd 252 * descriptor has to be closed by the caller. */ 253 if (clientfd && STATE_SYNC(commit).clientfd != -1) 254 return -1; 255 256 switch(STATE_SYNC(commit).state) { 257 case COMMIT_STATE_INACTIVE: 258 gettimeofday(&STATE_SYNC(commit).stats.start, NULL); 259 STATE_SYNC(commit).stats.ok = c->stats.commit_ok; 260 STATE_SYNC(commit).stats.fail = c->stats.commit_fail; 261 STATE_SYNC(commit).clientfd = clientfd; 262 case COMMIT_STATE_MASTER: 263 STATE_SYNC(commit).current = 264 hashtable_iterate_limit(c->h, &tmp, 265 STATE_SYNC(commit).current, 266 CONFIG(general).commit_steps, 267 cache_ct_commit_master); 268 if (STATE_SYNC(commit).current < CONFIG(hashsize)) { 269 STATE_SYNC(commit).state = COMMIT_STATE_MASTER; 270 /* give it another step as soon as possible */ 271 write_evfd(STATE_SYNC(commit).evfd); 272 return 1; 273 } 274 STATE_SYNC(commit).current = 0; 275 STATE_SYNC(commit).state = COMMIT_STATE_RELATED; 276 case COMMIT_STATE_RELATED: 277 STATE_SYNC(commit).current = 278 hashtable_iterate_limit(c->h, &tmp, 279 STATE_SYNC(commit).current, 280 CONFIG(general).commit_steps, 281 cache_ct_commit_related); 282 if (STATE_SYNC(commit).current < CONFIG(hashsize)) { 283 STATE_SYNC(commit).state = COMMIT_STATE_RELATED; 284 /* give it another step as soon as possible */ 285 write_evfd(STATE_SYNC(commit).evfd); 286 return 1; 287 } 288 /* calculate the time that commit has taken */ 289 gettimeofday(&commit_stop, NULL); 290 timersub(&commit_stop, &STATE_SYNC(commit).stats.start, &res); 291 292 /* calculate new entries committed */ 293 commit_ok = c->stats.commit_ok - STATE_SYNC(commit).stats.ok; 294 commit_fail = 295 c->stats.commit_fail - STATE_SYNC(commit).stats.fail; 296 297 /* log results */ 298 dlog(LOG_NOTICE, "Committed %u new entries", commit_ok); 299 300 if (commit_fail) 301 dlog(LOG_NOTICE, "%u entries can't be " 302 "committed", commit_fail); 303 304 dlog(LOG_NOTICE, "commit has taken %lu.%06lu seconds", 305 res.tv_sec, res.tv_usec); 306 307 /* prepare the state machine for new commits */ 308 STATE_SYNC(commit).current = 0; 309 STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE; 310 311 return 0; 312 } 313 return 1; 314} 315 316static struct nethdr * 317cache_ct_build_msg(const struct cache_object *obj, int type) 318{ 319 return BUILD_NETMSG_FROM_CT(obj->ptr, type); 320} 321 322/* template to cache conntracks coming from the kernel. */ 323struct cache_ops cache_sync_internal_ct_ops = { 324 .hash = cache_ct_hash, 325 .cmp = cache_ct_cmp, 326 .alloc = cache_ct_alloc, 327 .free = cache_ct_free, 328 .copy = cache_ct_copy, 329 .dump_step = cache_ct_dump_step, 330 .commit = NULL, 331 .build_msg = cache_ct_build_msg, 332}; 333 334/* template to cache conntracks coming from the network. */ 335struct cache_ops cache_sync_external_ct_ops = { 336 .hash = cache_ct_hash, 337 .cmp = cache_ct_cmp, 338 .alloc = cache_ct_alloc, 339 .free = cache_ct_free, 340 .copy = cache_ct_copy, 341 .dump_step = cache_ct_dump_step, 342 .commit = cache_ct_commit, 343 .build_msg = NULL, 344}; 345 346/* template to cache conntracks for the statistics mode. */ 347struct cache_ops cache_stats_ct_ops = { 348 .hash = cache_ct_hash, 349 .cmp = cache_ct_cmp, 350 .alloc = cache_ct_alloc, 351 .free = cache_ct_free, 352 .copy = cache_ct_copy, 353 .dump_step = cache_ct_dump_step, 354 .commit = NULL, 355 .build_msg = NULL, 356}; 357