1/*
2 * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org>
3 * (C) 2011 by Vyatta Inc. <http://www.vyatta.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20#include "cache.h"
21#include "hash.h"
22#include "log.h"
23#include "conntrackd.h"
24#include "netlink.h"
25#include "event.h"
26#include "jhash.h"
27#include "network.h"
28
29#include <errno.h>
30#include <string.h>
31#include <time.h>
32#include <libnetfilter_conntrack/libnetfilter_conntrack.h>
33
34static uint32_t
35cache_hash4_ct(const struct nf_conntrack *ct, const struct hashtable *table)
36{
37	uint32_t a[4] = {
38		[0]	= nfct_get_attr_u32(ct, ATTR_IPV4_SRC),
39		[1]	= nfct_get_attr_u32(ct, ATTR_IPV4_DST),
40		[2]	= nfct_get_attr_u8(ct, ATTR_L3PROTO) << 16 |
41			  nfct_get_attr_u8(ct, ATTR_L4PROTO),
42		[3]	= nfct_get_attr_u16(ct, ATTR_PORT_SRC) << 16 |
43			  nfct_get_attr_u16(ct, ATTR_PORT_DST),
44	};
45
46	/*
47	 * Instead of returning hash % table->hashsize (implying a divide)
48	 * we return the high 32 bits of the (hash * table->hashsize) that will
49	 * give results between [0 and hashsize-1] and same hash distribution,
50	 * but using a multiply, less expensive than a divide. See:
51	 * http://www.mail-archive.com/netdev@vger.kernel.org/msg56623.html
52	 */
53	return ((uint64_t)jhash2(a, 4, 0) * table->hashsize) >> 32;
54}
55
56static uint32_t
57cache_hash6_ct(const struct nf_conntrack *ct, const struct hashtable *table)
58{
59	uint32_t a[10];
60
61	memcpy(&a[0], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4);
62	memcpy(&a[4], nfct_get_attr(ct, ATTR_IPV6_SRC), sizeof(uint32_t)*4);
63	a[8] = nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16 |
64	       nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO);
65	a[9] = nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16 |
66	       nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST);
67
68	return ((uint64_t)jhash2(a, 10, 0) * table->hashsize) >> 32;
69}
70
71static uint32_t
72cache_ct_hash(const void *data, const struct hashtable *table)
73{
74	int ret = 0;
75	const struct nf_conntrack *ct = data;
76
77	switch(nfct_get_attr_u8(ct, ATTR_L3PROTO)) {
78		case AF_INET:
79			ret = cache_hash4_ct(ct, table);
80			break;
81		case AF_INET6:
82			ret = cache_hash6_ct(ct, table);
83			break;
84		default:
85			dlog(LOG_ERR, "unknown layer 3 proto in hash");
86			break;
87	}
88	return ret;
89}
90
91static int cache_ct_cmp(const void *data1, const void *data2)
92{
93	const struct cache_object *obj = data1;
94	const struct nf_conntrack *ct = data2;
95
96	return nfct_cmp(obj->ptr, ct, NFCT_CMP_ORIG) &&
97	       nfct_get_attr_u32(obj->ptr, ATTR_ID) ==
98	       nfct_get_attr_u32(ct, ATTR_ID);
99}
100
/* Allocation callback: hand out a new conntrack object from nfct_new(). */
static void *cache_ct_alloc(void)
{
	struct nf_conntrack *ct = nfct_new();

	return ct;
}
105
/* Release callback: destroy the conntrack object stored in a cache entry. */
static void cache_ct_free(void *ptr)
{
	struct nf_conntrack *ct = ptr;

	nfct_destroy(ct);
}
110
/*
 * Copy callback: copy the conntrack object src into dst with nfct_copy(),
 * passing the caller-supplied flags through unchanged.
 */
static void cache_ct_copy(void *dst, void *src, unsigned int flags)
{
	struct nf_conntrack *to = dst;
	struct nf_conntrack *from = src;

	nfct_copy(to, from, flags);
}
115
116static int cache_ct_dump_step(void *data1, void *n)
117{
118	char buf[1024];
119	int size;
120	struct __dump_container *container = data1;
121	struct cache_object *obj = n;
122	char *data = obj->data;
123	unsigned i;
124
125	/*
126	 * XXX: Do not dump the entries that are scheduled to expire.
127	 * 	These entries talk about already destroyed connections
128	 * 	that we keep for some time just in case that we have to
129	 * 	resent some lost messages. We do not show them to the
130	 * 	user as he may think that the firewall replicas are not
131	 * 	in sync. The branch below is a hack as it is quite
132	 * 	specific and it breaks conntrackd modularity. Probably
133	 * 	there's a nicer way to do this but until I come up with it...
134	 */
135	if (CONFIG(flags) & CTD_SYNC_FTFW && obj->status == C_OBJ_DEAD)
136		return 0;
137
138	/* do not show cached timeout, this may confuse users */
139	if (nfct_attr_is_set(obj->ptr, ATTR_TIMEOUT))
140		nfct_attr_unset(obj->ptr, ATTR_TIMEOUT);
141
142	memset(buf, 0, sizeof(buf));
143	size = nfct_snprintf(buf,
144			     sizeof(buf),
145			     obj->ptr,
146			     NFCT_T_UNKNOWN,
147			     container->type,
148			     0);
149
150	for (i = 0; i < obj->cache->num_features; i++) {
151		if (obj->cache->features[i]->dump) {
152			size += obj->cache->features[i]->dump(obj,
153							      data,
154							      buf+size,
155							      container->type);
156			data += obj->cache->features[i]->size;
157		}
158	}
159	if (container->type != NFCT_O_XML) {
160		long tm = time(NULL);
161		size += sprintf(buf+size, " [active since %lds]",
162				tm - obj->lifetime);
163	}
164	size += sprintf(buf+size, "\n");
165	if (send(container->fd, buf, size, 0) == -1) {
166		if (errno != EPIPE)
167			return -1;
168	}
169
170	return 0;
171}
172
173static void
174cache_ct_commit_step(struct __commit_container *tmp, struct cache_object *obj)
175{
176	int ret, retry = 1, timeout;
177	struct nf_conntrack *ct = obj->ptr;
178
179	if (CONFIG(commit_timeout)) {
180		timeout = CONFIG(commit_timeout);
181	} else {
182		timeout = time(NULL) - obj->lastupdate;
183		if (timeout < 0) {
184			/* XXX: Arbitrarily set the timer to one minute, how
185			 * can this happen? For example, an adjustment due to
186			 * daylight-saving. Probably other situations can
187			 * trigger this. */
188			timeout = 60;
189		}
190		/* calculate an estimation of the current timeout */
191		timeout = nfct_get_attr_u32(ct, ATTR_TIMEOUT) - timeout;
192		if (timeout < 0) {
193			timeout = 60;
194		}
195	}
196
197retry:
198	if (nl_create_conntrack(tmp->h, ct, timeout) == -1) {
199		if (errno == EEXIST && retry == 1) {
200			ret = nl_destroy_conntrack(tmp->h, ct);
201			if (ret == 0 || (ret == -1 && errno == ENOENT)) {
202				if (retry) {
203					retry = 0;
204					goto retry;
205				}
206			}
207			dlog(LOG_ERR, "commit-destroy: %s", strerror(errno));
208			dlog_ct(STATE(log), ct, NFCT_O_PLAIN);
209			tmp->c->stats.commit_fail++;
210		} else {
211			dlog(LOG_ERR, "commit-create: %s", strerror(errno));
212			dlog_ct(STATE(log), ct, NFCT_O_PLAIN);
213			tmp->c->stats.commit_fail++;
214		}
215	} else {
216		tmp->c->stats.commit_ok++;
217	}
218}
219
220static int cache_ct_commit_related(void *data, void *n)
221{
222	struct cache_object *obj = n;
223
224	if (ct_is_related(obj->ptr))
225		cache_ct_commit_step(data, obj);
226
227	/* keep iterating even if we have found errors */
228	return 0;
229}
230
231static int cache_ct_commit_master(void *data, void *n)
232{
233	struct cache_object *obj = n;
234
235	if (ct_is_related(obj->ptr))
236		return 0;
237
238	cache_ct_commit_step(data, obj);
239	return 0;
240}
241
242static int cache_ct_commit(struct cache *c, struct nfct_handle *h, int clientfd)
243{
244	unsigned int commit_ok, commit_fail;
245	struct __commit_container tmp = {
246		.h = h,
247		.c = c,
248	};
249	struct timeval commit_stop, res;
250
251	/* we already have one commit in progress, skip this. The clientfd
252	 * descriptor has to be closed by the caller. */
253	if (clientfd && STATE_SYNC(commit).clientfd != -1)
254		return -1;
255
256	switch(STATE_SYNC(commit).state) {
257	case COMMIT_STATE_INACTIVE:
258		gettimeofday(&STATE_SYNC(commit).stats.start, NULL);
259		STATE_SYNC(commit).stats.ok = c->stats.commit_ok;
260		STATE_SYNC(commit).stats.fail = c->stats.commit_fail;
261		STATE_SYNC(commit).clientfd = clientfd;
262	case COMMIT_STATE_MASTER:
263		STATE_SYNC(commit).current =
264			hashtable_iterate_limit(c->h, &tmp,
265						STATE_SYNC(commit).current,
266						CONFIG(general).commit_steps,
267						cache_ct_commit_master);
268		if (STATE_SYNC(commit).current < CONFIG(hashsize)) {
269			STATE_SYNC(commit).state = COMMIT_STATE_MASTER;
270			/* give it another step as soon as possible */
271			write_evfd(STATE_SYNC(commit).evfd);
272			return 1;
273		}
274		STATE_SYNC(commit).current = 0;
275		STATE_SYNC(commit).state = COMMIT_STATE_RELATED;
276	case COMMIT_STATE_RELATED:
277		STATE_SYNC(commit).current =
278			hashtable_iterate_limit(c->h, &tmp,
279						STATE_SYNC(commit).current,
280						CONFIG(general).commit_steps,
281						cache_ct_commit_related);
282		if (STATE_SYNC(commit).current < CONFIG(hashsize)) {
283			STATE_SYNC(commit).state = COMMIT_STATE_RELATED;
284			/* give it another step as soon as possible */
285			write_evfd(STATE_SYNC(commit).evfd);
286			return 1;
287		}
288		/* calculate the time that commit has taken */
289		gettimeofday(&commit_stop, NULL);
290		timersub(&commit_stop, &STATE_SYNC(commit).stats.start, &res);
291
292		/* calculate new entries committed */
293		commit_ok = c->stats.commit_ok - STATE_SYNC(commit).stats.ok;
294		commit_fail =
295			c->stats.commit_fail - STATE_SYNC(commit).stats.fail;
296
297		/* log results */
298		dlog(LOG_NOTICE, "Committed %u new entries", commit_ok);
299
300		if (commit_fail)
301			dlog(LOG_NOTICE, "%u entries can't be "
302					 "committed", commit_fail);
303
304		dlog(LOG_NOTICE, "commit has taken %lu.%06lu seconds",
305				res.tv_sec, res.tv_usec);
306
307		/* prepare the state machine for new commits */
308		STATE_SYNC(commit).current = 0;
309		STATE_SYNC(commit).state = COMMIT_STATE_INACTIVE;
310
311		return 0;
312	}
313	return 1;
314}
315
316static struct nethdr *
317cache_ct_build_msg(const struct cache_object *obj, int type)
318{
319	return BUILD_NETMSG_FROM_CT(obj->ptr, type);
320}
321
322/* template to cache conntracks coming from the kernel. */
323struct cache_ops cache_sync_internal_ct_ops = {
324	.hash		= cache_ct_hash,
325	.cmp		= cache_ct_cmp,
326	.alloc		= cache_ct_alloc,
327	.free		= cache_ct_free,
328	.copy		= cache_ct_copy,
329	.dump_step	= cache_ct_dump_step,
330	.commit		= NULL,
331	.build_msg	= cache_ct_build_msg,
332};
333
334/* template to cache conntracks coming from the network. */
335struct cache_ops cache_sync_external_ct_ops = {
336	.hash		= cache_ct_hash,
337	.cmp		= cache_ct_cmp,
338	.alloc		= cache_ct_alloc,
339	.free		= cache_ct_free,
340	.copy		= cache_ct_copy,
341	.dump_step	= cache_ct_dump_step,
342	.commit		= cache_ct_commit,
343	.build_msg	= NULL,
344};
345
346/* template to cache conntracks for the statistics mode. */
347struct cache_ops cache_stats_ct_ops = {
348	.hash		= cache_ct_hash,
349	.cmp		= cache_ct_cmp,
350	.alloc		= cache_ct_alloc,
351	.free		= cache_ct_free,
352	.copy		= cache_ct_copy,
353	.dump_step	= cache_ct_dump_step,
354	.commit		= NULL,
355	.build_msg	= NULL,
356};
357