// SPDX-License-Identifier: GPL-2.0-only
/*
 * Packet matching code.
 *
 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
 * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/compat.h>
#include <linux/uaccess.h>
#include <linux/mutex.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
#include <linux/cpumask.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <net/netfilter/nf_log.h>
#include "../../netfilter/xt_repldata.h"

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");

void *ipt_alloc_initial_table(const struct xt_table *info)
{
	return xt_alloc_initial_table(ipt, IPT);
}
EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
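
/* The initial table handed to ipt_register_table() is generated from
 * the xt_repldata.h template: essentially one unconditional ACCEPT
 * policy rule per registered hook plus the terminating ERROR entry.
 */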

/* Returns whether the packet matches the rule or not. */
/* Performance critical - called for every packet */
static inline bool
ip_packet_match(const struct iphdr *ip,
		const char *indev,
		const char *outdev,
		const struct ipt_ip *ipinfo,
		int isfrag)
{
	unsigned long ret;

	if (NF_INVF(ipinfo, IPT_INV_SRCIP,
		    (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) ||
	    NF_INVF(ipinfo, IPT_INV_DSTIP,
		    (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr))
		return false;

	ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);

	if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0))
		return false;

	ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);

	if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0))
		return false;

	/* Check specific protocol */
	if (ipinfo->proto &&
	    NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto))
		return false;

	/* If we have a fragment rule but the packet is not a fragment
	 * then we return false */
	if (NF_INVF(ipinfo, IPT_INV_FRAG,
		    (ipinfo->flags & IPT_F_FRAG) && !isfrag))
		return false;

	return true;
}
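
/* NF_INVF(info, flag, cond) used above expands (see linux/netfilter.h)
 * to cond XOR'ed with whether 'flag' is set in info->invflags.  E.g.
 * "-s 10.0.0.0/8" fails the match when the masked saddr differs,
 * while "! -s 10.0.0.0/8" fails it when the masked saddr is equal.
 */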

static bool
ip_checkentry(const struct ipt_ip *ip)
{
	if (ip->flags & ~IPT_F_MASK)
		return false;
	if (ip->invflags & ~IPT_INV_MASK)
		return false;
	return true;
}

static unsigned int
ipt_error(struct sk_buff *skb, const struct xt_action_param *par)
{
	net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo);

	return NF_DROP;
}

/* Performance critical */
static inline struct ipt_entry *
get_entry(const void *base, unsigned int offset)
{
	return (struct ipt_entry *)(base + offset);
}

/* All zeroes == unconditional rule. */
/* Mildly perf critical (only if packet tracing is on) */
static inline bool unconditional(const struct ipt_entry *e)
{
	static const struct ipt_ip uncond;

	return e->target_offset == sizeof(struct ipt_entry) &&
	       memcmp(&e->ip, &uncond, sizeof(uncond)) == 0;
}
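
/* Note the memcmp() against a zeroed struct ipt_ip: a rule counts as
 * unconditional only if every matching field (addresses, masks,
 * interfaces, proto, flags and invflags) is zero and it carries no
 * matches, i.e. target_offset == sizeof(struct ipt_entry).
 */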

/* for const-correctness */
static inline const struct xt_entry_target *
ipt_get_target_c(const struct ipt_entry *e)
{
	return ipt_get_target((struct ipt_entry *)e);
}

#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
static const char *const hooknames[] = {
	[NF_INET_PRE_ROUTING]		= "PREROUTING",
	[NF_INET_LOCAL_IN]		= "INPUT",
	[NF_INET_FORWARD]		= "FORWARD",
	[NF_INET_LOCAL_OUT]		= "OUTPUT",
	[NF_INET_POST_ROUTING]		= "POSTROUTING",
};

enum nf_ip_trace_comments {
	NF_IP_TRACE_COMMENT_RULE,
	NF_IP_TRACE_COMMENT_RETURN,
	NF_IP_TRACE_COMMENT_POLICY,
};

static const char *const comments[] = {
	[NF_IP_TRACE_COMMENT_RULE]	= "rule",
	[NF_IP_TRACE_COMMENT_RETURN]	= "return",
	[NF_IP_TRACE_COMMENT_POLICY]	= "policy",
};

static const struct nf_loginfo trace_loginfo = {
	.type = NF_LOG_TYPE_LOG,
	.u = {
		.log = {
			.level = 4,
			.logflags = NF_LOG_DEFAULT_MASK,
		},
	},
};

/* Mildly perf critical (only if packet tracing is on) */
static inline int
get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
		      const char *hookname, const char **chainname,
		      const char **comment, unsigned int *rulenum)
{
	const struct xt_standard_target *t = (void *)ipt_get_target_c(s);

	if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
		/* Head of user chain: ERROR target with chainname */
		*chainname = t->target.data;
		(*rulenum) = 0;
	} else if (s == e) {
		(*rulenum)++;

		if (unconditional(s) &&
		    strcmp(t->target.u.kernel.target->name,
			   XT_STANDARD_TARGET) == 0 &&
		    t->verdict < 0) {
			/* Tail of chains: STANDARD target (return/policy) */
			*comment = *chainname == hookname
				? comments[NF_IP_TRACE_COMMENT_POLICY]
				: comments[NF_IP_TRACE_COMMENT_RETURN];
		}
		return 1;
	} else
		(*rulenum)++;

	return 0;
}

static void trace_packet(struct net *net,
			 const struct sk_buff *skb,
			 unsigned int hook,
			 const struct net_device *in,
			 const struct net_device *out,
			 const char *tablename,
			 const struct xt_table_info *private,
			 const struct ipt_entry *e)
{
	const struct ipt_entry *root;
	const char *hookname, *chainname, *comment;
	const struct ipt_entry *iter;
	unsigned int rulenum = 0;

	root = get_entry(private->entries, private->hook_entry[hook]);

	hookname = chainname = hooknames[hook];
	comment = comments[NF_IP_TRACE_COMMENT_RULE];

	xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
		if (get_chainname_rulenum(iter, e, hookname,
		    &chainname, &comment, &rulenum) != 0)
			break;

	nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo,
		     "TRACE: %s:%s:%s:%u ",
		     tablename, chainname, comment, rulenum);
}
#endif

static inline
struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
{
	return (void *)entry + entry->next_offset;
}

/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ipt_do_table(void *priv,
	     struct sk_buff *skb,
	     const struct nf_hook_state *state)
{
	const struct xt_table *table = priv;
	unsigned int hook = state->hook;
	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
	const struct iphdr *ip;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	const void *table_base;
	struct ipt_entry *e, **jumpstack;
	unsigned int stackidx, cpu;
	const struct xt_table_info *private;
	struct xt_action_param acpar;
	unsigned int addend;

	/* Initialization */
	stackidx = 0;
	ip = ip_hdr(skb);
	indev = state->in ? state->in->name : nulldevname;
	outdev = state->out ? state->out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.  All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know, i.e. the tcp syn flag or ports.  If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
	acpar.thoff   = ip_hdrlen(skb);
	acpar.hotdrop = false;
	acpar.state   = state;

	WARN_ON(!(table->valid_hooks & (1 << hook)));
	local_bh_disable();
	addend = xt_write_recseq_begin();
	private = READ_ONCE(table->private); /* Address dependency. */
	cpu        = smp_processor_id();
	table_base = private->entries;
	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];

	/* Switch to alternate jumpstack if we're being invoked via TEE.
	 * TEE issues XT_CONTINUE verdict on original skb so we must not
	 * clobber the jumpstack.
	 *
	 * For recursion via REJECT or SYNPROXY the stack will be clobbered
	 * but it is no problem since absolute verdict is issued by these.
	 */
	if (static_key_false(&xt_tee_enabled))
		jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);

	e = get_entry(table_base, private->hook_entry[hook]);

	do {
		const struct xt_entry_target *t;
		const struct xt_entry_match *ematch;
		struct xt_counters *counter;

		WARN_ON(!e);
		if (!ip_packet_match(ip, indev, outdev,
		    &e->ip, acpar.fragoff)) {
 no_match:
			e = ipt_next_entry(e);
			continue;
		}

		xt_ematch_foreach(ematch, e) {
			acpar.match     = ematch->u.kernel.match;
			acpar.matchinfo = ematch->data;
			if (!acpar.match->match(skb, &acpar))
				goto no_match;
		}

		counter = xt_get_this_cpu_counter(&e->counters);
		ADD_COUNTER(*counter, skb->len, 1);

		t = ipt_get_target_c(e);
		WARN_ON(!t->u.kernel.target);

#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
		/* The packet is traced: log it */
		if (unlikely(skb->nf_trace))
			trace_packet(state->net, skb, hook, state->in,
				     state->out, table->name, private, e);
#endif
		/* Standard target? */
		if (!t->u.kernel.target->target) {
			int v;

			v = ((struct xt_standard_target *)t)->verdict;
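			/* Standard verdicts are encoded as -verdict - 1,
			 * e.g. NF_ACCEPT (1) is stored as -2; positive
			 * values are jump offsets into the blob, and
			 * XT_RETURN pops the chain jumpstack.
			 */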
			if (v < 0) {
				/* Pop from stack? */
				if (v != XT_RETURN) {
					verdict = (unsigned int)(-v) - 1;
					break;
				}
				if (stackidx == 0) {
					e = get_entry(table_base,
					    private->underflow[hook]);
				} else {
					e = jumpstack[--stackidx];
					e = ipt_next_entry(e);
				}
				continue;
			}
			if (table_base + v != ipt_next_entry(e) &&
			    !(e->ip.flags & IPT_F_GOTO)) {
				if (unlikely(stackidx >= private->stacksize)) {
					verdict = NF_DROP;
					break;
				}
				jumpstack[stackidx++] = e;
			}

			e = get_entry(table_base, v);
			continue;
		}

		acpar.target   = t->u.kernel.target;
		acpar.targinfo = t->data;

		verdict = t->u.kernel.target->target(skb, &acpar);
		if (verdict == XT_CONTINUE) {
			/* Target might have changed stuff. */
			ip = ip_hdr(skb);
			e = ipt_next_entry(e);
		} else {
			/* Verdict */
			break;
		}
	} while (!acpar.hotdrop);

	xt_write_recseq_end(addend);
	local_bh_enable();

	if (acpar.hotdrop)
		return NF_DROP;

	return verdict;
}

/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
		   unsigned int valid_hooks, void *entry0,
		   unsigned int *offsets)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e = entry0 + pos;

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			const struct xt_standard_target *t
				= (void *)ipt_get_target_c(e);
			int visited = e->comefrom & (1 << hook);

			if (e->comefrom & (1 << NF_INET_NUMHOOKS))
				return 0;

			e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));

			/* Unconditional return/END. */
			if ((unconditional(e) &&
			     (strcmp(t->target.u.user.name,
				     XT_STANDARD_TARGET) == 0) &&
			     t->verdict < 0) || visited) {
				unsigned int oldpos, size;

				/* Return: backtrack through the last
				   big jump. */
				do {
					e->comefrom ^= (1<<NF_INET_NUMHOOKS);
					oldpos = pos;
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = entry0 + pos;
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = entry0 + pos + size;
				if (pos + size >= newinfo->size)
					return 0;
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   XT_STANDARD_TARGET) == 0 &&
				    newpos >= 0) {
					/* This is a jump; chase it. */
					if (!xt_find_jump_offset(offsets, newpos,
								 newinfo->number))
						return 0;
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
					if (newpos >= newinfo->size)
						return 0;
				}
				e = entry0 + newpos;
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
next:		;
	}
	return 1;
}
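
/* mark_source_chains() walks every reachable path iteratively rather
 * than recursively: e->counters.pcnt temporarily holds the back
 * pointer of the last jump taken (reset to 0 while unwinding) and
 * e->comefrom accumulates the hook bitmask.  The high comefrom bit
 * marks entries on the current path, so reaching one of those again
 * means a rule loop and the walk fails with 0.
 */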

static void cleanup_match(struct xt_entry_match *m, struct net *net)
{
	struct xt_mtdtor_param par;

	par.net       = net;
	par.match     = m->u.kernel.match;
	par.matchinfo = m->data;
	par.family    = NFPROTO_IPV4;
	if (par.match->destroy != NULL)
		par.match->destroy(&par);
	module_put(par.match->me);
}

static int
check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
	const struct ipt_ip *ip = par->entryinfo;

	par->match     = m->u.kernel.match;
	par->matchinfo = m->data;

	return xt_check_match(par, m->u.match_size - sizeof(*m),
			      ip->proto, ip->invflags & IPT_INV_PROTO);
}

static int
find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
	struct xt_match *match;
	int ret;

	match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
				      m->u.user.revision);
	if (IS_ERR(match))
		return PTR_ERR(match);
	m->u.kernel.match = match;

	ret = check_match(m, par);
	if (ret)
		goto err;

	return 0;
err:
	module_put(m->u.kernel.match->me);
	return ret;
}

static int check_target(struct ipt_entry *e, struct net *net, const char *name)
{
	struct xt_entry_target *t = ipt_get_target(e);
	struct xt_tgchk_param par = {
		.net       = net,
		.table     = name,
		.entryinfo = e,
		.target    = t->u.kernel.target,
		.targinfo  = t->data,
		.hook_mask = e->comefrom,
		.family    = NFPROTO_IPV4,
	};

	return xt_check_target(&par, t->u.target_size - sizeof(*t),
			       e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
}

static int
find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
		 unsigned int size,
		 struct xt_percpu_counter_alloc_state *alloc_state)
{
	struct xt_entry_target *t;
	struct xt_target *target;
	int ret;
	unsigned int j;
	struct xt_mtchk_param mtpar;
	struct xt_entry_match *ematch;

	if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
		return -ENOMEM;

	j = 0;
	memset(&mtpar, 0, sizeof(mtpar));
	mtpar.net	= net;
	mtpar.table     = name;
	mtpar.entryinfo = &e->ip;
	mtpar.hook_mask = e->comefrom;
	mtpar.family    = NFPROTO_IPV4;
	xt_ematch_foreach(ematch, e) {
		ret = find_check_match(ematch, &mtpar);
		if (ret != 0)
			goto cleanup_matches;
		++j;
	}

	t = ipt_get_target(e);
	target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
					t->u.user.revision);
	if (IS_ERR(target)) {
		ret = PTR_ERR(target);
		goto cleanup_matches;
	}
	t->u.kernel.target = target;

	ret = check_target(e, net, name);
	if (ret)
		goto err;

	return 0;
 err:
	module_put(t->u.kernel.target->me);
 cleanup_matches:
	xt_ematch_foreach(ematch, e) {
		if (j-- == 0)
			break;
		cleanup_match(ematch, net);
	}

	xt_percpu_counter_free(&e->counters);

	return ret;
}

static bool check_underflow(const struct ipt_entry *e)
{
	const struct xt_entry_target *t;
	unsigned int verdict;

	if (!unconditional(e))
		return false;
	t = ipt_get_target_c(e);
	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
		return false;
	verdict = ((struct xt_standard_target *)t)->verdict;
	verdict = -verdict - 1;
	return verdict == NF_DROP || verdict == NF_ACCEPT;
}
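
/* An underflow entry is the base chain policy: it must be an
 * unconditional standard target whose decoded verdict (see the
 * -verdict - 1 encoding) is NF_DROP or NF_ACCEPT; RETURN, QUEUE or
 * a jump would make no sense as the last entry of a base chain.
 */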

static int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct xt_table_info *newinfo,
			   const unsigned char *base,
			   const unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int valid_hooks)
{
	unsigned int h;
	int err;

	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
	    (unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
	    (unsigned char *)e + e->next_offset > limit)
		return -EINVAL;

	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target))
		return -EINVAL;

	if (!ip_checkentry(&e->ip))
		return -EINVAL;

	err = xt_check_entry_offsets(e, e->elems, e->target_offset,
				     e->next_offset);
	if (err)
		return err;

	/* Check hooks & underflows */
	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
		if (!(valid_hooks & (1 << h)))
			continue;
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h]) {
			if (!check_underflow(e))
				return -EINVAL;

			newinfo->underflow[h] = underflows[h];
		}
	}

	/* Clear counters and comefrom */
	e->counters = ((struct xt_counters) { 0, 0 });
	e->comefrom = 0;
	return 0;
}

static void
cleanup_entry(struct ipt_entry *e, struct net *net)
{
	struct xt_tgdtor_param par;
	struct xt_entry_target *t;
	struct xt_entry_match *ematch;

	/* Cleanup all matches */
	xt_ematch_foreach(ematch, e)
		cleanup_match(ematch, net);
	t = ipt_get_target(e);

	par.net      = net;
	par.target   = t->u.kernel.target;
	par.targinfo = t->data;
	par.family   = NFPROTO_IPV4;
	if (par.target->destroy != NULL)
		par.target->destroy(&par);
	module_put(par.target->me);
	xt_percpu_counter_free(&e->counters);
}

/* Checks and translates the user-supplied table segment (held in
   newinfo) */
static int
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
		const struct ipt_replace *repl)
{
	struct xt_percpu_counter_alloc_state alloc_state = { 0 };
	struct ipt_entry *iter;
	unsigned int *offsets;
	unsigned int i;
	int ret = 0;

	newinfo->size = repl->size;
	newinfo->number = repl->num_entries;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = 0xFFFFFFFF;
		newinfo->underflow[i] = 0xFFFFFFFF;
	}

	offsets = xt_alloc_entry_offsets(newinfo->number);
	if (!offsets)
		return -ENOMEM;
	i = 0;
	/* Walk through entries, checking offsets. */
	xt_entry_foreach(iter, entry0, newinfo->size) {
		ret = check_entry_size_and_hooks(iter, newinfo, entry0,
						 entry0 + repl->size,
						 repl->hook_entry,
						 repl->underflow,
						 repl->valid_hooks);
		if (ret != 0)
			goto out_free;
		if (i < repl->num_entries)
			offsets[i] = (void *)iter - entry0;
		++i;
		if (strcmp(ipt_get_target(iter)->u.user.name,
		    XT_ERROR_TARGET) == 0)
			++newinfo->stacksize;
	}

	ret = -EINVAL;
	if (i != repl->num_entries)
		goto out_free;

	ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
	if (ret)
		goto out_free;

	if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
		ret = -ELOOP;
		goto out_free;
	}
	kvfree(offsets);

	/* Finally, each sanity check must pass */
	i = 0;
	xt_entry_foreach(iter, entry0, newinfo->size) {
		ret = find_check_entry(iter, net, repl->name, repl->size,
				       &alloc_state);
		if (ret != 0)
			break;
		++i;
	}

	if (ret != 0) {
		xt_entry_foreach(iter, entry0, newinfo->size) {
			if (i-- == 0)
				break;
			cleanup_entry(iter, net);
		}
		return ret;
	}

	return ret;
 out_free:
	kvfree(offsets);
	return ret;
}

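/* Rule counters are kept per cpu.  The readers below snapshot each
 * cpu's bcnt/pcnt pair under the xt_recseq sequence counter and
 * retry if a writer (xt_write_recseq_begin/end in the packet path)
 * raced with them, then fold the per-cpu values into the
 * user-visible array.
 */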
static void
get_counters(const struct xt_table_info *t,
	     struct xt_counters counters[])
{
	struct ipt_entry *iter;
	unsigned int cpu;
	unsigned int i;

	for_each_possible_cpu(cpu) {
		seqcount_t *s = &per_cpu(xt_recseq, cpu);

		i = 0;
		xt_entry_foreach(iter, t->entries, t->size) {
			struct xt_counters *tmp;
			u64 bcnt, pcnt;
			unsigned int start;

			tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
			do {
				start = read_seqcount_begin(s);
				bcnt = tmp->bcnt;
				pcnt = tmp->pcnt;
			} while (read_seqcount_retry(s, start));

			ADD_COUNTER(counters[i], bcnt, pcnt);
			++i; /* macro does multi eval of i */
			cond_resched();
		}
	}
}

static void get_old_counters(const struct xt_table_info *t,
			     struct xt_counters counters[])
{
	struct ipt_entry *iter;
	unsigned int cpu, i;

	for_each_possible_cpu(cpu) {
		i = 0;
		xt_entry_foreach(iter, t->entries, t->size) {
			const struct xt_counters *tmp;

			tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
			ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
			++i; /* macro does multi eval of i */
		}

		cond_resched();
	}
}

static struct xt_counters *alloc_counters(const struct xt_table *table)
{
	unsigned int countersize;
	struct xt_counters *counters;
	const struct xt_table_info *private = table->private;

	/* We need atomic snapshot of counters: rest doesn't change
	   (other than comefrom, which userspace doesn't care
	   about). */
	countersize = sizeof(struct xt_counters) * private->number;
	counters = vzalloc(countersize);

	if (counters == NULL)
		return ERR_PTR(-ENOMEM);

	get_counters(private, counters);

	return counters;
}

static int
copy_entries_to_user(unsigned int total_size,
		     const struct xt_table *table,
		     void __user *userptr)
{
	unsigned int off, num;
	const struct ipt_entry *e;
	struct xt_counters *counters;
	const struct xt_table_info *private = table->private;
	int ret = 0;
	const void *loc_cpu_entry;

	counters = alloc_counters(table);
	if (IS_ERR(counters))
		return PTR_ERR(counters);

	loc_cpu_entry = private->entries;

	/* FIXME: use iterator macros --RR */
	/* ... then go back and fix counters and names */
	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++) {
		unsigned int i;
		const struct xt_entry_match *m;
		const struct xt_entry_target *t;

		e = loc_cpu_entry + off;
		if (copy_to_user(userptr + off, e, sizeof(*e))) {
			ret = -EFAULT;
			goto free_counters;
		}
		if (copy_to_user(userptr + off
				 + offsetof(struct ipt_entry, counters),
				 &counters[num],
				 sizeof(counters[num])) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}

		for (i = sizeof(struct ipt_entry);
		     i < e->target_offset;
		     i += m->u.match_size) {
			m = (void *)e + i;

			if (xt_match_to_user(m, userptr + off + i)) {
				ret = -EFAULT;
				goto free_counters;
			}
		}

		t = ipt_get_target_c(e);
		if (xt_target_to_user(t, userptr + off + e->target_offset)) {
			ret = -EFAULT;
			goto free_counters;
		}
	}

 free_counters:
	vfree(counters);
	return ret;
}

#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
static void compat_standard_from_user(void *dst, const void *src)
{
	int v = *(compat_int_t *)src;

	if (v > 0)
		v += xt_compat_calc_jump(AF_INET, v);
	memcpy(dst, &v, sizeof(v));
}

static int compat_standard_to_user(void __user *dst, const void *src)
{
	compat_int_t cv = *(int *)src;

	if (cv > 0)
		cv -= xt_compat_calc_jump(AF_INET, cv);
	return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
}

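/* Compat mode (32bit userspace on a 64bit kernel) uses smaller entry
 * layouts.  'off' below accumulates the per-entry size delta between
 * the kernel and compat representations of the entry, its matches
 * and its target, so hook entry points and underflows can be shifted
 * by the right amount.
 */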
static int compat_calc_entry(const struct ipt_entry *e,
			     const struct xt_table_info *info,
			     const void *base, struct xt_table_info *newinfo)
{
	const struct xt_entry_match *ematch;
	const struct xt_entry_target *t;
	unsigned int entry_offset;
	int off, i, ret;

	off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
	entry_offset = (void *)e - base;
	xt_ematch_foreach(ematch, e)
		off += xt_compat_match_offset(ematch->u.kernel.match);
	t = ipt_get_target_c(e);
	off += xt_compat_target_offset(t->u.kernel.target);
	newinfo->size -= off;
	ret = xt_compat_add_offset(AF_INET, entry_offset, off);
	if (ret)
		return ret;

	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		if (info->hook_entry[i] &&
		    (e < (struct ipt_entry *)(base + info->hook_entry[i])))
			newinfo->hook_entry[i] -= off;
		if (info->underflow[i] &&
		    (e < (struct ipt_entry *)(base + info->underflow[i])))
			newinfo->underflow[i] -= off;
	}
	return 0;
}

static int compat_table_info(const struct xt_table_info *info,
			     struct xt_table_info *newinfo)
{
	struct ipt_entry *iter;
	const void *loc_cpu_entry;
	int ret;

	if (!newinfo || !info)
		return -EINVAL;

	/* we don't care about newinfo->entries */
	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
	newinfo->initial_entries = 0;
	loc_cpu_entry = info->entries;
	ret = xt_compat_init_offsets(AF_INET, info->number);
	if (ret)
		return ret;
	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
		if (ret != 0)
			return ret;
	}
	return 0;
}
#endif

static int get_info(struct net *net, void __user *user, const int *len)
{
	char name[XT_TABLE_MAXNAMELEN];
	struct xt_table *t;
	int ret;

	if (*len != sizeof(struct ipt_getinfo))
		return -EINVAL;

	if (copy_from_user(name, user, sizeof(name)) != 0)
		return -EFAULT;

	name[XT_TABLE_MAXNAMELEN-1] = '\0';
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
	if (in_compat_syscall())
		xt_compat_lock(AF_INET);
#endif
	t = xt_request_find_table_lock(net, AF_INET, name);
	if (!IS_ERR(t)) {
		struct ipt_getinfo info;
		const struct xt_table_info *private = t->private;
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
		struct xt_table_info tmp;

		if (in_compat_syscall()) {
			ret = compat_table_info(private, &tmp);
			xt_compat_flush_offsets(AF_INET);
			private = &tmp;
		}
#endif
		memset(&info, 0, sizeof(info));
		info.valid_hooks = t->valid_hooks;
		memcpy(info.hook_entry, private->hook_entry,
		       sizeof(info.hook_entry));
		memcpy(info.underflow, private->underflow,
		       sizeof(info.underflow));
		info.num_entries = private->number;
		info.size = private->size;
		strcpy(info.name, name);

		if (copy_to_user(user, &info, *len) != 0)
			ret = -EFAULT;
		else
			ret = 0;

		xt_table_unlock(t);
		module_put(t->me);
	} else
		ret = PTR_ERR(t);
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
	if (in_compat_syscall())
		xt_compat_unlock(AF_INET);
#endif
	return ret;
}

static int
get_entries(struct net *net, struct ipt_get_entries __user *uptr,
	    const int *len)
{
	int ret;
	struct ipt_get_entries get;
	struct xt_table *t;

	if (*len < sizeof(get))
		return -EINVAL;
	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
		return -EFAULT;
	if (*len != sizeof(struct ipt_get_entries) + get.size)
		return -EINVAL;
	get.name[sizeof(get.name) - 1] = '\0';

	t = xt_find_table_lock(net, AF_INET, get.name);
	if (!IS_ERR(t)) {
		const struct xt_table_info *private = t->private;
		if (get.size == private->size)
			ret = copy_entries_to_user(private->size,
						   t, uptr->entrytable);
		else
			ret = -EAGAIN;

		module_put(t->me);
		xt_table_unlock(t);
	} else
		ret = PTR_ERR(t);

	return ret;
}

static int
__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
	     struct xt_table_info *newinfo, unsigned int num_counters,
	     void __user *counters_ptr)
{
	int ret;
	struct xt_table *t;
	struct xt_table_info *oldinfo;
	struct xt_counters *counters;
	struct ipt_entry *iter;

	counters = xt_counters_alloc(num_counters);
	if (!counters) {
		ret = -ENOMEM;
		goto out;
	}

	t = xt_request_find_table_lock(net, AF_INET, name);
	if (IS_ERR(t)) {
		ret = PTR_ERR(t);
		goto free_newinfo_counters_untrans;
	}

	/* You lied! */
	if (valid_hooks != t->valid_hooks) {
		ret = -EINVAL;
		goto put_module;
	}

	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
	if (!oldinfo)
		goto put_module;

	/* Update module usage count based on number of rules */
	if ((oldinfo->number > oldinfo->initial_entries) ||
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);
	if ((oldinfo->number > oldinfo->initial_entries) &&
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);

	xt_table_unlock(t);

	get_old_counters(oldinfo, counters);

	/* Decrease module usage counts and free resource */
	xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
		cleanup_entry(iter, net);

	xt_free_table_info(oldinfo);
	if (copy_to_user(counters_ptr, counters,
			 sizeof(struct xt_counters) * num_counters) != 0) {
		/* Silent error, can't fail, new table is already in place */
		net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
	}
	vfree(counters);
	return 0;

 put_module:
	module_put(t->me);
	xt_table_unlock(t);
 free_newinfo_counters_untrans:
	vfree(counters);
 out:
	return ret;
}
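
/* __do_replace() swaps the new ruleset in under the xt table lock via
 * xt_replace_table(), then tears the old one down and hands its final
 * counters to userspace.  Once the swap has happened the operation
 * must not fail anymore, hence the ratelimited warning instead of an
 * error when the final copy_to_user() of the counters fails.
 */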

static int
do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
	int ret;
	struct ipt_replace tmp;
	struct xt_table_info *newinfo;
	void *loc_cpu_entry;
	struct ipt_entry *iter;

	if (len < sizeof(tmp))
		return -EINVAL;
	if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
		return -EFAULT;

	/* overflow check */
	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
		return -ENOMEM;
	if (tmp.num_counters == 0)
		return -EINVAL;
	if ((u64)len < (u64)tmp.size + sizeof(tmp))
		return -EINVAL;

	tmp.name[sizeof(tmp.name)-1] = 0;

	newinfo = xt_alloc_table_info(tmp.size);
	if (!newinfo)
		return -ENOMEM;

	loc_cpu_entry = newinfo->entries;
	if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp),
			tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
	if (ret != 0)
		goto free_newinfo;

	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
			   tmp.num_counters, tmp.counters);
	if (ret)
		goto free_newinfo_untrans;
	return 0;

 free_newinfo_untrans:
	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
		cleanup_entry(iter, net);
 free_newinfo:
	xt_free_table_info(newinfo);
	return ret;
}

static int
do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
{
	unsigned int i;
	struct xt_counters_info tmp;
	struct xt_counters *paddc;
	struct xt_table *t;
	const struct xt_table_info *private;
	int ret = 0;
	struct ipt_entry *iter;
	unsigned int addend;

	paddc = xt_copy_counters(arg, len, &tmp);
	if (IS_ERR(paddc))
		return PTR_ERR(paddc);

	t = xt_find_table_lock(net, AF_INET, tmp.name);
	if (IS_ERR(t)) {
		ret = PTR_ERR(t);
		goto free;
	}

	local_bh_disable();
	private = t->private;
	if (private->number != tmp.num_counters) {
		ret = -EINVAL;
		goto unlock_up_free;
	}

	i = 0;
	addend = xt_write_recseq_begin();
	xt_entry_foreach(iter, private->entries, private->size) {
		struct xt_counters *tmp;

		tmp = xt_get_this_cpu_counter(&iter->counters);
		ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
		++i;
	}
	xt_write_recseq_end(addend);
 unlock_up_free:
	local_bh_enable();
	xt_table_unlock(t);
	module_put(t->me);
 free:
	vfree(paddc);

	return ret;
}

#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
struct compat_ipt_replace {
	char			name[XT_TABLE_MAXNAMELEN];
	u32			valid_hooks;
	u32			num_entries;
	u32			size;
	u32			hook_entry[NF_INET_NUMHOOKS];
	u32			underflow[NF_INET_NUMHOOKS];
	u32			num_counters;
	compat_uptr_t		counters;	/* struct xt_counters * */
	struct compat_ipt_entry	entries[];
};

static int
compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
			  unsigned int *size, struct xt_counters *counters,
			  unsigned int i)
{
	struct xt_entry_target *t;
	struct compat_ipt_entry __user *ce;
	u_int16_t target_offset, next_offset;
	compat_uint_t origsize;
	const struct xt_entry_match *ematch;
	int ret = 0;

	origsize = *size;
	ce = *dstptr;
	if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
	    copy_to_user(&ce->counters, &counters[i],
	    sizeof(counters[i])) != 0)
		return -EFAULT;

	*dstptr += sizeof(struct compat_ipt_entry);
	*size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);

	xt_ematch_foreach(ematch, e) {
		ret = xt_compat_match_to_user(ematch, dstptr, size);
		if (ret != 0)
			return ret;
	}
	target_offset = e->target_offset - (origsize - *size);
	t = ipt_get_target(e);
	ret = xt_compat_target_to_user(t, dstptr, size);
	if (ret)
		return ret;
	next_offset = e->next_offset - (origsize - *size);
	if (put_user(target_offset, &ce->target_offset) != 0 ||
	    put_user(next_offset, &ce->next_offset) != 0)
		return -EFAULT;
	return 0;
}

static int
compat_find_calc_match(struct xt_entry_match *m,
		       const struct ipt_ip *ip,
		       int *size)
{
	struct xt_match *match;

	match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
				      m->u.user.revision);
	if (IS_ERR(match))
		return PTR_ERR(match);

	m->u.kernel.match = match;
	*size += xt_compat_match_offset(match);
	return 0;
}

static void compat_release_entry(struct compat_ipt_entry *e)
{
	struct xt_entry_target *t;
	struct xt_entry_match *ematch;

	/* Cleanup all matches */
	xt_ematch_foreach(ematch, e)
		module_put(ematch->u.kernel.match->me);
	t = compat_ipt_get_target(e);
	module_put(t->u.kernel.target->me);
}

static int
check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
				  struct xt_table_info *newinfo,
				  unsigned int *size,
				  const unsigned char *base,
				  const unsigned char *limit)
{
	struct xt_entry_match *ematch;
	struct xt_entry_target *t;
	struct xt_target *target;
	unsigned int entry_offset;
	unsigned int j;
	int ret, off;

	if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
	    (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
	    (unsigned char *)e + e->next_offset > limit)
		return -EINVAL;

	if (e->next_offset < sizeof(struct compat_ipt_entry) +
			     sizeof(struct compat_xt_entry_target))
		return -EINVAL;

	if (!ip_checkentry(&e->ip))
		return -EINVAL;

	ret = xt_compat_check_entry_offsets(e, e->elems,
					    e->target_offset, e->next_offset);
	if (ret)
		return ret;

	off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
	entry_offset = (void *)e - (void *)base;
	j = 0;
	xt_ematch_foreach(ematch, e) {
		ret = compat_find_calc_match(ematch, &e->ip, &off);
		if (ret != 0)
			goto release_matches;
		++j;
	}

	t = compat_ipt_get_target(e);
	target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
					t->u.user.revision);
	if (IS_ERR(target)) {
		ret = PTR_ERR(target);
		goto release_matches;
	}
	t->u.kernel.target = target;

	off += xt_compat_target_offset(target);
	*size += off;
	ret = xt_compat_add_offset(AF_INET, entry_offset, off);
	if (ret)
		goto out;

	return 0;

out:
	module_put(t->u.kernel.target->me);
release_matches:
	xt_ematch_foreach(ematch, e) {
		if (j-- == 0)
			break;
		module_put(ematch->u.kernel.match->me);
	}
	return ret;
}

static void
compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
			    unsigned int *size,
			    struct xt_table_info *newinfo, unsigned char *base)
{
	struct xt_entry_target *t;
	struct ipt_entry *de;
	unsigned int origsize;
	int h;
	struct xt_entry_match *ematch;

	origsize = *size;
	de = *dstptr;
	memcpy(de, e, sizeof(struct ipt_entry));
	memcpy(&de->counters, &e->counters, sizeof(e->counters));

	*dstptr += sizeof(struct ipt_entry);
	*size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);

	xt_ematch_foreach(ematch, e)
		xt_compat_match_from_user(ematch, dstptr, size);

	de->target_offset = e->target_offset - (origsize - *size);
	t = compat_ipt_get_target(e);
	xt_compat_target_from_user(t, dstptr, size);

	de->next_offset = e->next_offset - (origsize - *size);

	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
		if ((unsigned char *)de - base < newinfo->hook_entry[h])
			newinfo->hook_entry[h] -= origsize - *size;
		if ((unsigned char *)de - base < newinfo->underflow[h])
			newinfo->underflow[h] -= origsize - *size;
	}
}

static int
translate_compat_table(struct net *net,
		       struct xt_table_info **pinfo,
		       void **pentry0,
		       const struct compat_ipt_replace *compatr)
{
	unsigned int i, j;
	struct xt_table_info *newinfo, *info;
	void *pos, *entry0, *entry1;
	struct compat_ipt_entry *iter0;
	struct ipt_replace repl;
	unsigned int size;
	int ret;

	info = *pinfo;
	entry0 = *pentry0;
	size = compatr->size;
	info->number = compatr->num_entries;

	j = 0;
	xt_compat_lock(AF_INET);
	ret = xt_compat_init_offsets(AF_INET, compatr->num_entries);
	if (ret)
		goto out_unlock;
	/* Walk through entries, checking offsets. */
	xt_entry_foreach(iter0, entry0, compatr->size) {
		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
							entry0,
							entry0 + compatr->size);
		if (ret != 0)
			goto out_unlock;
		++j;
	}

	ret = -EINVAL;
	if (j != compatr->num_entries)
		goto out_unlock;

	ret = -ENOMEM;
	newinfo = xt_alloc_table_info(size);
	if (!newinfo)
		goto out_unlock;

	memset(newinfo->entries, 0, size);

	newinfo->number = compatr->num_entries;
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = compatr->hook_entry[i];
		newinfo->underflow[i] = compatr->underflow[i];
	}
	entry1 = newinfo->entries;
	pos = entry1;
	size = compatr->size;
	xt_entry_foreach(iter0, entry0, compatr->size)
		compat_copy_entry_from_user(iter0, &pos, &size,
					    newinfo, entry1);

	/* all module references in entry0 are now gone.
	 * entry1/newinfo contains a 64bit ruleset that looks exactly as
	 * generated by 64bit userspace.
	 *
	 * Call standard translate_table() to validate all hook_entrys,
	 * underflows, check for loops, etc.
	 */
	xt_compat_flush_offsets(AF_INET);
	xt_compat_unlock(AF_INET);

	memcpy(&repl, compatr, sizeof(*compatr));

	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		repl.hook_entry[i] = newinfo->hook_entry[i];
		repl.underflow[i] = newinfo->underflow[i];
	}

	repl.num_counters = 0;
	repl.counters = NULL;
	repl.size = newinfo->size;
	ret = translate_table(net, newinfo, entry1, &repl);
	if (ret)
		goto free_newinfo;

	*pinfo = newinfo;
	*pentry0 = entry1;
	xt_free_table_info(info);
	return 0;

free_newinfo:
	xt_free_table_info(newinfo);
	return ret;
out_unlock:
	xt_compat_flush_offsets(AF_INET);
	xt_compat_unlock(AF_INET);
	xt_entry_foreach(iter0, entry0, compatr->size) {
		if (j-- == 0)
			break;
		compat_release_entry(iter0);
	}
	return ret;
}

static int
compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
	int ret;
	struct compat_ipt_replace tmp;
	struct xt_table_info *newinfo;
	void *loc_cpu_entry;
	struct ipt_entry *iter;

	if (len < sizeof(tmp))
		return -EINVAL;
	if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
		return -EFAULT;

	/* overflow check */
	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
		return -ENOMEM;
	if (tmp.num_counters == 0)
		return -EINVAL;
	if ((u64)len < (u64)tmp.size + sizeof(tmp))
		return -EINVAL;

	tmp.name[sizeof(tmp.name)-1] = 0;

	newinfo = xt_alloc_table_info(tmp.size);
	if (!newinfo)
		return -ENOMEM;

	loc_cpu_entry = newinfo->entries;
	if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp),
			tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
	if (ret != 0)
		goto free_newinfo;

	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
			   tmp.num_counters, compat_ptr(tmp.counters));
	if (ret)
		goto free_newinfo_untrans;
	return 0;

 free_newinfo_untrans:
	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
		cleanup_entry(iter, net);
 free_newinfo:
	xt_free_table_info(newinfo);
	return ret;
}

struct compat_ipt_get_entries {
	char name[XT_TABLE_MAXNAMELEN];
	compat_uint_t size;
	struct compat_ipt_entry entrytable[];
};

static int
compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
			    void __user *userptr)
{
	struct xt_counters *counters;
	const struct xt_table_info *private = table->private;
	void __user *pos;
	unsigned int size;
	int ret = 0;
	unsigned int i = 0;
	struct ipt_entry *iter;

	counters = alloc_counters(table);
	if (IS_ERR(counters))
		return PTR_ERR(counters);

	pos = userptr;
	size = total_size;
	xt_entry_foreach(iter, private->entries, total_size) {
		ret = compat_copy_entry_to_user(iter, &pos,
						&size, counters, i++);
		if (ret != 0)
			break;
	}

	vfree(counters);
	return ret;
}

static int
compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
		   int *len)
{
	int ret;
	struct compat_ipt_get_entries get;
	struct xt_table *t;

	if (*len < sizeof(get))
		return -EINVAL;

	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
		return -EFAULT;

	if (*len != sizeof(struct compat_ipt_get_entries) + get.size)
		return -EINVAL;

	get.name[sizeof(get.name) - 1] = '\0';

	xt_compat_lock(AF_INET);
	t = xt_find_table_lock(net, AF_INET, get.name);
	if (!IS_ERR(t)) {
		const struct xt_table_info *private = t->private;
		struct xt_table_info info;
		ret = compat_table_info(private, &info);
		if (!ret && get.size == info.size)
			ret = compat_copy_entries_to_user(private->size,
							  t, uptr->entrytable);
		else if (!ret)
			ret = -EAGAIN;

		xt_compat_flush_offsets(AF_INET);
		module_put(t->me);
		xt_table_unlock(t);
	} else
		ret = PTR_ERR(t);

	xt_compat_unlock(AF_INET);
	return ret;
}
#endif

static int
do_ipt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len)
{
	int ret;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_SET_REPLACE:
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
		if (in_compat_syscall())
			ret = compat_do_replace(sock_net(sk), arg, len);
		else
#endif
			ret = do_replace(sock_net(sk), arg, len);
		break;

	case IPT_SO_SET_ADD_COUNTERS:
		ret = do_add_counters(sock_net(sk), arg, len);
		break;

	default:
		ret = -EINVAL;
	}

	return ret;
}

static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	int ret;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO:
		ret = get_info(sock_net(sk), user, len);
		break;

	case IPT_SO_GET_ENTRIES:
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
		if (in_compat_syscall())
			ret = compat_get_entries(sock_net(sk), user, len);
		else
#endif
			ret = get_entries(sock_net(sk), user, len);
		break;

	case IPT_SO_GET_REVISION_MATCH:
	case IPT_SO_GET_REVISION_TARGET: {
		struct xt_get_revision rev;
		int target;

		if (*len != sizeof(rev)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
			ret = -EFAULT;
			break;
		}
		rev.name[sizeof(rev.name)-1] = 0;

		if (cmd == IPT_SO_GET_REVISION_TARGET)
			target = 1;
		else
			target = 0;

		try_then_request_module(xt_find_revision(AF_INET, rev.name,
							 rev.revision,
							 target, &ret),
					"ipt_%s", rev.name);
		break;
	}

	default:
		ret = -EINVAL;
	}

	return ret;
}

static void __ipt_unregister_table(struct net *net, struct xt_table *table)
{
	struct xt_table_info *private;
	void *loc_cpu_entry;
	struct module *table_owner = table->me;
	struct ipt_entry *iter;

	private = xt_unregister_table(table);

	/* Decrease module usage counts and free resources */
	loc_cpu_entry = private->entries;
	xt_entry_foreach(iter, loc_cpu_entry, private->size)
		cleanup_entry(iter, net);
	if (private->number > private->initial_entries)
		module_put(table_owner);
	xt_free_table_info(private);
}

int ipt_register_table(struct net *net, const struct xt_table *table,
		       const struct ipt_replace *repl,
		       const struct nf_hook_ops *template_ops)
{
	struct nf_hook_ops *ops;
	unsigned int num_ops;
	int ret, i;
	struct xt_table_info *newinfo;
	struct xt_table_info bootstrap = {0};
	void *loc_cpu_entry;
	struct xt_table *new_table;

	newinfo = xt_alloc_table_info(repl->size);
	if (!newinfo)
		return -ENOMEM;

	loc_cpu_entry = newinfo->entries;
	memcpy(loc_cpu_entry, repl->entries, repl->size);

	ret = translate_table(net, newinfo, loc_cpu_entry, repl);
	if (ret != 0) {
		xt_free_table_info(newinfo);
		return ret;
	}

	new_table = xt_register_table(net, table, &bootstrap, newinfo);
	if (IS_ERR(new_table)) {
		struct ipt_entry *iter;

		xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
			cleanup_entry(iter, net);
		xt_free_table_info(newinfo);
		return PTR_ERR(new_table);
	}

	/* No template? No need to do anything.  This is used by the 'nat'
	 * table, which registers with the nat core instead of the
	 * netfilter core.
	 */
	if (!template_ops)
		return 0;

	num_ops = hweight32(table->valid_hooks);
	if (num_ops == 0) {
		ret = -EINVAL;
		goto out_free;
	}

	ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
	if (!ops) {
		ret = -ENOMEM;
		goto out_free;
	}

	for (i = 0; i < num_ops; i++)
		ops[i].priv = new_table;

	new_table->ops = ops;

	ret = nf_register_net_hooks(net, ops, num_ops);
	if (ret != 0)
		goto out_free;

	return ret;

out_free:
	__ipt_unregister_table(net, new_table);
	return ret;
}

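/* Table teardown is split in two phases: _pre_exit unregisters the
 * netfilter hooks so no new packets can enter the table, _exit then
 * releases the rules and frees the table itself.
 */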
void ipt_unregister_table_pre_exit(struct net *net, const char *name)
{
	struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name);

	if (table)
		nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}

void ipt_unregister_table_exit(struct net *net, const char *name)
{
	struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name);

	if (table)
		__ipt_unregister_table(net, table);
}

static struct xt_target ipt_builtin_tg[] __read_mostly = {
	{
		.name             = XT_STANDARD_TARGET,
		.targetsize       = sizeof(int),
		.family           = NFPROTO_IPV4,
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
		.compatsize       = sizeof(compat_int_t),
		.compat_from_user = compat_standard_from_user,
		.compat_to_user   = compat_standard_to_user,
#endif
	},
	{
		.name             = XT_ERROR_TARGET,
		.target           = ipt_error,
		.targetsize       = XT_FUNCTION_MAXNAMELEN,
		.family           = NFPROTO_IPV4,
	},
};

static struct nf_sockopt_ops ipt_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IPT_BASE_CTL,
	.set_optmax	= IPT_SO_SET_MAX+1,
	.set		= do_ipt_set_ctl,
	.get_optmin	= IPT_BASE_CTL,
	.get_optmax	= IPT_SO_GET_MAX+1,
	.get		= do_ipt_get_ctl,
	.owner		= THIS_MODULE,
};
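
/* Userspace (e.g. libiptc) drives the handlers above through
 * get/setsockopt on an ordinary IPv4 socket.  A minimal sketch of
 * reading the table metadata, error handling omitted:
 *
 *	struct ipt_getinfo info = {};
 *	socklen_t len = sizeof(info);
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *
 *	strcpy(info.name, "filter");
 *	getsockopt(fd, IPPROTO_IP, IPT_SO_GET_INFO, &info, &len);
 *
 * This requires CAP_NET_ADMIN, as checked in do_ipt_get_ctl().
 */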

static int __net_init ip_tables_net_init(struct net *net)
{
	return xt_proto_init(net, NFPROTO_IPV4);
}

static void __net_exit ip_tables_net_exit(struct net *net)
{
	xt_proto_fini(net, NFPROTO_IPV4);
}

static struct pernet_operations ip_tables_net_ops = {
	.init = ip_tables_net_init,
	.exit = ip_tables_net_exit,
};

static int __init ip_tables_init(void)
{
	int ret;

	ret = register_pernet_subsys(&ip_tables_net_ops);
	if (ret < 0)
		goto err1;

	/* No one else will be downing sem now, so we won't sleep */
	ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
	if (ret < 0)
		goto err2;

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0)
		goto err4;

	return 0;

err4:
	xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
err2:
	unregister_pernet_subsys(&ip_tables_net_ops);
err1:
	return ret;
}

static void __exit ip_tables_fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);

	xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
	unregister_pernet_subsys(&ip_tables_net_ops);
}

EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table_pre_exit);
EXPORT_SYMBOL(ipt_unregister_table_exit);
EXPORT_SYMBOL(ipt_do_table);
module_init(ip_tables_init);
module_exit(ip_tables_fini);