1
2/* (C) 1999-2001 Paul `Rusty' Russell
3 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/types.h>
11#include <linux/ip.h>
12#include <linux/netfilter.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/icmp.h>
16#include <linux/sysctl.h>
17#include <net/route.h>
18#include <net/ip.h>
19
20#include <linux/netfilter_ipv4.h>
21#include <net/netfilter/nf_conntrack.h>
22#include <net/netfilter/nf_conntrack_helper.h>
23#include <net/netfilter/nf_conntrack_l4proto.h>
24#include <net/netfilter/nf_conntrack_l3proto.h>
25#include <net/netfilter/nf_conntrack_zones.h>
26#include <net/netfilter/nf_conntrack_core.h>
27#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
28#include <net/netfilter/nf_nat_helper.h>
29#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
30#include <net/netfilter/nf_log.h>
31
/* Optional sequence-adjustment callback.  ipv4_confirm() reads it with
 * rcu_dereference() for connections that have IPS_SEQ_ADJUST_BIT set; a
 * NULL hook or a zero return value makes ipv4_confirm() drop the packet.
 * NOTE(review): presumably installed by the NAT code -- confirm. */
int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
			      struct nf_conn *ct,
			      enum ip_conntrack_info ctinfo);
EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
36
37static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
38			      struct nf_conntrack_tuple *tuple)
39{
40	const __be32 *ap;
41	__be32 _addrs[2];
42	ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
43				sizeof(u_int32_t) * 2, _addrs);
44	if (ap == NULL)
45		return false;
46
47	tuple->src.u3.ip = ap[0];
48	tuple->dst.u3.ip = ap[1];
49
50	return true;
51}
52
53static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
54			      const struct nf_conntrack_tuple *orig)
55{
56	tuple->src.u3.ip = orig->dst.u3.ip;
57	tuple->dst.u3.ip = orig->src.u3.ip;
58
59	return true;
60}
61
62static int ipv4_print_tuple(struct seq_file *s,
63			    const struct nf_conntrack_tuple *tuple)
64{
65	return seq_printf(s, "src=%pI4 dst=%pI4 ",
66			  &tuple->src.u3.ip, &tuple->dst.u3.ip);
67}
68
69static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
70			    unsigned int *dataoff, u_int8_t *protonum)
71{
72	const struct iphdr *iph;
73	struct iphdr _iph;
74
75	iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
76	if (iph == NULL)
77		return -NF_DROP;
78
79	/* Conntrack defragments packets, we might still see fragments
80	 * inside ICMP packets though. */
81	if (iph->frag_off & htons(IP_OFFSET))
82		return -NF_DROP;
83
84	*dataoff = nhoff + (iph->ihl << 2);
85	*protonum = iph->protocol;
86
87	return NF_ACCEPT;
88}
89
90static unsigned int ipv4_confirm(unsigned int hooknum,
91				 struct sk_buff *skb,
92				 const struct net_device *in,
93				 const struct net_device *out,
94				 int (*okfn)(struct sk_buff *))
95{
96	struct nf_conn *ct;
97	enum ip_conntrack_info ctinfo;
98	const struct nf_conn_help *help;
99	const struct nf_conntrack_helper *helper;
100	unsigned int ret;
101
102	/* This is where we call the helper: as the packet goes out. */
103	ct = nf_ct_get(skb, &ctinfo);
104	if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
105		goto out;
106
107	help = nfct_help(ct);
108	if (!help)
109		goto out;
110
111	/* rcu_read_lock()ed by nf_hook_slow */
112	helper = rcu_dereference(help->helper);
113	if (!helper)
114		goto out;
115
116	ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
117			   ct, ctinfo);
118	if (ret != NF_ACCEPT) {
119		nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL,
120			      "nf_ct_%s: dropping packet", helper->name);
121		return ret;
122	}
123
124	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
125		typeof(nf_nat_seq_adjust_hook) seq_adjust;
126
127		seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
128		if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) {
129			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
130			return NF_DROP;
131		}
132	}
133out:
134	/* We've seen it coming out the other side: confirm it */
135	return nf_conntrack_confirm(skb);
136}
137
138static unsigned int ipv4_conntrack_in(unsigned int hooknum,
139				      struct sk_buff *skb,
140				      const struct net_device *in,
141				      const struct net_device *out,
142				      int (*okfn)(struct sk_buff *))
143{
144	return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
145}
146
147static unsigned int ipv4_conntrack_local(unsigned int hooknum,
148					 struct sk_buff *skb,
149					 const struct net_device *in,
150					 const struct net_device *out,
151					 int (*okfn)(struct sk_buff *))
152{
153	/* root is playing with raw sockets. */
154	if (skb->len < sizeof(struct iphdr) ||
155	    ip_hdrlen(skb) < sizeof(struct iphdr))
156		return NF_ACCEPT;
157	return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb);
158}
159
160/* Connection tracking may drop packets, but never alters them, so
161   make it the first hook. */
162static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
163	{
164		.hook		= ipv4_conntrack_in,
165		.owner		= THIS_MODULE,
166		.pf		= NFPROTO_IPV4,
167		.hooknum	= NF_INET_PRE_ROUTING,
168		.priority	= NF_IP_PRI_CONNTRACK,
169	},
170	{
171		.hook		= ipv4_conntrack_local,
172		.owner		= THIS_MODULE,
173		.pf		= NFPROTO_IPV4,
174		.hooknum	= NF_INET_LOCAL_OUT,
175		.priority	= NF_IP_PRI_CONNTRACK,
176	},
177	{
178		.hook		= ipv4_confirm,
179		.owner		= THIS_MODULE,
180		.pf		= NFPROTO_IPV4,
181		.hooknum	= NF_INET_POST_ROUTING,
182		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
183	},
184	{
185		.hook		= ipv4_confirm,
186		.owner		= THIS_MODULE,
187		.pf		= NFPROTO_IPV4,
188		.hooknum	= NF_INET_LOCAL_IN,
189		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
190	},
191};
192
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
/* Range limits handed to proc_dointvec_minmax (via extra1/extra2) for
 * the ip_conntrack_log_invalid entry. */
static int log_invalid_proto_min;
static int log_invalid_proto_max = 255;

/* Compatibility sysctl entries using the ip_conntrack_* names.  Apart
 * from ip_conntrack_max, every entry points at a field of init_net.
 * ip_conntrack_count and ip_conntrack_buckets are read-only (0444). */
static ctl_table ip_ct_sysctl_table[] = {
	{
		.procname	= "ip_conntrack_max",
		.mode		= 0644,
		.data		= &nf_conntrack_max,
		.maxlen		= sizeof(int),
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "ip_conntrack_count",
		.mode		= 0444,
		.data		= &init_net.ct.count,
		.maxlen		= sizeof(int),
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "ip_conntrack_buckets",
		.mode		= 0444,
		.data		= &init_net.ct.htable_size,
		.maxlen		= sizeof(unsigned int),
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "ip_conntrack_checksum",
		.mode		= 0644,
		.data		= &init_net.ct.sysctl_checksum,
		.maxlen		= sizeof(int),
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "ip_conntrack_log_invalid",
		.mode		= 0644,
		.data		= &init_net.ct.sysctl_log_invalid,
		.maxlen		= sizeof(unsigned int),
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &log_invalid_proto_min,
		.extra2		= &log_invalid_proto_max,
	},
	{ }	/* empty terminating entry */
};
#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */
238
239/* Fast function for those who don't want to parse /proc (and I don't
240   blame them). */
241/* Reversing the socket's dst/src point of view gives us the reply
242   mapping. */
243static int
244getorigdst(struct sock *sk, int optval, void __user *user, int *len)
245{
246	const struct inet_sock *inet = inet_sk(sk);
247	const struct nf_conntrack_tuple_hash *h;
248	struct nf_conntrack_tuple tuple;
249
250	memset(&tuple, 0, sizeof(tuple));
251	tuple.src.u3.ip = inet->inet_rcv_saddr;
252	tuple.src.u.tcp.port = inet->inet_sport;
253	tuple.dst.u3.ip = inet->inet_daddr;
254	tuple.dst.u.tcp.port = inet->inet_dport;
255	tuple.src.l3num = PF_INET;
256	tuple.dst.protonum = sk->sk_protocol;
257
258	/* We only do TCP and SCTP at the moment: is there a better way? */
259	if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) {
260		pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
261		return -ENOPROTOOPT;
262	}
263
264	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
265		pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n",
266			 *len, sizeof(struct sockaddr_in));
267		return -EINVAL;
268	}
269
270	h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
271	if (h) {
272		struct sockaddr_in sin;
273		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
274
275		sin.sin_family = AF_INET;
276		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
277			.tuple.dst.u.tcp.port;
278		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
279			.tuple.dst.u3.ip;
280		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
281
282		pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
283			 &sin.sin_addr.s_addr, ntohs(sin.sin_port));
284		nf_ct_put(ct);
285		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
286			return -EFAULT;
287		else
288			return 0;
289	}
290	pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
291		 &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
292		 &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
293	return -ENOENT;
294}
295
296#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
297
298#include <linux/netfilter/nfnetlink.h>
299#include <linux/netfilter/nfnetlink_conntrack.h>
300
301static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
302				const struct nf_conntrack_tuple *tuple)
303{
304	NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip);
305	NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip);
306	return 0;
307
308nla_put_failure:
309	return -1;
310}
311
/* Netlink attribute policy for the IPv4 tuple attributes: both addresses
 * are declared as NLA_U32. */
static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = {
	[CTA_IP_V4_SRC]	= { .type = NLA_U32 },
	[CTA_IP_V4_DST]	= { .type = NLA_U32 },
};
316
317static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
318				struct nf_conntrack_tuple *t)
319{
320	if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
321		return -EINVAL;
322
323	t->src.u3.ip = nla_get_be32(tb[CTA_IP_V4_SRC]);
324	t->dst.u3.ip = nla_get_be32(tb[CTA_IP_V4_DST]);
325
326	return 0;
327}
328
329static int ipv4_nlattr_tuple_size(void)
330{
331	return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
332}
333#endif
334
335static struct nf_sockopt_ops so_getorigdst = {
336	.pf		= PF_INET,
337	.get_optmin	= SO_ORIGINAL_DST,
338	.get_optmax	= SO_ORIGINAL_DST+1,
339	.get		= &getorigdst,
340	.owner		= THIS_MODULE,
341};
342
343struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
344	.l3proto	 = PF_INET,
345	.name		 = "ipv4",
346	.pkt_to_tuple	 = ipv4_pkt_to_tuple,
347	.invert_tuple	 = ipv4_invert_tuple,
348	.print_tuple	 = ipv4_print_tuple,
349	.get_l4proto	 = ipv4_get_l4proto,
350#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
351	.tuple_to_nlattr = ipv4_tuple_to_nlattr,
352	.nlattr_tuple_size = ipv4_nlattr_tuple_size,
353	.nlattr_to_tuple = ipv4_nlattr_to_tuple,
354	.nla_policy	 = ipv4_nla_policy,
355#endif
356#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
357	.ctl_table_path  = nf_net_ipv4_netfilter_sysctl_path,
358	.ctl_table	 = ip_ct_sysctl_table,
359#endif
360	.me		 = THIS_MODULE,
361};
362
/* "hashsize" module parameter (mode 0600): writes go through
 * nf_conntrack_set_hashsize(), reads through param_get_uint(), both
 * operating on nf_conntrack_htable_size. */
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
		  &nf_conntrack_htable_size, 0600);

/* Module aliases: one derived from the AF_INET family number, plus the
 * "ip_conntrack" name. */
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("ip_conntrack");
MODULE_LICENSE("GPL");
369
370static int __init nf_conntrack_l3proto_ipv4_init(void)
371{
372	int ret = 0;
373
374	need_conntrack();
375	nf_defrag_ipv4_enable();
376
377	ret = nf_register_sockopt(&so_getorigdst);
378	if (ret < 0) {
379		printk(KERN_ERR "Unable to register netfilter socket option\n");
380		return ret;
381	}
382
383	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
384	if (ret < 0) {
385		pr_err("nf_conntrack_ipv4: can't register tcp.\n");
386		goto cleanup_sockopt;
387	}
388
389	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
390	if (ret < 0) {
391		pr_err("nf_conntrack_ipv4: can't register udp.\n");
392		goto cleanup_tcp;
393	}
394
395	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
396	if (ret < 0) {
397		pr_err("nf_conntrack_ipv4: can't register icmp.\n");
398		goto cleanup_udp;
399	}
400
401	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
402	if (ret < 0) {
403		pr_err("nf_conntrack_ipv4: can't register ipv4\n");
404		goto cleanup_icmp;
405	}
406
407	ret = nf_register_hooks(ipv4_conntrack_ops,
408				ARRAY_SIZE(ipv4_conntrack_ops));
409	if (ret < 0) {
410		pr_err("nf_conntrack_ipv4: can't register hooks.\n");
411		goto cleanup_ipv4;
412	}
413#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
414	ret = nf_conntrack_ipv4_compat_init();
415	if (ret < 0)
416		goto cleanup_hooks;
417#endif
418	return ret;
419#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
420 cleanup_hooks:
421	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
422#endif
423 cleanup_ipv4:
424	nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
425 cleanup_icmp:
426	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
427 cleanup_udp:
428	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
429 cleanup_tcp:
430	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
431 cleanup_sockopt:
432	nf_unregister_sockopt(&so_getorigdst);
433	return ret;
434}
435
/* Module exit: after synchronize_net(), tears down everything the init
 * function registered, in the reverse of the registration order (compat
 * layer, hooks, layer-3 tracker, ICMP/UDP/TCP layer-4 trackers, socket
 * option). */
static void __exit nf_conntrack_l3proto_ipv4_fini(void)
{
	synchronize_net();
#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	nf_conntrack_ipv4_compat_fini();
#endif
	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
	nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
	nf_unregister_sockopt(&so_getorigdst);
}
449
/* Module entry and exit points. */
module_init(nf_conntrack_l3proto_ipv4_init);
module_exit(nf_conntrack_l3proto_ipv4_fini);
452
/* Exported function with an empty body.
 * NOTE(review): presumably exists so that other modules can reference
 * this symbol and thereby depend on this module -- confirm. */
void need_ipv4_conntrack(void)
{
}
EXPORT_SYMBOL_GPL(need_ipv4_conntrack);
458