• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/
1/*
2 *	NET3	IP device support routines.
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 *	Derived from the IP parts of dev.c 1.0.19
10 * 		Authors:	Ross Biro
11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 *	Additional Authors:
15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 *	Changes:
19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20 *					lists.
21 *		Cyrus Durgin:		updated for kmod
22 *		Matthias Andree:	in devinet_ioctl, compare label and
23 *					address (4.4BSD alias style support),
24 *					fall back to comparing just the label
25 *					if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <asm/system.h>
31#include <linux/bitops.h>
32#include <linux/capability.h>
33#include <linux/module.h>
34#include <linux/types.h>
35#include <linux/kernel.h>
36#include <linux/string.h>
37#include <linux/mm.h>
38#include <linux/socket.h>
39#include <linux/sockios.h>
40#include <linux/in.h>
41#include <linux/errno.h>
42#include <linux/interrupt.h>
43#include <linux/if_addr.h>
44#include <linux/if_ether.h>
45#include <linux/inet.h>
46#include <linux/netdevice.h>
47#include <linux/etherdevice.h>
48#include <linux/skbuff.h>
49#include <linux/init.h>
50#include <linux/notifier.h>
51#include <linux/inetdevice.h>
52#include <linux/igmp.h>
53#include <linux/slab.h>
54#ifdef CONFIG_SYSCTL
55#include <linux/sysctl.h>
56#endif
57#include <linux/kmod.h>
58
59#include <net/arp.h>
60#include <net/ip.h>
61#include <net/route.h>
62#include <net/ip_fib.h>
63#include <net/rtnetlink.h>
64#include <net/net_namespace.h>
65
66static struct ipv4_devconf ipv4_devconf = {
67	.data = {
68		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
69		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
70		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
71		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
72	},
73};
74
75static struct ipv4_devconf ipv4_devconf_dflt = {
76	.data = {
77		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82	},
83};
84
/*
 * Access attribute @attr in the default devconf table of namespace @net.
 * @net is parenthesized so that any pointer-valued expression (not just a
 * plain identifier) expands with the intended precedence.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
87
88static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89	[IFA_LOCAL]     	= { .type = NLA_U32 },
90	[IFA_ADDRESS]   	= { .type = NLA_U32 },
91	[IFA_BROADCAST] 	= { .type = NLA_U32 },
92	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93};
94
95static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96
97static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99			 int destroy);
100#ifdef CONFIG_SYSCTL
101static void devinet_sysctl_register(struct in_device *idev);
102static void devinet_sysctl_unregister(struct in_device *idev);
103#else
104static inline void devinet_sysctl_register(struct in_device *idev)
105{
106}
107static inline void devinet_sysctl_unregister(struct in_device *idev)
108{
109}
110#endif
111
112/* Locks all the inet devices. */
113
114static struct in_ifaddr *inet_alloc_ifa(void)
115{
116	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
117}
118
119static void inet_rcu_free_ifa(struct rcu_head *head)
120{
121	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122	if (ifa->ifa_dev)
123		in_dev_put(ifa->ifa_dev);
124	kfree(ifa);
125}
126
127static inline void inet_free_ifa(struct in_ifaddr *ifa)
128{
129	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130}
131
132void in_dev_finish_destroy(struct in_device *idev)
133{
134	struct net_device *dev = idev->dev;
135
136	WARN_ON(idev->ifa_list);
137	WARN_ON(idev->mc_list);
138#ifdef NET_REFCNT_DEBUG
139	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140	       idev, dev ? dev->name : "NIL");
141#endif
142	dev_put(dev);
143	if (!idev->dead)
144		pr_err("Freeing alive in_device %p\n", idev);
145	else
146		kfree(idev);
147}
148EXPORT_SYMBOL(in_dev_finish_destroy);
149
150static struct in_device *inetdev_init(struct net_device *dev)
151{
152	struct in_device *in_dev;
153
154	ASSERT_RTNL();
155
156	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157	if (!in_dev)
158		goto out;
159	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160			sizeof(in_dev->cnf));
161	in_dev->cnf.sysctl = NULL;
162	in_dev->dev = dev;
163	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164	if (!in_dev->arp_parms)
165		goto out_kfree;
166	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167		dev_disable_lro(dev);
168	/* Reference in_dev->dev */
169	dev_hold(dev);
170	/* Account for reference dev->ip_ptr (below) */
171	in_dev_hold(in_dev);
172
173	devinet_sysctl_register(in_dev);
174	ip_mc_init_dev(in_dev);
175	if (dev->flags & IFF_UP)
176		ip_mc_up(in_dev);
177
178	/* we can receive as soon as ip_ptr is set -- do this last */
179	rcu_assign_pointer(dev->ip_ptr, in_dev);
180out:
181	return in_dev;
182out_kfree:
183	kfree(in_dev);
184	in_dev = NULL;
185	goto out;
186}
187
188static void in_dev_rcu_put(struct rcu_head *head)
189{
190	struct in_device *idev = container_of(head, struct in_device, rcu_head);
191	in_dev_put(idev);
192}
193
194static void inetdev_destroy(struct in_device *in_dev)
195{
196	struct in_ifaddr *ifa;
197	struct net_device *dev;
198
199	ASSERT_RTNL();
200
201	dev = in_dev->dev;
202
203	in_dev->dead = 1;
204
205	ip_mc_destroy_dev(in_dev);
206
207	while ((ifa = in_dev->ifa_list) != NULL) {
208		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
209		inet_free_ifa(ifa);
210	}
211
212	dev->ip_ptr = NULL;
213
214	devinet_sysctl_unregister(in_dev);
215	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
216	arp_ifdown(dev);
217
218	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
219}
220
221int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
222{
223	rcu_read_lock();
224	for_primary_ifa(in_dev) {
225		if (inet_ifa_match(a, ifa)) {
226			if (!b || inet_ifa_match(b, ifa)) {
227				rcu_read_unlock();
228				return 1;
229			}
230		}
231	} endfor_ifa(in_dev);
232	rcu_read_unlock();
233	return 0;
234}
235
236static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
237			 int destroy, struct nlmsghdr *nlh, u32 pid)
238{
239	struct in_ifaddr *promote = NULL;
240	struct in_ifaddr *ifa, *ifa1 = *ifap;
241	struct in_ifaddr *last_prim = in_dev->ifa_list;
242	struct in_ifaddr *prev_prom = NULL;
243	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
244
245	ASSERT_RTNL();
246
247	/* 1. Deleting primary ifaddr forces deletion all secondaries
248	 * unless alias promotion is set
249	 **/
250
251	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
252		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
253
254		while ((ifa = *ifap1) != NULL) {
255			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
256			    ifa1->ifa_scope <= ifa->ifa_scope)
257				last_prim = ifa;
258
259			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
260			    ifa1->ifa_mask != ifa->ifa_mask ||
261			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
262				ifap1 = &ifa->ifa_next;
263				prev_prom = ifa;
264				continue;
265			}
266
267			if (!do_promote) {
268				*ifap1 = ifa->ifa_next;
269
270				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
271				blocking_notifier_call_chain(&inetaddr_chain,
272						NETDEV_DOWN, ifa);
273				inet_free_ifa(ifa);
274			} else {
275				promote = ifa;
276				break;
277			}
278		}
279	}
280
281	/* 2. Unlink it */
282
283	*ifap = ifa1->ifa_next;
284
285	/* 3. Announce address deletion */
286
287	/* Send message first, then call notifier.
288	   At first sight, FIB update triggered by notifier
289	   will refer to already deleted ifaddr, that could confuse
290	   netlink listeners. It is not true: look, gated sees
291	   that route deleted and if it still thinks that ifaddr
292	   is valid, it will try to restore deleted routes... Grr.
293	   So that, this order is correct.
294	 */
295	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
296	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
297
298	if (promote) {
299
300		if (prev_prom) {
301			prev_prom->ifa_next = promote->ifa_next;
302			promote->ifa_next = last_prim->ifa_next;
303			last_prim->ifa_next = promote;
304		}
305
306		promote->ifa_flags &= ~IFA_F_SECONDARY;
307		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
308		blocking_notifier_call_chain(&inetaddr_chain,
309				NETDEV_UP, promote);
310		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
311			if (ifa1->ifa_mask != ifa->ifa_mask ||
312			    !inet_ifa_match(ifa1->ifa_address, ifa))
313					continue;
314			fib_add_ifaddr(ifa);
315		}
316
317	}
318	if (destroy)
319		inet_free_ifa(ifa1);
320}
321
322static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
323			 int destroy)
324{
325	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
326}
327
328static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
329			     u32 pid)
330{
331	struct in_device *in_dev = ifa->ifa_dev;
332	struct in_ifaddr *ifa1, **ifap, **last_primary;
333
334	ASSERT_RTNL();
335
336	if (!ifa->ifa_local) {
337		inet_free_ifa(ifa);
338		return 0;
339	}
340
341	ifa->ifa_flags &= ~IFA_F_SECONDARY;
342	last_primary = &in_dev->ifa_list;
343
344	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
345	     ifap = &ifa1->ifa_next) {
346		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
347		    ifa->ifa_scope <= ifa1->ifa_scope)
348			last_primary = &ifa1->ifa_next;
349		if (ifa1->ifa_mask == ifa->ifa_mask &&
350		    inet_ifa_match(ifa1->ifa_address, ifa)) {
351			if (ifa1->ifa_local == ifa->ifa_local) {
352				inet_free_ifa(ifa);
353				return -EEXIST;
354			}
355			if (ifa1->ifa_scope != ifa->ifa_scope) {
356				inet_free_ifa(ifa);
357				return -EINVAL;
358			}
359			ifa->ifa_flags |= IFA_F_SECONDARY;
360		}
361	}
362
363	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
364		net_srandom(ifa->ifa_local);
365		ifap = last_primary;
366	}
367
368	ifa->ifa_next = *ifap;
369	*ifap = ifa;
370
371	/* Send message first, then call notifier.
372	   Notifier will trigger FIB update, so that
373	   listeners of netlink will know about new ifaddr */
374	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
375	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
376
377	return 0;
378}
379
380static int inet_insert_ifa(struct in_ifaddr *ifa)
381{
382	return __inet_insert_ifa(ifa, NULL, 0);
383}
384
385static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
386{
387	struct in_device *in_dev = __in_dev_get_rtnl(dev);
388
389	ASSERT_RTNL();
390
391	if (!in_dev) {
392		inet_free_ifa(ifa);
393		return -ENOBUFS;
394	}
395	ipv4_devconf_setall(in_dev);
396	if (ifa->ifa_dev != in_dev) {
397		WARN_ON(ifa->ifa_dev);
398		in_dev_hold(in_dev);
399		ifa->ifa_dev = in_dev;
400	}
401	if (ipv4_is_loopback(ifa->ifa_local))
402		ifa->ifa_scope = RT_SCOPE_HOST;
403	return inet_insert_ifa(ifa);
404}
405
406struct in_device *inetdev_by_index(struct net *net, int ifindex)
407{
408	struct net_device *dev;
409	struct in_device *in_dev = NULL;
410
411	rcu_read_lock();
412	dev = dev_get_by_index_rcu(net, ifindex);
413	if (dev)
414		in_dev = in_dev_get(dev);
415	rcu_read_unlock();
416	return in_dev;
417}
418EXPORT_SYMBOL(inetdev_by_index);
419
420/* Called only from RTNL semaphored context. No locks. */
421
422struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
423				    __be32 mask)
424{
425	ASSERT_RTNL();
426
427	for_primary_ifa(in_dev) {
428		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
429			return ifa;
430	} endfor_ifa(in_dev);
431	return NULL;
432}
433
434static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
435{
436	struct net *net = sock_net(skb->sk);
437	struct nlattr *tb[IFA_MAX+1];
438	struct in_device *in_dev;
439	struct ifaddrmsg *ifm;
440	struct in_ifaddr *ifa, **ifap;
441	int err = -EINVAL;
442
443	ASSERT_RTNL();
444
445	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
446	if (err < 0)
447		goto errout;
448
449	ifm = nlmsg_data(nlh);
450	in_dev = inetdev_by_index(net, ifm->ifa_index);
451	if (in_dev == NULL) {
452		err = -ENODEV;
453		goto errout;
454	}
455
456	__in_dev_put(in_dev);
457
458	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
459	     ifap = &ifa->ifa_next) {
460		if (tb[IFA_LOCAL] &&
461		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
462			continue;
463
464		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
465			continue;
466
467		if (tb[IFA_ADDRESS] &&
468		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
469		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
470			continue;
471
472		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
473		return 0;
474	}
475
476	err = -EADDRNOTAVAIL;
477errout:
478	return err;
479}
480
481static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
482{
483	struct nlattr *tb[IFA_MAX+1];
484	struct in_ifaddr *ifa;
485	struct ifaddrmsg *ifm;
486	struct net_device *dev;
487	struct in_device *in_dev;
488	int err;
489
490	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
491	if (err < 0)
492		goto errout;
493
494	ifm = nlmsg_data(nlh);
495	err = -EINVAL;
496	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
497		goto errout;
498
499	dev = __dev_get_by_index(net, ifm->ifa_index);
500	err = -ENODEV;
501	if (dev == NULL)
502		goto errout;
503
504	in_dev = __in_dev_get_rtnl(dev);
505	err = -ENOBUFS;
506	if (in_dev == NULL)
507		goto errout;
508
509	ifa = inet_alloc_ifa();
510	if (ifa == NULL)
511		/*
512		 * A potential indev allocation can be left alive, it stays
513		 * assigned to its device and is destroy with it.
514		 */
515		goto errout;
516
517	ipv4_devconf_setall(in_dev);
518	in_dev_hold(in_dev);
519
520	if (tb[IFA_ADDRESS] == NULL)
521		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
522
523	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
524	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
525	ifa->ifa_flags = ifm->ifa_flags;
526	ifa->ifa_scope = ifm->ifa_scope;
527	ifa->ifa_dev = in_dev;
528
529	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
530	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
531
532	if (tb[IFA_BROADCAST])
533		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
534
535	if (tb[IFA_LABEL])
536		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
537	else
538		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
539
540	return ifa;
541
542errout:
543	return ERR_PTR(err);
544}
545
546static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
547{
548	struct net *net = sock_net(skb->sk);
549	struct in_ifaddr *ifa;
550
551	ASSERT_RTNL();
552
553	ifa = rtm_to_ifaddr(net, nlh);
554	if (IS_ERR(ifa))
555		return PTR_ERR(ifa);
556
557	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
558}
559
560/*
561 *	Determine a default network mask, based on the IP address.
562 */
563
564static inline int inet_abc_len(__be32 addr)
565{
566	int rc = -1;	/* Something else, probably a multicast. */
567
568	if (ipv4_is_zeronet(addr))
569		rc = 0;
570	else {
571		__u32 haddr = ntohl(addr);
572
573		if (IN_CLASSA(haddr))
574			rc = 8;
575		else if (IN_CLASSB(haddr))
576			rc = 16;
577		else if (IN_CLASSC(haddr))
578			rc = 24;
579	}
580
581	return rc;
582}
583
584
585int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
586{
587	struct ifreq ifr;
588	struct sockaddr_in sin_orig;
589	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
590	struct in_device *in_dev;
591	struct in_ifaddr **ifap = NULL;
592	struct in_ifaddr *ifa = NULL;
593	struct net_device *dev;
594	char *colon;
595	int ret = -EFAULT;
596	int tryaddrmatch = 0;
597
598	/*
599	 *	Fetch the caller's info block into kernel space
600	 */
601
602	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
603		goto out;
604	ifr.ifr_name[IFNAMSIZ - 1] = 0;
605
606	/* save original address for comparison */
607	memcpy(&sin_orig, sin, sizeof(*sin));
608
609	colon = strchr(ifr.ifr_name, ':');
610	if (colon)
611		*colon = 0;
612
613	dev_load(net, ifr.ifr_name);
614
615	switch (cmd) {
616	case SIOCGIFADDR:	/* Get interface address */
617	case SIOCGIFBRDADDR:	/* Get the broadcast address */
618	case SIOCGIFDSTADDR:	/* Get the destination address */
619	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
620		/* Note that these ioctls will not sleep,
621		   so that we do not impose a lock.
622		   One day we will be forced to put shlock here (I mean SMP)
623		 */
624		tryaddrmatch = (sin_orig.sin_family == AF_INET);
625		memset(sin, 0, sizeof(*sin));
626		sin->sin_family = AF_INET;
627		break;
628
629	case SIOCSIFFLAGS:
630		ret = -EACCES;
631		if (!capable(CAP_NET_ADMIN))
632			goto out;
633		break;
634	case SIOCSIFADDR:	/* Set interface address (and family) */
635	case SIOCSIFBRDADDR:	/* Set the broadcast address */
636	case SIOCSIFDSTADDR:	/* Set the destination address */
637	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
638		ret = -EACCES;
639		if (!capable(CAP_NET_ADMIN))
640			goto out;
641		ret = -EINVAL;
642		if (sin->sin_family != AF_INET)
643			goto out;
644		break;
645	default:
646		ret = -EINVAL;
647		goto out;
648	}
649
650	rtnl_lock();
651
652	ret = -ENODEV;
653	dev = __dev_get_by_name(net, ifr.ifr_name);
654	if (!dev)
655		goto done;
656
657	if (colon)
658		*colon = ':';
659
660	in_dev = __in_dev_get_rtnl(dev);
661	if (in_dev) {
662		if (tryaddrmatch) {
663			/* Matthias Andree */
664			/* compare label and address (4.4BSD style) */
665			/* note: we only do this for a limited set of ioctls
666			   and only if the original address family was AF_INET.
667			   This is checked above. */
668			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
669			     ifap = &ifa->ifa_next) {
670				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
671				    sin_orig.sin_addr.s_addr ==
672							ifa->ifa_address) {
673					break; /* found */
674				}
675			}
676		}
677		/* we didn't get a match, maybe the application is
678		   4.3BSD-style and passed in junk so we fall back to
679		   comparing just the label */
680		if (!ifa) {
681			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
682			     ifap = &ifa->ifa_next)
683				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
684					break;
685		}
686	}
687
688	ret = -EADDRNOTAVAIL;
689	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
690		goto done;
691
692	switch (cmd) {
693	case SIOCGIFADDR:	/* Get interface address */
694		sin->sin_addr.s_addr = ifa->ifa_local;
695		goto rarok;
696
697	case SIOCGIFBRDADDR:	/* Get the broadcast address */
698		sin->sin_addr.s_addr = ifa->ifa_broadcast;
699		goto rarok;
700
701	case SIOCGIFDSTADDR:	/* Get the destination address */
702		sin->sin_addr.s_addr = ifa->ifa_address;
703		goto rarok;
704
705	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
706		sin->sin_addr.s_addr = ifa->ifa_mask;
707		goto rarok;
708
709	case SIOCSIFFLAGS:
710		if (colon) {
711			ret = -EADDRNOTAVAIL;
712			if (!ifa)
713				break;
714			ret = 0;
715			if (!(ifr.ifr_flags & IFF_UP))
716				inet_del_ifa(in_dev, ifap, 1);
717			break;
718		}
719		ret = dev_change_flags(dev, ifr.ifr_flags);
720		break;
721
722	case SIOCSIFADDR:	/* Set interface address (and family) */
723		ret = -EINVAL;
724		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
725			break;
726
727		if (!ifa) {
728			ret = -ENOBUFS;
729			ifa = inet_alloc_ifa();
730			if (!ifa)
731				break;
732			if (colon)
733				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
734			else
735				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
736		} else {
737			ret = 0;
738			if (ifa->ifa_local == sin->sin_addr.s_addr)
739				break;
740			inet_del_ifa(in_dev, ifap, 0);
741			ifa->ifa_broadcast = 0;
742			ifa->ifa_scope = 0;
743		}
744
745		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
746
747		if (!(dev->flags & IFF_POINTOPOINT)) {
748			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
749			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
750			if ((dev->flags & IFF_BROADCAST) &&
751			    ifa->ifa_prefixlen < 31)
752				ifa->ifa_broadcast = ifa->ifa_address |
753						     ~ifa->ifa_mask;
754		} else {
755			ifa->ifa_prefixlen = 32;
756			ifa->ifa_mask = inet_make_mask(32);
757		}
758		ret = inet_set_ifa(dev, ifa);
759		break;
760
761	case SIOCSIFBRDADDR:	/* Set the broadcast address */
762		ret = 0;
763		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
764			inet_del_ifa(in_dev, ifap, 0);
765			ifa->ifa_broadcast = sin->sin_addr.s_addr;
766			inet_insert_ifa(ifa);
767		}
768		break;
769
770	case SIOCSIFDSTADDR:	/* Set the destination address */
771		ret = 0;
772		if (ifa->ifa_address == sin->sin_addr.s_addr)
773			break;
774		ret = -EINVAL;
775		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
776			break;
777		ret = 0;
778		inet_del_ifa(in_dev, ifap, 0);
779		ifa->ifa_address = sin->sin_addr.s_addr;
780		inet_insert_ifa(ifa);
781		break;
782
783	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
784
785		/*
786		 *	The mask we set must be legal.
787		 */
788		ret = -EINVAL;
789		if (bad_mask(sin->sin_addr.s_addr, 0))
790			break;
791		ret = 0;
792		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
793			__be32 old_mask = ifa->ifa_mask;
794			inet_del_ifa(in_dev, ifap, 0);
795			ifa->ifa_mask = sin->sin_addr.s_addr;
796			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
797
798			/* See if current broadcast address matches
799			 * with current netmask, then recalculate
800			 * the broadcast address. Otherwise it's a
801			 * funny address, so don't touch it since
802			 * the user seems to know what (s)he's doing...
803			 */
804			if ((dev->flags & IFF_BROADCAST) &&
805			    (ifa->ifa_prefixlen < 31) &&
806			    (ifa->ifa_broadcast ==
807			     (ifa->ifa_local|~old_mask))) {
808				ifa->ifa_broadcast = (ifa->ifa_local |
809						      ~sin->sin_addr.s_addr);
810			}
811			inet_insert_ifa(ifa);
812		}
813		break;
814	}
815done:
816	rtnl_unlock();
817out:
818	return ret;
819rarok:
820	rtnl_unlock();
821	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
822	goto out;
823}
824
825static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
826{
827	struct in_device *in_dev = __in_dev_get_rtnl(dev);
828	struct in_ifaddr *ifa;
829	struct ifreq ifr;
830	int done = 0;
831
832	if (!in_dev)
833		goto out;
834
835	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
836		if (!buf) {
837			done += sizeof(ifr);
838			continue;
839		}
840		if (len < (int) sizeof(ifr))
841			break;
842		memset(&ifr, 0, sizeof(struct ifreq));
843		if (ifa->ifa_label)
844			strcpy(ifr.ifr_name, ifa->ifa_label);
845		else
846			strcpy(ifr.ifr_name, dev->name);
847
848		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
849		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
850								ifa->ifa_local;
851
852		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
853			done = -EFAULT;
854			break;
855		}
856		buf  += sizeof(struct ifreq);
857		len  -= sizeof(struct ifreq);
858		done += sizeof(struct ifreq);
859	}
860out:
861	return done;
862}
863
864__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
865{
866	__be32 addr = 0;
867	struct in_device *in_dev;
868	struct net *net = dev_net(dev);
869
870	rcu_read_lock();
871	in_dev = __in_dev_get_rcu(dev);
872	if (!in_dev)
873		goto no_in_dev;
874
875	for_primary_ifa(in_dev) {
876		if (ifa->ifa_scope > scope)
877			continue;
878		if (!dst || inet_ifa_match(dst, ifa)) {
879			addr = ifa->ifa_local;
880			break;
881		}
882		if (!addr)
883			addr = ifa->ifa_local;
884	} endfor_ifa(in_dev);
885
886	if (addr)
887		goto out_unlock;
888no_in_dev:
889
890	/* Not loopback addresses on loopback should be preferred
891	   in this case. It is importnat that lo is the first interface
892	   in dev_base list.
893	 */
894	for_each_netdev_rcu(net, dev) {
895		in_dev = __in_dev_get_rcu(dev);
896		if (!in_dev)
897			continue;
898
899		for_primary_ifa(in_dev) {
900			if (ifa->ifa_scope != RT_SCOPE_LINK &&
901			    ifa->ifa_scope <= scope) {
902				addr = ifa->ifa_local;
903				goto out_unlock;
904			}
905		} endfor_ifa(in_dev);
906	}
907out_unlock:
908	rcu_read_unlock();
909	return addr;
910}
911EXPORT_SYMBOL(inet_select_addr);
912
913static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
914			      __be32 local, int scope)
915{
916	int same = 0;
917	__be32 addr = 0;
918
919	for_ifa(in_dev) {
920		if (!addr &&
921		    (local == ifa->ifa_local || !local) &&
922		    ifa->ifa_scope <= scope) {
923			addr = ifa->ifa_local;
924			if (same)
925				break;
926		}
927		if (!same) {
928			same = (!local || inet_ifa_match(local, ifa)) &&
929				(!dst || inet_ifa_match(dst, ifa));
930			if (same && addr) {
931				if (local || !dst)
932					break;
933				/* Is the selected addr into dst subnet? */
934				if (inet_ifa_match(addr, ifa))
935					break;
936				/* No, then can we use new local src? */
937				if (ifa->ifa_scope <= scope) {
938					addr = ifa->ifa_local;
939					break;
940				}
941				/* search for large dst subnet for addr */
942				same = 0;
943			}
944		}
945	} endfor_ifa(in_dev);
946
947	return same ? addr : 0;
948}
949
950/*
951 * Confirm that local IP address exists using wildcards:
952 * - in_dev: only on this interface, 0=any interface
953 * - dst: only in the same subnet as dst, 0=any dst
954 * - local: address, 0=autoselect the local address
955 * - scope: maximum allowed scope value for the local address
956 */
957__be32 inet_confirm_addr(struct in_device *in_dev,
958			 __be32 dst, __be32 local, int scope)
959{
960	__be32 addr = 0;
961	struct net_device *dev;
962	struct net *net;
963
964	if (scope != RT_SCOPE_LINK)
965		return confirm_addr_indev(in_dev, dst, local, scope);
966
967	net = dev_net(in_dev->dev);
968	rcu_read_lock();
969	for_each_netdev_rcu(net, dev) {
970		in_dev = __in_dev_get_rcu(dev);
971		if (in_dev) {
972			addr = confirm_addr_indev(in_dev, dst, local, scope);
973			if (addr)
974				break;
975		}
976	}
977	rcu_read_unlock();
978
979	return addr;
980}
981
982/*
983 *	Device notifier
984 */
985
986int register_inetaddr_notifier(struct notifier_block *nb)
987{
988	return blocking_notifier_chain_register(&inetaddr_chain, nb);
989}
990EXPORT_SYMBOL(register_inetaddr_notifier);
991
992int unregister_inetaddr_notifier(struct notifier_block *nb)
993{
994	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
995}
996EXPORT_SYMBOL(unregister_inetaddr_notifier);
997
998/* Rename ifa_labels for a device name change. Make some effort to preserve
999 * existing alias numbering and to create unique labels if possible.
1000*/
1001static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1002{
1003	struct in_ifaddr *ifa;
1004	int named = 0;
1005
1006	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1007		char old[IFNAMSIZ], *dot;
1008
1009		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1010		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1011		if (named++ == 0)
1012			goto skip;
1013		dot = strchr(old, ':');
1014		if (dot == NULL) {
1015			sprintf(old, ":%d", named);
1016			dot = old;
1017		}
1018		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1019			strcat(ifa->ifa_label, dot);
1020		else
1021			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1022skip:
1023		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1024	}
1025}
1026
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device:
 * true for 68 and above, false below that.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	const unsigned min_mtu = 68;

	return !(mtu < min_mtu);
}
1031
1032/* Called only under RTNL semaphore */
1033
1034static int inetdev_event(struct notifier_block *this, unsigned long event,
1035			 void *ptr)
1036{
1037	struct net_device *dev = ptr;
1038	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1039
1040	ASSERT_RTNL();
1041
1042	if (!in_dev) {
1043		if (event == NETDEV_REGISTER) {
1044			in_dev = inetdev_init(dev);
1045			if (!in_dev)
1046				return notifier_from_errno(-ENOMEM);
1047			if (dev->flags & IFF_LOOPBACK) {
1048				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1049				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1050			}
1051		} else if (event == NETDEV_CHANGEMTU) {
1052			/* Re-enabling IP */
1053			if (inetdev_valid_mtu(dev->mtu))
1054				in_dev = inetdev_init(dev);
1055		}
1056		goto out;
1057	}
1058
1059	switch (event) {
1060	case NETDEV_REGISTER:
1061		printk(KERN_DEBUG "inetdev_event: bug\n");
1062		dev->ip_ptr = NULL;
1063		break;
1064	case NETDEV_UP:
1065		if (!inetdev_valid_mtu(dev->mtu))
1066			break;
1067		if (dev->flags & IFF_LOOPBACK) {
1068			struct in_ifaddr *ifa = inet_alloc_ifa();
1069
1070			if (ifa) {
1071				ifa->ifa_local =
1072				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1073				ifa->ifa_prefixlen = 8;
1074				ifa->ifa_mask = inet_make_mask(8);
1075				in_dev_hold(in_dev);
1076				ifa->ifa_dev = in_dev;
1077				ifa->ifa_scope = RT_SCOPE_HOST;
1078				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1079				inet_insert_ifa(ifa);
1080			}
1081		}
1082		ip_mc_up(in_dev);
1083		/* fall through */
1084	case NETDEV_NOTIFY_PEERS:
1085	case NETDEV_CHANGEADDR:
1086		/* Send gratuitous ARP to notify of link change */
1087		if (IN_DEV_ARP_NOTIFY(in_dev)) {
1088			struct in_ifaddr *ifa = in_dev->ifa_list;
1089
1090			if (ifa)
1091				arp_send(ARPOP_REQUEST, ETH_P_ARP,
1092					 ifa->ifa_address, dev,
1093					 ifa->ifa_address, NULL,
1094					 dev->dev_addr, NULL);
1095		}
1096		break;
1097	case NETDEV_DOWN:
1098		ip_mc_down(in_dev);
1099		break;
1100	case NETDEV_PRE_TYPE_CHANGE:
1101		ip_mc_unmap(in_dev);
1102		break;
1103	case NETDEV_POST_TYPE_CHANGE:
1104		ip_mc_remap(in_dev);
1105		break;
1106	case NETDEV_CHANGEMTU:
1107		if (inetdev_valid_mtu(dev->mtu))
1108			break;
1109		/* disable IP when MTU is not enough */
1110	case NETDEV_UNREGISTER:
1111		inetdev_destroy(in_dev);
1112		break;
1113	case NETDEV_CHANGENAME:
1114		/* Do not notify about label change, this event is
1115		 * not interesting to applications using netlink.
1116		 */
1117		inetdev_changename(dev, in_dev);
1118
1119		devinet_sysctl_unregister(in_dev);
1120		devinet_sysctl_register(in_dev);
1121		break;
1122	}
1123out:
1124	return NOTIFY_DONE;
1125}
1126
1127static struct notifier_block ip_netdev_notifier = {
1128	.notifier_call = inetdev_event,
1129};
1130
1131static inline size_t inet_nlmsg_size(void)
1132{
1133	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1134	       + nla_total_size(4) /* IFA_ADDRESS */
1135	       + nla_total_size(4) /* IFA_LOCAL */
1136	       + nla_total_size(4) /* IFA_BROADCAST */
1137	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1138}
1139
1140static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1141			    u32 pid, u32 seq, int event, unsigned int flags)
1142{
1143	struct ifaddrmsg *ifm;
1144	struct nlmsghdr  *nlh;
1145
1146	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1147	if (nlh == NULL)
1148		return -EMSGSIZE;
1149
1150	ifm = nlmsg_data(nlh);
1151	ifm->ifa_family = AF_INET;
1152	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1153	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1154	ifm->ifa_scope = ifa->ifa_scope;
1155	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1156
1157	if (ifa->ifa_address)
1158		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1159
1160	if (ifa->ifa_local)
1161		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1162
1163	if (ifa->ifa_broadcast)
1164		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1165
1166	if (ifa->ifa_label[0])
1167		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1168
1169	return nlmsg_end(skb, nlh);
1170
1171nla_put_failure:
1172	nlmsg_cancel(skb, nlh);
1173	return -EMSGSIZE;
1174}
1175
1176static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1177{
1178	struct net *net = sock_net(skb->sk);
1179	int h, s_h;
1180	int idx, s_idx;
1181	int ip_idx, s_ip_idx;
1182	struct net_device *dev;
1183	struct in_device *in_dev;
1184	struct in_ifaddr *ifa;
1185	struct hlist_head *head;
1186	struct hlist_node *node;
1187
1188	s_h = cb->args[0];
1189	s_idx = idx = cb->args[1];
1190	s_ip_idx = ip_idx = cb->args[2];
1191
1192	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1193		idx = 0;
1194		head = &net->dev_index_head[h];
1195		rcu_read_lock();
1196		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1197			if (idx < s_idx)
1198				goto cont;
1199			if (h > s_h || idx > s_idx)
1200				s_ip_idx = 0;
1201			in_dev = __in_dev_get_rcu(dev);
1202			if (!in_dev)
1203				goto cont;
1204
1205			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1206			     ifa = ifa->ifa_next, ip_idx++) {
1207				if (ip_idx < s_ip_idx)
1208					continue;
1209				if (inet_fill_ifaddr(skb, ifa,
1210					     NETLINK_CB(cb->skb).pid,
1211					     cb->nlh->nlmsg_seq,
1212					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1213					rcu_read_unlock();
1214					goto done;
1215				}
1216			}
1217cont:
1218			idx++;
1219		}
1220		rcu_read_unlock();
1221	}
1222
1223done:
1224	cb->args[0] = h;
1225	cb->args[1] = idx;
1226	cb->args[2] = ip_idx;
1227
1228	return skb->len;
1229}
1230
1231static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1232		      u32 pid)
1233{
1234	struct sk_buff *skb;
1235	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1236	int err = -ENOBUFS;
1237	struct net *net;
1238
1239	net = dev_net(ifa->ifa_dev->dev);
1240	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1241	if (skb == NULL)
1242		goto errout;
1243
1244	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1245	if (err < 0) {
1246		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1247		WARN_ON(err == -EMSGSIZE);
1248		kfree_skb(skb);
1249		goto errout;
1250	}
1251	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1252	return;
1253errout:
1254	if (err < 0)
1255		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1256}
1257
1258#ifdef CONFIG_SYSCTL
1259
1260static void devinet_copy_dflt_conf(struct net *net, int i)
1261{
1262	struct net_device *dev;
1263
1264	rcu_read_lock();
1265	for_each_netdev_rcu(net, dev) {
1266		struct in_device *in_dev;
1267
1268		in_dev = __in_dev_get_rcu(dev);
1269		if (in_dev && !test_bit(i, in_dev->cnf.state))
1270			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1271	}
1272	rcu_read_unlock();
1273}
1274
1275/* called with RTNL locked */
1276static void inet_forward_change(struct net *net)
1277{
1278	struct net_device *dev;
1279	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1280
1281	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1282	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1283
1284	for_each_netdev(net, dev) {
1285		struct in_device *in_dev;
1286		if (on)
1287			dev_disable_lro(dev);
1288		rcu_read_lock();
1289		in_dev = __in_dev_get_rcu(dev);
1290		if (in_dev)
1291			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1292		rcu_read_unlock();
1293	}
1294}
1295
1296static int devinet_conf_proc(ctl_table *ctl, int write,
1297			     void __user *buffer,
1298			     size_t *lenp, loff_t *ppos)
1299{
1300	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1301
1302	if (write) {
1303		struct ipv4_devconf *cnf = ctl->extra1;
1304		struct net *net = ctl->extra2;
1305		int i = (int *)ctl->data - cnf->data;
1306
1307		set_bit(i, cnf->state);
1308
1309		if (cnf == net->ipv4.devconf_dflt)
1310			devinet_copy_dflt_conf(net, i);
1311	}
1312
1313	return ret;
1314}
1315
1316static int devinet_sysctl_forward(ctl_table *ctl, int write,
1317				  void __user *buffer,
1318				  size_t *lenp, loff_t *ppos)
1319{
1320	int *valp = ctl->data;
1321	int val = *valp;
1322	loff_t pos = *ppos;
1323	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1324
1325	if (write && *valp != val) {
1326		struct net *net = ctl->extra2;
1327
1328		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1329			if (!rtnl_trylock()) {
1330				/* Restore the original values before restarting */
1331				*valp = val;
1332				*ppos = pos;
1333				return restart_syscall();
1334			}
1335			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1336				inet_forward_change(net);
1337			} else if (*valp) {
1338				struct ipv4_devconf *cnf = ctl->extra1;
1339				struct in_device *idev =
1340					container_of(cnf, struct in_device, cnf);
1341				dev_disable_lro(idev->dev);
1342			}
1343			rtnl_unlock();
1344			rt_cache_flush(net, 0);
1345		}
1346	}
1347
1348	return ret;
1349}
1350
1351int ipv4_doint_and_flush(ctl_table *ctl, int write,
1352			 void __user *buffer,
1353			 size_t *lenp, loff_t *ppos)
1354{
1355	int *valp = ctl->data;
1356	int val = *valp;
1357	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1358	struct net *net = ctl->extra2;
1359
1360	if (write && *valp != val)
1361		rt_cache_flush(net, 0);
1362
1363	return ret;
1364}
1365
/*
 * Initializer for one ctl_table entry of the devconf sysctl template.
 *
 *   attr - suffix of the IPV4_DEVCONF_* index; .data points at slot
 *          (index - 1) of the global ipv4_devconf
 *   name - file name of the entry
 *   mval - file mode
 *   proc - proc handler
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write entry (mode 0644) served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry (mode 0444) served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry (mode 0644) with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry served by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1388
1389static struct devinet_sysctl_table {
1390	struct ctl_table_header *sysctl_header;
1391	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1392	char *dev_name;
1393} devinet_sysctl = {
1394	.devinet_vars = {
1395		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1396					     devinet_sysctl_forward),
1397		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1398
1399		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1400		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1401		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1402		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1403		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1404		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1405					"accept_source_route"),
1406		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1407		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1408		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1409		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1410		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1411		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1412		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1413		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1414		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1415		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1416		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1417		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1418		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1419
1420		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1421		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1422		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1423					      "force_igmp_version"),
1424		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1425					      "promote_secondaries"),
1426	},
1427};
1428
1429static int __devinet_sysctl_register(struct net *net, char *dev_name,
1430					struct ipv4_devconf *p)
1431{
1432	int i;
1433	struct devinet_sysctl_table *t;
1434
1435#define DEVINET_CTL_PATH_DEV	3
1436
1437	struct ctl_path devinet_ctl_path[] = {
1438		{ .procname = "net",  },
1439		{ .procname = "ipv4", },
1440		{ .procname = "conf", },
1441		{ /* to be set */ },
1442		{ },
1443	};
1444
1445	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1446	if (!t)
1447		goto out;
1448
1449	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1450		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1451		t->devinet_vars[i].extra1 = p;
1452		t->devinet_vars[i].extra2 = net;
1453	}
1454
1455	/*
1456	 * Make a copy of dev_name, because '.procname' is regarded as const
1457	 * by sysctl and we wouldn't want anyone to change it under our feet
1458	 * (see SIOCSIFNAME).
1459	 */
1460	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1461	if (!t->dev_name)
1462		goto free;
1463
1464	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1465
1466	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1467			t->devinet_vars);
1468	if (!t->sysctl_header)
1469		goto free_procname;
1470
1471	p->sysctl = t;
1472	return 0;
1473
1474free_procname:
1475	kfree(t->dev_name);
1476free:
1477	kfree(t);
1478out:
1479	return -ENOBUFS;
1480}
1481
1482static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1483{
1484	struct devinet_sysctl_table *t = cnf->sysctl;
1485
1486	if (t == NULL)
1487		return;
1488
1489	cnf->sysctl = NULL;
1490	unregister_sysctl_table(t->sysctl_header);
1491	kfree(t->dev_name);
1492	kfree(t);
1493}
1494
1495static void devinet_sysctl_register(struct in_device *idev)
1496{
1497	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1498	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1499					&idev->cnf);
1500}
1501
1502static void devinet_sysctl_unregister(struct in_device *idev)
1503{
1504	__devinet_sysctl_unregister(&idev->cnf);
1505	neigh_sysctl_unregister(idev->arp_parms);
1506}
1507
1508static struct ctl_table ctl_forward_entry[] = {
1509	{
1510		.procname	= "ip_forward",
1511		.data		= &ipv4_devconf.data[
1512					IPV4_DEVCONF_FORWARDING - 1],
1513		.maxlen		= sizeof(int),
1514		.mode		= 0644,
1515		.proc_handler	= devinet_sysctl_forward,
1516		.extra1		= &ipv4_devconf,
1517		.extra2		= &init_net,
1518	},
1519	{ },
1520};
1521
1522static __net_initdata struct ctl_path net_ipv4_path[] = {
1523	{ .procname = "net", },
1524	{ .procname = "ipv4", },
1525	{ },
1526};
1527#endif
1528
1529static __net_init int devinet_init_net(struct net *net)
1530{
1531	int err;
1532	struct ipv4_devconf *all, *dflt;
1533#ifdef CONFIG_SYSCTL
1534	struct ctl_table *tbl = ctl_forward_entry;
1535	struct ctl_table_header *forw_hdr;
1536#endif
1537
1538	err = -ENOMEM;
1539	all = &ipv4_devconf;
1540	dflt = &ipv4_devconf_dflt;
1541
1542	if (!net_eq(net, &init_net)) {
1543		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1544		if (all == NULL)
1545			goto err_alloc_all;
1546
1547		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1548		if (dflt == NULL)
1549			goto err_alloc_dflt;
1550
1551#ifdef CONFIG_SYSCTL
1552		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1553		if (tbl == NULL)
1554			goto err_alloc_ctl;
1555
1556		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1557		tbl[0].extra1 = all;
1558		tbl[0].extra2 = net;
1559#endif
1560	}
1561
1562#ifdef CONFIG_SYSCTL
1563	err = __devinet_sysctl_register(net, "all", all);
1564	if (err < 0)
1565		goto err_reg_all;
1566
1567	err = __devinet_sysctl_register(net, "default", dflt);
1568	if (err < 0)
1569		goto err_reg_dflt;
1570
1571	err = -ENOMEM;
1572	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1573	if (forw_hdr == NULL)
1574		goto err_reg_ctl;
1575	net->ipv4.forw_hdr = forw_hdr;
1576#endif
1577
1578	net->ipv4.devconf_all = all;
1579	net->ipv4.devconf_dflt = dflt;
1580	return 0;
1581
1582#ifdef CONFIG_SYSCTL
1583err_reg_ctl:
1584	__devinet_sysctl_unregister(dflt);
1585err_reg_dflt:
1586	__devinet_sysctl_unregister(all);
1587err_reg_all:
1588	if (tbl != ctl_forward_entry)
1589		kfree(tbl);
1590err_alloc_ctl:
1591#endif
1592	if (dflt != &ipv4_devconf_dflt)
1593		kfree(dflt);
1594err_alloc_dflt:
1595	if (all != &ipv4_devconf)
1596		kfree(all);
1597err_alloc_all:
1598	return err;
1599}
1600
1601static __net_exit void devinet_exit_net(struct net *net)
1602{
1603#ifdef CONFIG_SYSCTL
1604	struct ctl_table *tbl;
1605
1606	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1607	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1608	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1609	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1610	kfree(tbl);
1611#endif
1612	kfree(net->ipv4.devconf_dflt);
1613	kfree(net->ipv4.devconf_all);
1614}
1615
1616static __net_initdata struct pernet_operations devinet_ops = {
1617	.init = devinet_init_net,
1618	.exit = devinet_exit_net,
1619};
1620
1621void __init devinet_init(void)
1622{
1623	register_pernet_subsys(&devinet_ops);
1624
1625	register_gifconf(PF_INET, inet_gifconf);
1626	register_netdevice_notifier(&ip_netdev_notifier);
1627
1628	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1629	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1630	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1631}
1632