1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 *	NET3	IP device support routines.
4 *
5 *	Derived from the IP parts of dev.c 1.0.19
6 * 		Authors:	Ross Biro
7 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9 *
10 *	Additional Authors:
11 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 *	Changes:
15 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16 *					lists.
17 *		Cyrus Durgin:		updated for kmod
18 *		Matthias Andree:	in devinet_ioctl, compare label and
19 *					address (4.4BSD alias style support),
20 *					fall back to comparing just the label
21 *					if no match found.
22 */
23
24
25#include <linux/uaccess.h>
26#include <linux/bitops.h>
27#include <linux/capability.h>
28#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/kernel.h>
31#include <linux/sched/signal.h>
32#include <linux/string.h>
33#include <linux/mm.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/in.h>
37#include <linux/errno.h>
38#include <linux/interrupt.h>
39#include <linux/if_addr.h>
40#include <linux/if_ether.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/etherdevice.h>
44#include <linux/skbuff.h>
45#include <linux/init.h>
46#include <linux/notifier.h>
47#include <linux/inetdevice.h>
48#include <linux/igmp.h>
49#include <linux/slab.h>
50#include <linux/hash.h>
51#ifdef CONFIG_SYSCTL
52#include <linux/sysctl.h>
53#endif
54#include <linux/kmod.h>
55#include <linux/netconf.h>
56
57#include <net/arp.h>
58#include <net/ip.h>
59#include <net/route.h>
60#include <net/ip_fib.h>
61#include <net/rtnetlink.h>
62#include <net/net_namespace.h>
63#include <net/addrconf.h>
64
65#define IPV6ONLY_FLAGS	\
66		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69
70static struct ipv4_devconf ipv4_devconf = {
71	.data = {
72		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79	},
80};
81
82static struct ipv4_devconf ipv4_devconf_dflt = {
83	.data = {
84		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92	},
93};
94
95#define IPV4_DEVCONF_DFLT(net, attr) \
96	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97
98static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99	[IFA_LOCAL]     	= { .type = NLA_U32 },
100	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104	[IFA_FLAGS]		= { .type = NLA_U32 },
105	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107	[IFA_PROTO]		= { .type = NLA_U8 },
108};
109
110struct inet_fill_args {
111	u32 portid;
112	u32 seq;
113	int event;
114	unsigned int flags;
115	int netnsid;
116	int ifindex;
117};
118
119#define IN4_ADDR_HSIZE_SHIFT	8
120#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121
122static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123
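/* Hash a local IPv4 address (mixed with the netns) into an index for the
 * inet_addr_lst hash table.
 */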
124static u32 inet_addr_hash(const struct net *net, __be32 addr)
125{
126	u32 val = (__force u32) addr ^ net_hash_mix(net);
127
128	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129}
130
131static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132{
133	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134
135	ASSERT_RTNL();
136	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137}
138
139static void inet_hash_remove(struct in_ifaddr *ifa)
140{
141	ASSERT_RTNL();
142	hlist_del_init_rcu(&ifa->hash);
143}
144
145/**
146 * __ip_dev_find - find the first device with a given source address.
147 * @net: the net namespace
148 * @addr: the source address
149 * @devref: if true, take a reference on the found device
150 *
 * If a caller uses devref=false, it should be protected by RCU or RTNL.
152 */
153struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154{
155	struct net_device *result = NULL;
156	struct in_ifaddr *ifa;
157
158	rcu_read_lock();
159	ifa = inet_lookup_ifaddr_rcu(net, addr);
160	if (!ifa) {
161		struct flowi4 fl4 = { .daddr = addr };
162		struct fib_result res = { 0 };
163		struct fib_table *local;
164
		/* Fall back to the FIB local table so that communication
		 * over loopback subnets works.
		 */
168		local = fib_get_table(net, RT_TABLE_LOCAL);
169		if (local &&
170		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171		    res.type == RTN_LOCAL)
172			result = FIB_RES_DEV(res);
173	} else {
174		result = ifa->ifa_dev->dev;
175	}
176	if (result && devref)
177		dev_hold(result);
178	rcu_read_unlock();
179	return result;
180}
181EXPORT_SYMBOL(__ip_dev_find);
182
183/* called under RCU lock */
184struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185{
186	u32 hash = inet_addr_hash(net, addr);
187	struct in_ifaddr *ifa;
188
189	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190		if (ifa->ifa_local == addr &&
191		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192			return ifa;
193
194	return NULL;
195}
196
197static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198
199static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201static void inet_del_ifa(struct in_device *in_dev,
202			 struct in_ifaddr __rcu **ifap,
203			 int destroy);
204#ifdef CONFIG_SYSCTL
205static int devinet_sysctl_register(struct in_device *idev);
206static void devinet_sysctl_unregister(struct in_device *idev);
207#else
208static int devinet_sysctl_register(struct in_device *idev)
209{
210	return 0;
211}
212static void devinet_sysctl_unregister(struct in_device *idev)
213{
214}
215#endif
216
217/* Locks all the inet devices. */
218
219static struct in_ifaddr *inet_alloc_ifa(void)
220{
221	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222}
223
224static void inet_rcu_free_ifa(struct rcu_head *head)
225{
226	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227	if (ifa->ifa_dev)
228		in_dev_put(ifa->ifa_dev);
229	kfree(ifa);
230}
231
232static void inet_free_ifa(struct in_ifaddr *ifa)
233{
234	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235}
236
237static void in_dev_free_rcu(struct rcu_head *head)
238{
239	struct in_device *idev = container_of(head, struct in_device, rcu_head);
240
241	kfree(rcu_dereference_protected(idev->mc_hash, 1));
242	kfree(idev);
243}
244
245void in_dev_finish_destroy(struct in_device *idev)
246{
247	struct net_device *dev = idev->dev;
248
249	WARN_ON(idev->ifa_list);
250	WARN_ON(idev->mc_list);
251#ifdef NET_REFCNT_DEBUG
252	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253#endif
254	netdev_put(dev, &idev->dev_tracker);
255	if (!idev->dead)
256		pr_err("Freeing alive in_device %p\n", idev);
257	else
258		call_rcu(&idev->rcu_head, in_dev_free_rcu);
259}
260EXPORT_SYMBOL(in_dev_finish_destroy);
261
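/* Allocate and attach an in_device to @dev: copy the per-netns default
 * devconf, allocate ARP parameters, register the sysctls and set
 * dev->ip_ptr last so packets can be received.  Called under RTNL.
 */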
262static struct in_device *inetdev_init(struct net_device *dev)
263{
264	struct in_device *in_dev;
265	int err = -ENOMEM;
266
267	ASSERT_RTNL();
268
269	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270	if (!in_dev)
271		goto out;
272	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273			sizeof(in_dev->cnf));
274	in_dev->cnf.sysctl = NULL;
275	in_dev->dev = dev;
276	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277	if (!in_dev->arp_parms)
278		goto out_kfree;
279	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280		dev_disable_lro(dev);
281	/* Reference in_dev->dev */
282	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283	/* Account for reference dev->ip_ptr (below) */
284	refcount_set(&in_dev->refcnt, 1);
285
286	err = devinet_sysctl_register(in_dev);
287	if (err) {
288		in_dev->dead = 1;
289		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290		in_dev_put(in_dev);
291		in_dev = NULL;
292		goto out;
293	}
294	ip_mc_init_dev(in_dev);
295	if (dev->flags & IFF_UP)
296		ip_mc_up(in_dev);
297
298	/* we can receive as soon as ip_ptr is set -- do this last */
299	rcu_assign_pointer(dev->ip_ptr, in_dev);
300out:
301	return in_dev ?: ERR_PTR(err);
302out_kfree:
303	kfree(in_dev);
304	in_dev = NULL;
305	goto out;
306}
307
308static void inetdev_destroy(struct in_device *in_dev)
309{
310	struct net_device *dev;
311	struct in_ifaddr *ifa;
312
313	ASSERT_RTNL();
314
315	dev = in_dev->dev;
316
317	in_dev->dead = 1;
318
319	ip_mc_destroy_dev(in_dev);
320
321	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323		inet_free_ifa(ifa);
324	}
325
326	RCU_INIT_POINTER(dev->ip_ptr, NULL);
327
328	devinet_sysctl_unregister(in_dev);
329	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330	arp_ifdown(dev);
331
332	in_dev_put(in_dev);
333}
334
335int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336{
337	const struct in_ifaddr *ifa;
338
339	rcu_read_lock();
340	in_dev_for_each_ifa_rcu(ifa, in_dev) {
341		if (inet_ifa_match(a, ifa)) {
342			if (!b || inet_ifa_match(b, ifa)) {
343				rcu_read_unlock();
344				return 1;
345			}
346		}
347	}
348	rcu_read_unlock();
349	return 0;
350}
351
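/* Unlink the address at @ifap and announce its deletion.  Deleting a
 * primary also removes its secondaries, unless promote_secondaries is
 * enabled, in which case the first matching secondary becomes the new
 * primary and its routes are re-added with the new prefsrc.
 */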
352static void __inet_del_ifa(struct in_device *in_dev,
353			   struct in_ifaddr __rcu **ifap,
354			   int destroy, struct nlmsghdr *nlh, u32 portid)
355{
356	struct in_ifaddr *promote = NULL;
357	struct in_ifaddr *ifa, *ifa1;
358	struct in_ifaddr __rcu **last_prim;
359	struct in_ifaddr *prev_prom = NULL;
360	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
361
362	ASSERT_RTNL();
363
364	ifa1 = rtnl_dereference(*ifap);
365	last_prim = ifap;
366	if (in_dev->dead)
367		goto no_promotions;
368
	/* 1. Deleting the primary ifaddr forces deletion of all secondaries
	 * unless alias promotion is set.
	 */
372
373	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
374		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
375
376		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
377			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
378			    ifa1->ifa_scope <= ifa->ifa_scope)
379				last_prim = &ifa->ifa_next;
380
381			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
382			    ifa1->ifa_mask != ifa->ifa_mask ||
383			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
384				ifap1 = &ifa->ifa_next;
385				prev_prom = ifa;
386				continue;
387			}
388
389			if (!do_promote) {
390				inet_hash_remove(ifa);
391				*ifap1 = ifa->ifa_next;
392
393				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
394				blocking_notifier_call_chain(&inetaddr_chain,
395						NETDEV_DOWN, ifa);
396				inet_free_ifa(ifa);
397			} else {
398				promote = ifa;
399				break;
400			}
401		}
402	}
403
	/* On promotion all secondaries from the subnet are changing
	 * the primary IP; we must remove all their routes silently
	 * and later add them back with the new prefsrc. Do this
	 * while all addresses are still on the device list.
	 */
409	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
410		if (ifa1->ifa_mask == ifa->ifa_mask &&
411		    inet_ifa_match(ifa1->ifa_address, ifa))
412			fib_del_ifaddr(ifa, ifa1);
413	}
414
415no_promotions:
416	/* 2. Unlink it */
417
418	*ifap = ifa1->ifa_next;
419	inet_hash_remove(ifa1);
420
421	/* 3. Announce address deletion */
422
	/* Send the message first, then call the notifier.
	   At first sight, the FIB update triggered by the notifier
	   will refer to an already deleted ifaddr, which could confuse
	   netlink listeners. It is not true: look, gated sees
	   that the route was deleted and, if it still thinks the ifaddr
	   is valid, it will try to restore the deleted routes... Grr.
	   So this order is correct.
	 */
431	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
432	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
433
434	if (promote) {
435		struct in_ifaddr *next_sec;
436
437		next_sec = rtnl_dereference(promote->ifa_next);
438		if (prev_prom) {
439			struct in_ifaddr *last_sec;
440
441			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
442
443			last_sec = rtnl_dereference(*last_prim);
444			rcu_assign_pointer(promote->ifa_next, last_sec);
445			rcu_assign_pointer(*last_prim, promote);
446		}
447
448		promote->ifa_flags &= ~IFA_F_SECONDARY;
449		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
450		blocking_notifier_call_chain(&inetaddr_chain,
451				NETDEV_UP, promote);
452		for (ifa = next_sec; ifa;
453		     ifa = rtnl_dereference(ifa->ifa_next)) {
454			if (ifa1->ifa_mask != ifa->ifa_mask ||
455			    !inet_ifa_match(ifa1->ifa_address, ifa))
456					continue;
457			fib_add_ifaddr(ifa);
458		}
459
460	}
461	if (destroy)
462		inet_free_ifa(ifa1);
463}
464
465static void inet_del_ifa(struct in_device *in_dev,
466			 struct in_ifaddr __rcu **ifap,
467			 int destroy)
468{
469	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
470}
471
472static void check_lifetime(struct work_struct *work);
473
474static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475
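/* Insert @ifa into the device address list and the global hash.  An
 * address whose subnet is already covered by a primary becomes a
 * secondary, registered validators may veto the add, and the insertion is
 * announced via netlink and the inetaddr notifier chain.  Called under RTNL.
 */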
476static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477			     u32 portid, struct netlink_ext_ack *extack)
478{
479	struct in_ifaddr __rcu **last_primary, **ifap;
480	struct in_device *in_dev = ifa->ifa_dev;
481	struct in_validator_info ivi;
482	struct in_ifaddr *ifa1;
483	int ret;
484
485	ASSERT_RTNL();
486
487	if (!ifa->ifa_local) {
488		inet_free_ifa(ifa);
489		return 0;
490	}
491
492	ifa->ifa_flags &= ~IFA_F_SECONDARY;
493	last_primary = &in_dev->ifa_list;
494
	/* Don't set IPv6-only flags on IPv4 addresses */
496	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497
498	ifap = &in_dev->ifa_list;
499	ifa1 = rtnl_dereference(*ifap);
500
501	while (ifa1) {
502		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503		    ifa->ifa_scope <= ifa1->ifa_scope)
504			last_primary = &ifa1->ifa_next;
505		if (ifa1->ifa_mask == ifa->ifa_mask &&
506		    inet_ifa_match(ifa1->ifa_address, ifa)) {
507			if (ifa1->ifa_local == ifa->ifa_local) {
508				inet_free_ifa(ifa);
509				return -EEXIST;
510			}
511			if (ifa1->ifa_scope != ifa->ifa_scope) {
512				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
513				inet_free_ifa(ifa);
514				return -EINVAL;
515			}
516			ifa->ifa_flags |= IFA_F_SECONDARY;
517		}
518
519		ifap = &ifa1->ifa_next;
520		ifa1 = rtnl_dereference(*ifap);
521	}
522
	/* Allow any devices that wish to register ifaddr validators to weigh
	 * in now, before changes are committed.  The rtnl lock is serializing
	 * access here, so the state should not change between a validator call
	 * and a final notify on commit.  This isn't invoked on promotion under
	 * the assumption that validators are checking the address itself, and
	 * not the flags.
	 */
530	ivi.ivi_addr = ifa->ifa_address;
531	ivi.ivi_dev = ifa->ifa_dev;
532	ivi.extack = extack;
533	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
534					   NETDEV_UP, &ivi);
535	ret = notifier_to_errno(ret);
536	if (ret) {
537		inet_free_ifa(ifa);
538		return ret;
539	}
540
541	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
542		ifap = last_primary;
543
544	rcu_assign_pointer(ifa->ifa_next, *ifap);
545	rcu_assign_pointer(*ifap, ifa);
546
547	inet_hash_insert(dev_net(in_dev->dev), ifa);
548
549	cancel_delayed_work(&check_lifetime_work);
550	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
551
	/* Send the message first, then call the notifier.
	   The notifier will trigger a FIB update, so that
	   netlink listeners will know about the new ifaddr. */
555	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
556	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
557
558	return 0;
559}
560
561static int inet_insert_ifa(struct in_ifaddr *ifa)
562{
563	return __inet_insert_ifa(ifa, NULL, 0, NULL);
564}
565
566static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
567{
568	struct in_device *in_dev = __in_dev_get_rtnl(dev);
569
570	ASSERT_RTNL();
571
572	if (!in_dev) {
573		inet_free_ifa(ifa);
574		return -ENOBUFS;
575	}
576	ipv4_devconf_setall(in_dev);
577	neigh_parms_data_state_setall(in_dev->arp_parms);
578	if (ifa->ifa_dev != in_dev) {
579		WARN_ON(ifa->ifa_dev);
580		in_dev_hold(in_dev);
581		ifa->ifa_dev = in_dev;
582	}
583	if (ipv4_is_loopback(ifa->ifa_local))
584		ifa->ifa_scope = RT_SCOPE_HOST;
585	return inet_insert_ifa(ifa);
586}
587
/* Caller must hold RCU or RTNL:
 * we don't take a reference on the found in_device.
 */
591struct in_device *inetdev_by_index(struct net *net, int ifindex)
592{
593	struct net_device *dev;
594	struct in_device *in_dev = NULL;
595
596	rcu_read_lock();
597	dev = dev_get_by_index_rcu(net, ifindex);
598	if (dev)
599		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
600	rcu_read_unlock();
601	return in_dev;
602}
603EXPORT_SYMBOL(inetdev_by_index);
604
605/* Called only from RTNL semaphored context. No locks. */
606
607struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
608				    __be32 mask)
609{
610	struct in_ifaddr *ifa;
611
612	ASSERT_RTNL();
613
614	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
615		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
616			return ifa;
617	}
618	return NULL;
619}
620
621static int ip_mc_autojoin_config(struct net *net, bool join,
622				 const struct in_ifaddr *ifa)
623{
624#if defined(CONFIG_IP_MULTICAST)
625	struct ip_mreqn mreq = {
626		.imr_multiaddr.s_addr = ifa->ifa_address,
627		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
628	};
629	struct sock *sk = net->ipv4.mc_autojoin_sk;
630	int ret;
631
632	ASSERT_RTNL();
633
634	lock_sock(sk);
635	if (join)
636		ret = ip_mc_join_group(sk, &mreq);
637	else
638		ret = ip_mc_leave_group(sk, &mreq);
639	release_sock(sk);
640
641	return ret;
642#else
643	return -EOPNOTSUPP;
644#endif
645}
646
647static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
648			    struct netlink_ext_ack *extack)
649{
650	struct net *net = sock_net(skb->sk);
651	struct in_ifaddr __rcu **ifap;
652	struct nlattr *tb[IFA_MAX+1];
653	struct in_device *in_dev;
654	struct ifaddrmsg *ifm;
655	struct in_ifaddr *ifa;
656	int err;
657
658	ASSERT_RTNL();
659
660	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
661				     ifa_ipv4_policy, extack);
662	if (err < 0)
663		goto errout;
664
665	ifm = nlmsg_data(nlh);
666	in_dev = inetdev_by_index(net, ifm->ifa_index);
667	if (!in_dev) {
668		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
669		err = -ENODEV;
670		goto errout;
671	}
672
673	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
674	     ifap = &ifa->ifa_next) {
675		if (tb[IFA_LOCAL] &&
676		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
677			continue;
678
679		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
680			continue;
681
682		if (tb[IFA_ADDRESS] &&
683		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
684		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
685			continue;
686
687		if (ipv4_is_multicast(ifa->ifa_address))
688			ip_mc_autojoin_config(net, false, ifa);
689		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
690		return 0;
691	}
692
693	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
694	err = -EADDRNOTAVAIL;
695errout:
696	return err;
697}
698
699#define INFINITY_LIFE_TIME	0xFFFFFFFF
700
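/* Periodic worker: walk the address hash and expire addresses whose valid
 * lifetime has passed, or mark them IFA_F_DEPRECATED once the preferred
 * lifetime is over.  A lockless RCU pass detects the work, a second pass
 * under RTNL applies the changes.
 */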
701static void check_lifetime(struct work_struct *work)
702{
703	unsigned long now, next, next_sec, next_sched;
704	struct in_ifaddr *ifa;
705	struct hlist_node *n;
706	int i;
707
708	now = jiffies;
709	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
710
711	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
712		bool change_needed = false;
713
714		rcu_read_lock();
715		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
716			unsigned long age, tstamp;
717			u32 preferred_lft;
718			u32 valid_lft;
719			u32 flags;
720
721			flags = READ_ONCE(ifa->ifa_flags);
722			if (flags & IFA_F_PERMANENT)
723				continue;
724
725			preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
726			valid_lft = READ_ONCE(ifa->ifa_valid_lft);
727			tstamp = READ_ONCE(ifa->ifa_tstamp);
728			/* We try to batch several events at once. */
729			age = (now - tstamp +
730			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
731
732			if (valid_lft != INFINITY_LIFE_TIME &&
733			    age >= valid_lft) {
734				change_needed = true;
735			} else if (preferred_lft ==
736				   INFINITY_LIFE_TIME) {
737				continue;
738			} else if (age >= preferred_lft) {
739				if (time_before(tstamp + valid_lft * HZ, next))
740					next = tstamp + valid_lft * HZ;
741
742				if (!(flags & IFA_F_DEPRECATED))
743					change_needed = true;
744			} else if (time_before(tstamp + preferred_lft * HZ,
745					       next)) {
746				next = tstamp + preferred_lft * HZ;
747			}
748		}
749		rcu_read_unlock();
750		if (!change_needed)
751			continue;
752		rtnl_lock();
753		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
754			unsigned long age;
755
756			if (ifa->ifa_flags & IFA_F_PERMANENT)
757				continue;
758
759			/* We try to batch several events at once. */
760			age = (now - ifa->ifa_tstamp +
761			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
762
763			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
764			    age >= ifa->ifa_valid_lft) {
765				struct in_ifaddr __rcu **ifap;
766				struct in_ifaddr *tmp;
767
768				ifap = &ifa->ifa_dev->ifa_list;
769				tmp = rtnl_dereference(*ifap);
770				while (tmp) {
771					if (tmp == ifa) {
772						inet_del_ifa(ifa->ifa_dev,
773							     ifap, 1);
774						break;
775					}
776					ifap = &tmp->ifa_next;
777					tmp = rtnl_dereference(*ifap);
778				}
779			} else if (ifa->ifa_preferred_lft !=
780				   INFINITY_LIFE_TIME &&
781				   age >= ifa->ifa_preferred_lft &&
782				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
783				ifa->ifa_flags |= IFA_F_DEPRECATED;
784				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
785			}
786		}
787		rtnl_unlock();
788	}
789
790	next_sec = round_jiffies_up(next);
791	next_sched = next;
792
793	/* If rounded timeout is accurate enough, accept it. */
794	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
795		next_sched = next_sec;
796
797	now = jiffies;
798	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
799	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
800		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
801
802	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
803			next_sched - now);
804}
805
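/* Convert the valid/preferred lifetimes (seconds) into jiffies and update
 * the address flags: an infinite valid lifetime means IFA_F_PERMANENT, a
 * zero preferred lifetime marks the address IFA_F_DEPRECATED.
 */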
806static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
807			     __u32 prefered_lft)
808{
809	unsigned long timeout;
810	u32 flags;
811
812	flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
813
814	timeout = addrconf_timeout_fixup(valid_lft, HZ);
815	if (addrconf_finite_timeout(timeout))
816		WRITE_ONCE(ifa->ifa_valid_lft, timeout);
817	else
818		flags |= IFA_F_PERMANENT;
819
820	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
821	if (addrconf_finite_timeout(timeout)) {
822		if (timeout == 0)
823			flags |= IFA_F_DEPRECATED;
824		WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
825	}
826	WRITE_ONCE(ifa->ifa_flags, flags);
827	WRITE_ONCE(ifa->ifa_tstamp, jiffies);
828	if (!ifa->ifa_cstamp)
829		WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
830}
831
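/* Parse an RTM_NEWADDR request into a freshly allocated in_ifaddr.
 * Returns the new address (holding a reference on the in_device) or an
 * ERR_PTR on failure; lifetimes are returned via @pvalid_lft/@pprefered_lft.
 */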
832static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
833				       __u32 *pvalid_lft, __u32 *pprefered_lft,
834				       struct netlink_ext_ack *extack)
835{
836	struct nlattr *tb[IFA_MAX+1];
837	struct in_ifaddr *ifa;
838	struct ifaddrmsg *ifm;
839	struct net_device *dev;
840	struct in_device *in_dev;
841	int err;
842
843	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
844				     ifa_ipv4_policy, extack);
845	if (err < 0)
846		goto errout;
847
848	ifm = nlmsg_data(nlh);
849	err = -EINVAL;
850
851	if (ifm->ifa_prefixlen > 32) {
852		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
853		goto errout;
854	}
855
856	if (!tb[IFA_LOCAL]) {
857		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
858		goto errout;
859	}
860
861	dev = __dev_get_by_index(net, ifm->ifa_index);
862	err = -ENODEV;
863	if (!dev) {
864		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
865		goto errout;
866	}
867
868	in_dev = __in_dev_get_rtnl(dev);
869	err = -ENOBUFS;
870	if (!in_dev)
871		goto errout;
872
873	ifa = inet_alloc_ifa();
874	if (!ifa)
		/*
		 * A potential in_dev allocation can be left alive; it stays
		 * assigned to its device and is destroyed with it.
		 */
879		goto errout;
880
881	ipv4_devconf_setall(in_dev);
882	neigh_parms_data_state_setall(in_dev->arp_parms);
883	in_dev_hold(in_dev);
884
885	if (!tb[IFA_ADDRESS])
886		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
887
888	INIT_HLIST_NODE(&ifa->hash);
889	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
890	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
891	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
892					 ifm->ifa_flags;
893	ifa->ifa_scope = ifm->ifa_scope;
894	ifa->ifa_dev = in_dev;
895
896	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
897	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
898
899	if (tb[IFA_BROADCAST])
900		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
901
902	if (tb[IFA_LABEL])
903		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
904	else
905		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
906
907	if (tb[IFA_RT_PRIORITY])
908		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
909
910	if (tb[IFA_PROTO])
911		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
912
913	if (tb[IFA_CACHEINFO]) {
914		struct ifa_cacheinfo *ci;
915
916		ci = nla_data(tb[IFA_CACHEINFO]);
917		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
918			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
919			err = -EINVAL;
920			goto errout_free;
921		}
922		*pvalid_lft = ci->ifa_valid;
923		*pprefered_lft = ci->ifa_prefered;
924	}
925
926	return ifa;
927
928errout_free:
929	inet_free_ifa(ifa);
930errout:
931	return ERR_PTR(err);
932}
933
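/* Look for an existing address on the same device with the same local
 * address and prefix; used by RTM_NEWADDR to decide between add and replace.
 */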
934static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
935{
936	struct in_device *in_dev = ifa->ifa_dev;
937	struct in_ifaddr *ifa1;
938
939	if (!ifa->ifa_local)
940		return NULL;
941
942	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
943		if (ifa1->ifa_mask == ifa->ifa_mask &&
944		    inet_ifa_match(ifa1->ifa_address, ifa) &&
945		    ifa1->ifa_local == ifa->ifa_local)
946			return ifa1;
947	}
948	return NULL;
949}
950
951static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
952			    struct netlink_ext_ack *extack)
953{
954	struct net *net = sock_net(skb->sk);
955	struct in_ifaddr *ifa;
956	struct in_ifaddr *ifa_existing;
957	__u32 valid_lft = INFINITY_LIFE_TIME;
958	__u32 prefered_lft = INFINITY_LIFE_TIME;
959
960	ASSERT_RTNL();
961
962	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
963	if (IS_ERR(ifa))
964		return PTR_ERR(ifa);
965
966	ifa_existing = find_matching_ifa(ifa);
967	if (!ifa_existing) {
968		/* It would be best to check for !NLM_F_CREATE here but
969		 * userspace already relies on not having to provide this.
970		 */
971		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
972		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
973			int ret = ip_mc_autojoin_config(net, true, ifa);
974
975			if (ret < 0) {
976				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
977				inet_free_ifa(ifa);
978				return ret;
979			}
980		}
981		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
982					 extack);
983	} else {
984		u32 new_metric = ifa->ifa_rt_priority;
985		u8 new_proto = ifa->ifa_proto;
986
987		inet_free_ifa(ifa);
988
989		if (nlh->nlmsg_flags & NLM_F_EXCL ||
990		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
991			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
992			return -EEXIST;
993		}
994		ifa = ifa_existing;
995
996		if (ifa->ifa_rt_priority != new_metric) {
997			fib_modify_prefix_metric(ifa, new_metric);
998			ifa->ifa_rt_priority = new_metric;
999		}
1000
1001		ifa->ifa_proto = new_proto;
1002
1003		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1004		cancel_delayed_work(&check_lifetime_work);
1005		queue_delayed_work(system_power_efficient_wq,
1006				&check_lifetime_work, 0);
1007		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1008	}
1009	return 0;
1010}
1011
1012/*
1013 *	Determine a default network mask, based on the IP address.
1014 */
1015
1016static int inet_abc_len(__be32 addr)
1017{
1018	int rc = -1;	/* Something else, probably a multicast. */
1019
1020	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1021		rc = 0;
1022	else {
1023		__u32 haddr = ntohl(addr);
1024		if (IN_CLASSA(haddr))
1025			rc = 8;
1026		else if (IN_CLASSB(haddr))
1027			rc = 16;
1028		else if (IN_CLASSC(haddr))
1029			rc = 24;
1030		else if (IN_CLASSE(haddr))
1031			rc = 32;
1032	}
1033
1034	return rc;
1035}
1036
1037
1038int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1039{
1040	struct sockaddr_in sin_orig;
1041	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1042	struct in_ifaddr __rcu **ifap = NULL;
1043	struct in_device *in_dev;
1044	struct in_ifaddr *ifa = NULL;
1045	struct net_device *dev;
1046	char *colon;
1047	int ret = -EFAULT;
1048	int tryaddrmatch = 0;
1049
1050	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1051
1052	/* save original address for comparison */
1053	memcpy(&sin_orig, sin, sizeof(*sin));
1054
1055	colon = strchr(ifr->ifr_name, ':');
1056	if (colon)
1057		*colon = 0;
1058
1059	dev_load(net, ifr->ifr_name);
1060
1061	switch (cmd) {
1062	case SIOCGIFADDR:	/* Get interface address */
1063	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1064	case SIOCGIFDSTADDR:	/* Get the destination address */
1065	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1066		/* Note that these ioctls will not sleep,
1067		   so that we do not impose a lock.
1068		   One day we will be forced to put shlock here (I mean SMP)
1069		 */
1070		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1071		memset(sin, 0, sizeof(*sin));
1072		sin->sin_family = AF_INET;
1073		break;
1074
1075	case SIOCSIFFLAGS:
1076		ret = -EPERM;
1077		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1078			goto out;
1079		break;
1080	case SIOCSIFADDR:	/* Set interface address (and family) */
1081	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1082	case SIOCSIFDSTADDR:	/* Set the destination address */
1083	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1084		ret = -EPERM;
1085		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1086			goto out;
1087		ret = -EINVAL;
1088		if (sin->sin_family != AF_INET)
1089			goto out;
1090		break;
1091	default:
1092		ret = -EINVAL;
1093		goto out;
1094	}
1095
1096	rtnl_lock();
1097
1098	ret = -ENODEV;
1099	dev = __dev_get_by_name(net, ifr->ifr_name);
1100	if (!dev)
1101		goto done;
1102
1103	if (colon)
1104		*colon = ':';
1105
1106	in_dev = __in_dev_get_rtnl(dev);
1107	if (in_dev) {
1108		if (tryaddrmatch) {
1109			/* Matthias Andree */
1110			/* compare label and address (4.4BSD style) */
1111			/* note: we only do this for a limited set of ioctls
1112			   and only if the original address family was AF_INET.
1113			   This is checked above. */
1114
1115			for (ifap = &in_dev->ifa_list;
1116			     (ifa = rtnl_dereference(*ifap)) != NULL;
1117			     ifap = &ifa->ifa_next) {
1118				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1119				    sin_orig.sin_addr.s_addr ==
1120							ifa->ifa_local) {
1121					break; /* found */
1122				}
1123			}
1124		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk, so we fall back to
		   comparing just the label */
1128		if (!ifa) {
1129			for (ifap = &in_dev->ifa_list;
1130			     (ifa = rtnl_dereference(*ifap)) != NULL;
1131			     ifap = &ifa->ifa_next)
1132				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1133					break;
1134		}
1135	}
1136
1137	ret = -EADDRNOTAVAIL;
1138	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1139		goto done;
1140
1141	switch (cmd) {
1142	case SIOCGIFADDR:	/* Get interface address */
1143		ret = 0;
1144		sin->sin_addr.s_addr = ifa->ifa_local;
1145		break;
1146
1147	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1148		ret = 0;
1149		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1150		break;
1151
1152	case SIOCGIFDSTADDR:	/* Get the destination address */
1153		ret = 0;
1154		sin->sin_addr.s_addr = ifa->ifa_address;
1155		break;
1156
1157	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1158		ret = 0;
1159		sin->sin_addr.s_addr = ifa->ifa_mask;
1160		break;
1161
1162	case SIOCSIFFLAGS:
1163		if (colon) {
1164			ret = -EADDRNOTAVAIL;
1165			if (!ifa)
1166				break;
1167			ret = 0;
1168			if (!(ifr->ifr_flags & IFF_UP))
1169				inet_del_ifa(in_dev, ifap, 1);
1170			break;
1171		}
1172		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1173		break;
1174
1175	case SIOCSIFADDR:	/* Set interface address (and family) */
1176		ret = -EINVAL;
1177		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1178			break;
1179
1180		if (!ifa) {
1181			ret = -ENOBUFS;
1182			ifa = inet_alloc_ifa();
1183			if (!ifa)
1184				break;
1185			INIT_HLIST_NODE(&ifa->hash);
1186			if (colon)
1187				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1188			else
1189				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1190		} else {
1191			ret = 0;
1192			if (ifa->ifa_local == sin->sin_addr.s_addr)
1193				break;
1194			inet_del_ifa(in_dev, ifap, 0);
1195			ifa->ifa_broadcast = 0;
1196			ifa->ifa_scope = 0;
1197		}
1198
1199		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1200
1201		if (!(dev->flags & IFF_POINTOPOINT)) {
1202			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1203			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1204			if ((dev->flags & IFF_BROADCAST) &&
1205			    ifa->ifa_prefixlen < 31)
1206				ifa->ifa_broadcast = ifa->ifa_address |
1207						     ~ifa->ifa_mask;
1208		} else {
1209			ifa->ifa_prefixlen = 32;
1210			ifa->ifa_mask = inet_make_mask(32);
1211		}
1212		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1213		ret = inet_set_ifa(dev, ifa);
1214		break;
1215
1216	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1217		ret = 0;
1218		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1219			inet_del_ifa(in_dev, ifap, 0);
1220			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1221			inet_insert_ifa(ifa);
1222		}
1223		break;
1224
1225	case SIOCSIFDSTADDR:	/* Set the destination address */
1226		ret = 0;
1227		if (ifa->ifa_address == sin->sin_addr.s_addr)
1228			break;
1229		ret = -EINVAL;
1230		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1231			break;
1232		ret = 0;
1233		inet_del_ifa(in_dev, ifap, 0);
1234		ifa->ifa_address = sin->sin_addr.s_addr;
1235		inet_insert_ifa(ifa);
1236		break;
1237
1238	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1239
1240		/*
1241		 *	The mask we set must be legal.
1242		 */
1243		ret = -EINVAL;
1244		if (bad_mask(sin->sin_addr.s_addr, 0))
1245			break;
1246		ret = 0;
1247		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1248			__be32 old_mask = ifa->ifa_mask;
1249			inet_del_ifa(in_dev, ifap, 0);
1250			ifa->ifa_mask = sin->sin_addr.s_addr;
1251			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1252
			/* See if the current broadcast address matches
			 * the current netmask; if so, recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
1259			if ((dev->flags & IFF_BROADCAST) &&
1260			    (ifa->ifa_prefixlen < 31) &&
1261			    (ifa->ifa_broadcast ==
1262			     (ifa->ifa_local|~old_mask))) {
1263				ifa->ifa_broadcast = (ifa->ifa_local |
1264						      ~sin->sin_addr.s_addr);
1265			}
1266			inet_insert_ifa(ifa);
1267		}
1268		break;
1269	}
1270done:
1271	rtnl_unlock();
1272out:
1273	return ret;
1274}
1275
1276int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1277{
1278	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1279	const struct in_ifaddr *ifa;
1280	struct ifreq ifr;
1281	int done = 0;
1282
1283	if (WARN_ON(size > sizeof(struct ifreq)))
1284		goto out;
1285
1286	if (!in_dev)
1287		goto out;
1288
1289	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1290		if (!buf) {
1291			done += size;
1292			continue;
1293		}
1294		if (len < size)
1295			break;
1296		memset(&ifr, 0, sizeof(struct ifreq));
1297		strcpy(ifr.ifr_name, ifa->ifa_label);
1298
1299		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1300		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1301								ifa->ifa_local;
1302
1303		if (copy_to_user(buf + done, &ifr, size)) {
1304			done = -EFAULT;
1305			break;
1306		}
1307		len  -= size;
1308		done += size;
1309	}
1310out:
1311	return done;
1312}
1313
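/* Return the first primary, non link-scoped address on @in_dev whose scope
 * is within @scope, or 0 if none is found.  Caller must hold RCU.
 */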
1314static __be32 in_dev_select_addr(const struct in_device *in_dev,
1315				 int scope)
1316{
1317	const struct in_ifaddr *ifa;
1318
1319	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1320		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1321			continue;
1322		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1323		    ifa->ifa_scope <= scope)
1324			return ifa->ifa_local;
1325	}
1326
1327	return 0;
1328}
1329
1330__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1331{
1332	const struct in_ifaddr *ifa;
1333	__be32 addr = 0;
1334	unsigned char localnet_scope = RT_SCOPE_HOST;
1335	struct in_device *in_dev;
1336	struct net *net = dev_net(dev);
1337	int master_idx;
1338
1339	rcu_read_lock();
1340	in_dev = __in_dev_get_rcu(dev);
1341	if (!in_dev)
1342		goto no_in_dev;
1343
1344	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1345		localnet_scope = RT_SCOPE_LINK;
1346
1347	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1348		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1349			continue;
1350		if (min(ifa->ifa_scope, localnet_scope) > scope)
1351			continue;
1352		if (!dst || inet_ifa_match(dst, ifa)) {
1353			addr = ifa->ifa_local;
1354			break;
1355		}
1356		if (!addr)
1357			addr = ifa->ifa_local;
1358	}
1359
1360	if (addr)
1361		goto out_unlock;
1362no_in_dev:
1363	master_idx = l3mdev_master_ifindex_rcu(dev);
1364
1365	/* For VRFs, the VRF device takes the place of the loopback device,
1366	 * with addresses on it being preferred.  Note in such cases the
1367	 * loopback device will be among the devices that fail the master_idx
1368	 * equality check in the loop below.
1369	 */
1370	if (master_idx &&
1371	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1372	    (in_dev = __in_dev_get_rcu(dev))) {
1373		addr = in_dev_select_addr(in_dev, scope);
1374		if (addr)
1375			goto out_unlock;
1376	}
1377
	/* Non-loopback addresses on the loopback device should be preferred
	   in this case. It is important that lo is the first interface
	   in the dev_base list.
	 */
1382	for_each_netdev_rcu(net, dev) {
1383		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1384			continue;
1385
1386		in_dev = __in_dev_get_rcu(dev);
1387		if (!in_dev)
1388			continue;
1389
1390		addr = in_dev_select_addr(in_dev, scope);
1391		if (addr)
1392			goto out_unlock;
1393	}
1394out_unlock:
1395	rcu_read_unlock();
1396	return addr;
1397}
1398EXPORT_SYMBOL(inet_select_addr);
1399
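/* Check whether @in_dev has an address matching the (dst, local, scope)
 * wildcards; returns the confirmed local address or 0.  Caller must hold RCU.
 */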
1400static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1401			      __be32 local, int scope)
1402{
1403	unsigned char localnet_scope = RT_SCOPE_HOST;
1404	const struct in_ifaddr *ifa;
1405	__be32 addr = 0;
1406	int same = 0;
1407
1408	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1409		localnet_scope = RT_SCOPE_LINK;
1410
1411	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1412		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1413
1414		if (!addr &&
1415		    (local == ifa->ifa_local || !local) &&
1416		    min_scope <= scope) {
1417			addr = ifa->ifa_local;
1418			if (same)
1419				break;
1420		}
1421		if (!same) {
1422			same = (!local || inet_ifa_match(local, ifa)) &&
1423				(!dst || inet_ifa_match(dst, ifa));
1424			if (same && addr) {
1425				if (local || !dst)
1426					break;
				/* Is the selected addr in the dst subnet? */
1428				if (inet_ifa_match(addr, ifa))
1429					break;
1430				/* No, then can we use new local src? */
1431				if (min_scope <= scope) {
1432					addr = ifa->ifa_local;
1433					break;
1434				}
1435				/* search for large dst subnet for addr */
1436				same = 0;
1437			}
1438		}
1439	}
1440
1441	return same ? addr : 0;
1442}
1443
1444/*
1445 * Confirm that local IP address exists using wildcards:
1446 * - net: netns to check, cannot be NULL
1447 * - in_dev: only on this interface, NULL=any interface
1448 * - dst: only in the same subnet as dst, 0=any dst
1449 * - local: address, 0=autoselect the local address
1450 * - scope: maximum allowed scope value for the local address
1451 */
1452__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1453			 __be32 dst, __be32 local, int scope)
1454{
1455	__be32 addr = 0;
1456	struct net_device *dev;
1457
1458	if (in_dev)
1459		return confirm_addr_indev(in_dev, dst, local, scope);
1460
1461	rcu_read_lock();
1462	for_each_netdev_rcu(net, dev) {
1463		in_dev = __in_dev_get_rcu(dev);
1464		if (in_dev) {
1465			addr = confirm_addr_indev(in_dev, dst, local, scope);
1466			if (addr)
1467				break;
1468		}
1469	}
1470	rcu_read_unlock();
1471
1472	return addr;
1473}
1474EXPORT_SYMBOL(inet_confirm_addr);
1475
1476/*
1477 *	Device notifier
1478 */
1479
1480int register_inetaddr_notifier(struct notifier_block *nb)
1481{
1482	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1483}
1484EXPORT_SYMBOL(register_inetaddr_notifier);
1485
1486int unregister_inetaddr_notifier(struct notifier_block *nb)
1487{
1488	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1489}
1490EXPORT_SYMBOL(unregister_inetaddr_notifier);
1491
1492int register_inetaddr_validator_notifier(struct notifier_block *nb)
1493{
1494	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1495}
1496EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1497
1498int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1499{
1500	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1501	    nb);
1502}
1503EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1504
1505/* Rename ifa_labels for a device name change. Make some effort to preserve
1506 * existing alias numbering and to create unique labels if possible.
1507*/
1508static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1509{
1510	struct in_ifaddr *ifa;
1511	int named = 0;
1512
1513	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1514		char old[IFNAMSIZ], *dot;
1515
1516		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1517		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1518		if (named++ == 0)
1519			goto skip;
1520		dot = strchr(old, ':');
1521		if (!dot) {
1522			sprintf(old, ":%d", named);
1523			dot = old;
1524		}
1525		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1526			strcat(ifa->ifa_label, dot);
1527		else
1528			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1529skip:
1530		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1531	}
1532}
1533
1534static void inetdev_send_gratuitous_arp(struct net_device *dev,
1535					struct in_device *in_dev)
1536
1537{
1538	const struct in_ifaddr *ifa;
1539
1540	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1541		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1542			 ifa->ifa_local, dev,
1543			 ifa->ifa_local, NULL,
1544			 dev->dev_addr, NULL);
1545	}
1546}
1547
1548/* Called only under RTNL semaphore */
1549
1550static int inetdev_event(struct notifier_block *this, unsigned long event,
1551			 void *ptr)
1552{
1553	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1554	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1555
1556	ASSERT_RTNL();
1557
1558	if (!in_dev) {
1559		if (event == NETDEV_REGISTER) {
1560			in_dev = inetdev_init(dev);
1561			if (IS_ERR(in_dev))
1562				return notifier_from_errno(PTR_ERR(in_dev));
1563			if (dev->flags & IFF_LOOPBACK) {
1564				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1565				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1566			}
1567		} else if (event == NETDEV_CHANGEMTU) {
1568			/* Re-enabling IP */
1569			if (inetdev_valid_mtu(dev->mtu))
1570				in_dev = inetdev_init(dev);
1571		}
1572		goto out;
1573	}
1574
1575	switch (event) {
1576	case NETDEV_REGISTER:
1577		pr_debug("%s: bug\n", __func__);
1578		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1579		break;
1580	case NETDEV_UP:
1581		if (!inetdev_valid_mtu(dev->mtu))
1582			break;
1583		if (dev->flags & IFF_LOOPBACK) {
1584			struct in_ifaddr *ifa = inet_alloc_ifa();
1585
1586			if (ifa) {
1587				INIT_HLIST_NODE(&ifa->hash);
1588				ifa->ifa_local =
1589				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1590				ifa->ifa_prefixlen = 8;
1591				ifa->ifa_mask = inet_make_mask(8);
1592				in_dev_hold(in_dev);
1593				ifa->ifa_dev = in_dev;
1594				ifa->ifa_scope = RT_SCOPE_HOST;
1595				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1596				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1597						 INFINITY_LIFE_TIME);
1598				ipv4_devconf_setall(in_dev);
1599				neigh_parms_data_state_setall(in_dev->arp_parms);
1600				inet_insert_ifa(ifa);
1601			}
1602		}
1603		ip_mc_up(in_dev);
1604		fallthrough;
1605	case NETDEV_CHANGEADDR:
1606		if (!IN_DEV_ARP_NOTIFY(in_dev))
1607			break;
1608		fallthrough;
1609	case NETDEV_NOTIFY_PEERS:
1610		/* Send gratuitous ARP to notify of link change */
1611		inetdev_send_gratuitous_arp(dev, in_dev);
1612		break;
1613	case NETDEV_DOWN:
1614		ip_mc_down(in_dev);
1615		break;
1616	case NETDEV_PRE_TYPE_CHANGE:
1617		ip_mc_unmap(in_dev);
1618		break;
1619	case NETDEV_POST_TYPE_CHANGE:
1620		ip_mc_remap(in_dev);
1621		break;
1622	case NETDEV_CHANGEMTU:
1623		if (inetdev_valid_mtu(dev->mtu))
1624			break;
1625		/* disable IP when MTU is not enough */
1626		fallthrough;
1627	case NETDEV_UNREGISTER:
1628		inetdev_destroy(in_dev);
1629		break;
1630	case NETDEV_CHANGENAME:
		/* Do not notify about the label change; this event is
		 * not interesting to applications using netlink.
		 */
1634		inetdev_changename(dev, in_dev);
1635
1636		devinet_sysctl_unregister(in_dev);
1637		devinet_sysctl_register(in_dev);
1638		break;
1639	}
1640out:
1641	return NOTIFY_DONE;
1642}
1643
1644static struct notifier_block ip_netdev_notifier = {
1645	.notifier_call = inetdev_event,
1646};
1647
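/* Upper bound on the payload size of an RTM_NEWADDR/RTM_DELADDR message. */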
1648static size_t inet_nlmsg_size(void)
1649{
1650	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1651	       + nla_total_size(4) /* IFA_ADDRESS */
1652	       + nla_total_size(4) /* IFA_LOCAL */
1653	       + nla_total_size(4) /* IFA_BROADCAST */
1654	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1655	       + nla_total_size(4)  /* IFA_FLAGS */
1656	       + nla_total_size(1)  /* IFA_PROTO */
1657	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1658	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1659}
1660
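/* Convert a jiffies timestamp into hundredths of a second since boot, the
 * unit used in struct ifa_cacheinfo.
 */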
1661static inline u32 cstamp_delta(unsigned long cstamp)
1662{
1663	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1664}
1665
1666static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1667			 unsigned long tstamp, u32 preferred, u32 valid)
1668{
1669	struct ifa_cacheinfo ci;
1670
1671	ci.cstamp = cstamp_delta(cstamp);
1672	ci.tstamp = cstamp_delta(tstamp);
1673	ci.ifa_prefered = preferred;
1674	ci.ifa_valid = valid;
1675
1676	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1677}
1678
1679static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1680			    struct inet_fill_args *args)
1681{
1682	struct ifaddrmsg *ifm;
1683	struct nlmsghdr  *nlh;
1684	unsigned long tstamp;
1685	u32 preferred, valid;
1686
1687	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1688			args->flags);
1689	if (!nlh)
1690		return -EMSGSIZE;
1691
1692	ifm = nlmsg_data(nlh);
1693	ifm->ifa_family = AF_INET;
1694	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1695	ifm->ifa_flags = READ_ONCE(ifa->ifa_flags);
1696	ifm->ifa_scope = ifa->ifa_scope;
1697	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1698
1699	if (args->netnsid >= 0 &&
1700	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1701		goto nla_put_failure;
1702
1703	tstamp = READ_ONCE(ifa->ifa_tstamp);
1704	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1705		preferred = READ_ONCE(ifa->ifa_preferred_lft);
1706		valid = READ_ONCE(ifa->ifa_valid_lft);
1707		if (preferred != INFINITY_LIFE_TIME) {
1708			long tval = (jiffies - tstamp) / HZ;
1709
1710			if (preferred > tval)
1711				preferred -= tval;
1712			else
1713				preferred = 0;
1714			if (valid != INFINITY_LIFE_TIME) {
1715				if (valid > tval)
1716					valid -= tval;
1717				else
1718					valid = 0;
1719			}
1720		}
1721	} else {
1722		preferred = INFINITY_LIFE_TIME;
1723		valid = INFINITY_LIFE_TIME;
1724	}
1725	if ((ifa->ifa_address &&
1726	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1727	    (ifa->ifa_local &&
1728	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1729	    (ifa->ifa_broadcast &&
1730	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1731	    (ifa->ifa_label[0] &&
1732	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1733	    (ifa->ifa_proto &&
1734	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1735	    nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) ||
1736	    (ifa->ifa_rt_priority &&
1737	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1738	    put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1739			  preferred, valid))
1740		goto nla_put_failure;
1741
1742	nlmsg_end(skb, nlh);
1743	return 0;
1744
1745nla_put_failure:
1746	nlmsg_cancel(skb, nlh);
1747	return -EMSGSIZE;
1748}
1749
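/* Strict checking of an address dump request: the header must be otherwise
 * clean and only IFA_TARGET_NETNSID is accepted as a filter attribute.
 */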
1750static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1751				      struct inet_fill_args *fillargs,
1752				      struct net **tgt_net, struct sock *sk,
1753				      struct netlink_callback *cb)
1754{
1755	struct netlink_ext_ack *extack = cb->extack;
1756	struct nlattr *tb[IFA_MAX+1];
1757	struct ifaddrmsg *ifm;
1758	int err, i;
1759
1760	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1761		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1762		return -EINVAL;
1763	}
1764
1765	ifm = nlmsg_data(nlh);
1766	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1767		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1768		return -EINVAL;
1769	}
1770
1771	fillargs->ifindex = ifm->ifa_index;
1772	if (fillargs->ifindex) {
1773		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1774		fillargs->flags |= NLM_F_DUMP_FILTERED;
1775	}
1776
1777	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1778					    ifa_ipv4_policy, extack);
1779	if (err < 0)
1780		return err;
1781
1782	for (i = 0; i <= IFA_MAX; ++i) {
1783		if (!tb[i])
1784			continue;
1785
1786		if (i == IFA_TARGET_NETNSID) {
1787			struct net *net;
1788
1789			fillargs->netnsid = nla_get_s32(tb[i]);
1790
1791			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1792			if (IS_ERR(net)) {
1793				fillargs->netnsid = -1;
1794				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1795				return PTR_ERR(net);
1796			}
1797			*tgt_net = net;
1798		} else {
1799			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1800			return -EINVAL;
1801		}
1802	}
1803
1804	return 0;
1805}
1806
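/* Dump the addresses of one in_device, resuming at *s_ip_idx and updating
 * it so that an interrupted dump can continue where it stopped.
 */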
1807static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1808			    struct netlink_callback *cb, int *s_ip_idx,
1809			    struct inet_fill_args *fillargs)
1810{
1811	struct in_ifaddr *ifa;
1812	int ip_idx = 0;
1813	int err;
1814
1815	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1816		if (ip_idx < *s_ip_idx) {
1817			ip_idx++;
1818			continue;
1819		}
1820		err = inet_fill_ifaddr(skb, ifa, fillargs);
1821		if (err < 0)
1822			goto done;
1823
1824		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1825		ip_idx++;
1826	}
1827	err = 0;
1828	ip_idx = 0;
1829done:
1830	*s_ip_idx = ip_idx;
1831
1832	return err;
1833}
1834
1835/* Combine dev_addr_genid and dev_base_seq to detect changes.
1836 */
1837static u32 inet_base_seq(const struct net *net)
1838{
1839	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1840		  READ_ONCE(net->dev_base_seq);
1841
	/* Must not return 0 (see nl_dump_check_consistent()).
	 * Choose a value far away from 0.
	 */
1845	if (!res)
1846		res = 0x80000000;
1847	return res;
1848}
1849
1850static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1851{
1852	const struct nlmsghdr *nlh = cb->nlh;
1853	struct inet_fill_args fillargs = {
1854		.portid = NETLINK_CB(cb->skb).portid,
1855		.seq = nlh->nlmsg_seq,
1856		.event = RTM_NEWADDR,
1857		.flags = NLM_F_MULTI,
1858		.netnsid = -1,
1859	};
1860	struct net *net = sock_net(skb->sk);
1861	struct net *tgt_net = net;
1862	struct {
1863		unsigned long ifindex;
1864		int ip_idx;
1865	} *ctx = (void *)cb->ctx;
1866	struct in_device *in_dev;
1867	struct net_device *dev;
1868	int err = 0;
1869
1870	rcu_read_lock();
1871	if (cb->strict_check) {
1872		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1873						 skb->sk, cb);
1874		if (err < 0)
1875			goto done;
1876
1877		if (fillargs.ifindex) {
1878			err = -ENODEV;
1879			dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1880			if (!dev)
1881				goto done;
1882			in_dev = __in_dev_get_rcu(dev);
1883			if (!in_dev)
1884				goto done;
1885			err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1886					       &fillargs);
1887			goto done;
1888		}
1889	}
1890
1891	cb->seq = inet_base_seq(tgt_net);
1892
1893	for_each_netdev_dump(net, dev, ctx->ifindex) {
1894		in_dev = __in_dev_get_rcu(dev);
1895		if (!in_dev)
1896			continue;
1897		err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1898				       &fillargs);
1899		if (err < 0)
1900			goto done;
1901	}
1902done:
1903	if (fillargs.netnsid >= 0)
1904		put_net(tgt_net);
1905	rcu_read_unlock();
1906	return err;
1907}
1908
1909static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1910		      u32 portid)
1911{
1912	struct inet_fill_args fillargs = {
1913		.portid = portid,
1914		.seq = nlh ? nlh->nlmsg_seq : 0,
1915		.event = event,
1916		.flags = 0,
1917		.netnsid = -1,
1918	};
1919	struct sk_buff *skb;
1920	int err = -ENOBUFS;
1921	struct net *net;
1922
1923	net = dev_net(ifa->ifa_dev->dev);
1924	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1925	if (!skb)
1926		goto errout;
1927
1928	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1929	if (err < 0) {
1930		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1931		WARN_ON(err == -EMSGSIZE);
1932		kfree_skb(skb);
1933		goto errout;
1934	}
1935	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1936	return;
1937errout:
1938	if (err < 0)
1939		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1940}
1941
1942static size_t inet_get_link_af_size(const struct net_device *dev,
1943				    u32 ext_filter_mask)
1944{
1945	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1946
1947	if (!in_dev)
1948		return 0;
1949
1950	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1951}
1952
1953static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1954			     u32 ext_filter_mask)
1955{
1956	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1957	struct nlattr *nla;
1958	int i;
1959
1960	if (!in_dev)
1961		return -ENODATA;
1962
1963	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1964	if (!nla)
1965		return -EMSGSIZE;
1966
1967	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1968		((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
1969
1970	return 0;
1971}
1972
1973static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1974	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1975};
1976
1977static int inet_validate_link_af(const struct net_device *dev,
1978				 const struct nlattr *nla,
1979				 struct netlink_ext_ack *extack)
1980{
1981	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1982	int err, rem;
1983
1984	if (dev && !__in_dev_get_rtnl(dev))
1985		return -EAFNOSUPPORT;
1986
1987	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1988					  inet_af_policy, extack);
1989	if (err < 0)
1990		return err;
1991
1992	if (tb[IFLA_INET_CONF]) {
1993		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1994			int cfgid = nla_type(a);
1995
1996			if (nla_len(a) < 4)
1997				return -EINVAL;
1998
1999			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2000				return -EINVAL;
2001		}
2002	}
2003
2004	return 0;
2005}
2006
2007static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2008			    struct netlink_ext_ack *extack)
2009{
2010	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2011	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2012	int rem;
2013
2014	if (!in_dev)
2015		return -EAFNOSUPPORT;
2016
2017	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2018		return -EINVAL;
2019
2020	if (tb[IFLA_INET_CONF]) {
2021		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2022			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2023	}
2024
2025	return 0;
2026}
2027
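/* Payload size of an RTM_NEWNETCONF message for the given attribute @type;
 * NETCONFA_ALL accounts for every attribute we may emit.
 */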
2028static int inet_netconf_msgsize_devconf(int type)
2029{
2030	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2031		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2032	bool all = false;
2033
2034	if (type == NETCONFA_ALL)
2035		all = true;
2036
2037	if (all || type == NETCONFA_FORWARDING)
2038		size += nla_total_size(4);
2039	if (all || type == NETCONFA_RP_FILTER)
2040		size += nla_total_size(4);
2041	if (all || type == NETCONFA_MC_FORWARDING)
2042		size += nla_total_size(4);
2043	if (all || type == NETCONFA_BC_FORWARDING)
2044		size += nla_total_size(4);
2045	if (all || type == NETCONFA_PROXY_NEIGH)
2046		size += nla_total_size(4);
2047	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2048		size += nla_total_size(4);
2049
2050	return size;
2051}
2052
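/* Emit a netconfmsg plus the requested NETCONFA_* attributes for @devconf.
 * A NULL @devconf (used with RTM_DELNETCONF) produces a message carrying
 * only NETCONFA_IFINDEX.
 */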
2053static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2054				     const struct ipv4_devconf *devconf,
2055				     u32 portid, u32 seq, int event,
2056				     unsigned int flags, int type)
2057{
2058	struct nlmsghdr  *nlh;
2059	struct netconfmsg *ncm;
2060	bool all = false;
2061
2062	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2063			flags);
2064	if (!nlh)
2065		return -EMSGSIZE;
2066
2067	if (type == NETCONFA_ALL)
2068		all = true;
2069
2070	ncm = nlmsg_data(nlh);
2071	ncm->ncm_family = AF_INET;
2072
2073	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2074		goto nla_put_failure;
2075
2076	if (!devconf)
2077		goto out;
2078
2079	if ((all || type == NETCONFA_FORWARDING) &&
2080	    nla_put_s32(skb, NETCONFA_FORWARDING,
2081			IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2082		goto nla_put_failure;
2083	if ((all || type == NETCONFA_RP_FILTER) &&
2084	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2085			IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2086		goto nla_put_failure;
2087	if ((all || type == NETCONFA_MC_FORWARDING) &&
2088	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2089			IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2090		goto nla_put_failure;
2091	if ((all || type == NETCONFA_BC_FORWARDING) &&
2092	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2093			IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2094		goto nla_put_failure;
2095	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2096	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2097			IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2098		goto nla_put_failure;
2099	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2100	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2101			IPV4_DEVCONF_RO(*devconf,
2102					IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2103		goto nla_put_failure;
2104
2105out:
2106	nlmsg_end(skb, nlh);
2107	return 0;
2108
2109nla_put_failure:
2110	nlmsg_cancel(skb, nlh);
2111	return -EMSGSIZE;
2112}
2113
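/* Multicast an RTM_NEWNETCONF/RTM_DELNETCONF notification for @ifindex to
 * RTNLGRP_IPV4_NETCONF listeners, e.g. when forwarding or rp_filter changes.
 */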
2114void inet_netconf_notify_devconf(struct net *net, int event, int type,
2115				 int ifindex, struct ipv4_devconf *devconf)
2116{
2117	struct sk_buff *skb;
2118	int err = -ENOBUFS;
2119
2120	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2121	if (!skb)
2122		goto errout;
2123
2124	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2125					event, 0, type);
2126	if (err < 0) {
2127		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2128		WARN_ON(err == -EMSGSIZE);
2129		kfree_skb(skb);
2130		goto errout;
2131	}
2132	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2133	return;
2134errout:
2135	if (err < 0)
2136		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2137}
2138
2139static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2140	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2141	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2142	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2143	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2144	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2145};
2146
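/* Validate an RTM_GETNETCONF request.  In strict mode only NETCONFA_IFINDEX
 * is accepted; legacy requesters fall back to plain (deprecated) parsing.
 */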
2147static int inet_netconf_valid_get_req(struct sk_buff *skb,
2148				      const struct nlmsghdr *nlh,
2149				      struct nlattr **tb,
2150				      struct netlink_ext_ack *extack)
2151{
2152	int i, err;
2153
2154	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2155		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2156		return -EINVAL;
2157	}
2158
2159	if (!netlink_strict_get_check(skb))
2160		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2161					      tb, NETCONFA_MAX,
2162					      devconf_ipv4_policy, extack);
2163
2164	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2165					    tb, NETCONFA_MAX,
2166					    devconf_ipv4_policy, extack);
2167	if (err)
2168		return err;
2169
2170	for (i = 0; i <= NETCONFA_MAX; i++) {
2171		if (!tb[i])
2172			continue;
2173
2174		switch (i) {
2175		case NETCONFA_IFINDEX:
2176			break;
2177		default:
2178			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2179			return -EINVAL;
2180		}
2181	}
2182
2183	return 0;
2184}
2185
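/* RTM_GETNETCONF doit handler: look up the per-device, "all" or "default"
 * devconf selected by NETCONFA_IFINDEX and unicast it back to the requester.
 */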
2186static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2187				    struct nlmsghdr *nlh,
2188				    struct netlink_ext_ack *extack)
2189{
2190	struct net *net = sock_net(in_skb->sk);
2191	struct nlattr *tb[NETCONFA_MAX + 1];
2192	const struct ipv4_devconf *devconf;
2193	struct in_device *in_dev = NULL;
2194	struct net_device *dev = NULL;
2195	struct sk_buff *skb;
2196	int ifindex;
2197	int err;
2198
2199	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2200	if (err)
2201		return err;
2202
2203	if (!tb[NETCONFA_IFINDEX])
2204		return -EINVAL;
2205
2206	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2207	switch (ifindex) {
2208	case NETCONFA_IFINDEX_ALL:
2209		devconf = net->ipv4.devconf_all;
2210		break;
2211	case NETCONFA_IFINDEX_DEFAULT:
2212		devconf = net->ipv4.devconf_dflt;
2213		break;
2214	default:
2215		err = -ENODEV;
2216		dev = dev_get_by_index(net, ifindex);
2217		if (dev)
2218			in_dev = in_dev_get(dev);
2219		if (!in_dev)
2220			goto errout;
2221		devconf = &in_dev->cnf;
2222		break;
2223	}
2224
2225	err = -ENOBUFS;
2226	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2227	if (!skb)
2228		goto errout;
2229
2230	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2231					NETLINK_CB(in_skb).portid,
2232					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2233					NETCONFA_ALL);
2234	if (err < 0) {
2235		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2236		WARN_ON(err == -EMSGSIZE);
2237		kfree_skb(skb);
2238		goto errout;
2239	}
2240	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2241errout:
2242	if (in_dev)
2243		in_dev_put(in_dev);
2244	dev_put(dev);
2245	return err;
2246}
2247
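/* RTM_GETNETCONF dump handler: walk all net devices under RCU, then append
 * the "all" and "default" pseudo entries.  The state kept in cb->ctx records
 * where to resume on the next dump pass.
 */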
2248static int inet_netconf_dump_devconf(struct sk_buff *skb,
2249				     struct netlink_callback *cb)
2250{
2251	const struct nlmsghdr *nlh = cb->nlh;
2252	struct net *net = sock_net(skb->sk);
2253	struct {
2254		unsigned long ifindex;
2255		unsigned int all_default;
2256	} *ctx = (void *)cb->ctx;
2257	const struct in_device *in_dev;
2258	struct net_device *dev;
2259	int err = 0;
2260
2261	if (cb->strict_check) {
2262		struct netlink_ext_ack *extack = cb->extack;
2263		struct netconfmsg *ncm;
2264
2265		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2266			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2267			return -EINVAL;
2268		}
2269
2270		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2271			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2272			return -EINVAL;
2273		}
2274	}
2275
2276	rcu_read_lock();
2277	for_each_netdev_dump(net, dev, ctx->ifindex) {
2278		in_dev = __in_dev_get_rcu(dev);
2279		if (!in_dev)
2280			continue;
2281		err = inet_netconf_fill_devconf(skb, dev->ifindex,
2282						&in_dev->cnf,
2283						NETLINK_CB(cb->skb).portid,
2284						nlh->nlmsg_seq,
2285						RTM_NEWNETCONF, NLM_F_MULTI,
2286						NETCONFA_ALL);
2287		if (err < 0)
2288			goto done;
2289	}
2290	if (ctx->all_default == 0) {
2291		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2292						net->ipv4.devconf_all,
2293						NETLINK_CB(cb->skb).portid,
2294						nlh->nlmsg_seq,
2295						RTM_NEWNETCONF, NLM_F_MULTI,
2296						NETCONFA_ALL);
2297		if (err < 0)
2298			goto done;
2299		ctx->all_default++;
2300	}
2301	if (ctx->all_default == 1) {
2302		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2303						net->ipv4.devconf_dflt,
2304						NETLINK_CB(cb->skb).portid,
2305						nlh->nlmsg_seq,
2306						RTM_NEWNETCONF, NLM_F_MULTI,
2307						NETCONFA_ALL);
2308		if (err < 0)
2309			goto done;
2310		ctx->all_default++;
2311	}
2312done:
2313	rcu_read_unlock();
2314	return err;
2315}
2316
2317#ifdef CONFIG_SYSCTL
2318
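/* Propagate a change of the "default" template value @i to every device that
 * has not explicitly overridden it (tracked by the cnf.state bitmap).
 */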
2319static void devinet_copy_dflt_conf(struct net *net, int i)
2320{
2321	struct net_device *dev;
2322
2323	rcu_read_lock();
2324	for_each_netdev_rcu(net, dev) {
2325		struct in_device *in_dev;
2326
2327		in_dev = __in_dev_get_rcu(dev);
2328		if (in_dev && !test_bit(i, in_dev->cnf.state))
2329			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2330	}
2331	rcu_read_unlock();
2332}
2333
2334/* called with RTNL locked */
2335static void inet_forward_change(struct net *net)
2336{
2337	struct net_device *dev;
2338	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2339
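	/* A host acting as a router should not honour ICMP redirects, so
	 * accept_redirects is flipped together with forwarding.
	 */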
2340	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2341	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2342	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2343				    NETCONFA_FORWARDING,
2344				    NETCONFA_IFINDEX_ALL,
2345				    net->ipv4.devconf_all);
2346	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2347				    NETCONFA_FORWARDING,
2348				    NETCONFA_IFINDEX_DEFAULT,
2349				    net->ipv4.devconf_dflt);
2350
2351	for_each_netdev(net, dev) {
2352		struct in_device *in_dev;
2353
2354		if (on)
2355			dev_disable_lro(dev);
2356
2357		in_dev = __in_dev_get_rtnl(dev);
2358		if (in_dev) {
2359			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2360			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2361						    NETCONFA_FORWARDING,
2362						    dev->ifindex, &in_dev->cnf);
2363		}
2364	}
2365}
2366
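/* Map a devconf back to the ifindex used in netconf notifications. */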
2367static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2368{
2369	if (cnf == net->ipv4.devconf_dflt)
2370		return NETCONFA_IFINDEX_DEFAULT;
2371	else if (cnf == net->ipv4.devconf_all)
2372		return NETCONFA_IFINDEX_ALL;
2373	else {
2374		struct in_device *idev
2375			= container_of(cnf, struct in_device, cnf);
2376		return idev->dev->ifindex;
2377	}
2378}
2379
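/* Generic proc handler for the per-device ipv4 sysctls: record the explicit
 * override, propagate changes of the "default" template, flush the route
 * cache where required and send netconf notifications for the attributes
 * user space tracks.
 */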
2380static int devinet_conf_proc(struct ctl_table *ctl, int write,
2381			     void *buffer, size_t *lenp, loff_t *ppos)
2382{
2383	int old_value = *(int *)ctl->data;
2384	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2385	int new_value = *(int *)ctl->data;
2386
2387	if (write) {
2388		struct ipv4_devconf *cnf = ctl->extra1;
2389		struct net *net = ctl->extra2;
2390		int i = (int *)ctl->data - cnf->data;
2391		int ifindex;
2392
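		/* i is the devconf index of this entry; mark it as explicitly
		 * configured so devinet_copy_dflt_conf() will not overwrite it
		 * when the "default" template changes.
		 */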
2393		set_bit(i, cnf->state);
2394
2395		if (cnf == net->ipv4.devconf_dflt)
2396			devinet_copy_dflt_conf(net, i);
2397		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2398		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2399			if ((new_value == 0) && (old_value != 0))
2400				rt_cache_flush(net);
2401
2402		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2403		    new_value != old_value)
2404			rt_cache_flush(net);
2405
2406		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2407		    new_value != old_value) {
2408			ifindex = devinet_conf_ifindex(net, cnf);
2409			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2410						    NETCONFA_RP_FILTER,
2411						    ifindex, cnf);
2412		}
2413		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2414		    new_value != old_value) {
2415			ifindex = devinet_conf_ifindex(net, cnf);
2416			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2417						    NETCONFA_PROXY_NEIGH,
2418						    ifindex, cnf);
2419		}
2420		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2421		    new_value != old_value) {
2422			ifindex = devinet_conf_ifindex(net, cnf);
2423			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2424						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2425						    ifindex, cnf);
2426		}
2427	}
2428
2429	return ret;
2430}
2431
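/* proc handler for the "forwarding" sysctls.  Writing requires CAP_NET_ADMIN;
 * changing "all" fans the value out to every device under RTNL (restarting
 * the syscall if RTNL cannot be taken), disables LRO when enabling, and
 * flushes the route cache.  The "default" entry only triggers a notification.
 */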
2432static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2433				  void *buffer, size_t *lenp, loff_t *ppos)
2434{
2435	int *valp = ctl->data;
2436	int val = *valp;
2437	loff_t pos = *ppos;
2438	struct net *net = ctl->extra2;
2439	int ret;
2440
2441	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2442		return -EPERM;
2443
2444	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2445
2446	if (write && *valp != val) {
2447		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2448			if (!rtnl_trylock()) {
2449				/* Restore the original values before restarting */
2450				*valp = val;
2451				*ppos = pos;
2452				return restart_syscall();
2453			}
2454			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2455				inet_forward_change(net);
2456			} else {
2457				struct ipv4_devconf *cnf = ctl->extra1;
2458				struct in_device *idev =
2459					container_of(cnf, struct in_device, cnf);
2460				if (*valp)
2461					dev_disable_lro(idev->dev);
2462				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2463							    NETCONFA_FORWARDING,
2464							    idev->dev->ifindex,
2465							    cnf);
2466			}
2467			rtnl_unlock();
2468			rt_cache_flush(net);
2469		} else
2470			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2471						    NETCONFA_FORWARDING,
2472						    NETCONFA_IFINDEX_DEFAULT,
2473						    net->ipv4.devconf_dflt);
2474	}
2475
2476	return ret;
2477}
2478
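/* proc handler for sysctls whose only side effect is a route cache flush. */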
2479static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2480				void *buffer, size_t *lenp, loff_t *ppos)
2481{
2482	int *valp = ctl->data;
2483	int val = *valp;
2484	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2485	struct net *net = ctl->extra2;
2486
2487	if (write && *valp != val)
2488		rt_cache_flush(net);
2489
2490	return ret;
2491}
2492
2493#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2494	{ \
2495		.procname	= name, \
2496		.data		= ipv4_devconf.data + \
2497				  IPV4_DEVCONF_ ## attr - 1, \
2498		.maxlen		= sizeof(int), \
2499		.mode		= mval, \
2500		.proc_handler	= proc, \
2501		.extra1		= &ipv4_devconf, \
2502	}
2503
2504#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2505	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2506
2507#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2508	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2509
2510#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2511	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2512
2513#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2514	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2515
2516static struct devinet_sysctl_table {
2517	struct ctl_table_header *sysctl_header;
2518	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2519} devinet_sysctl = {
2520	.devinet_vars = {
2521		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2522					     devinet_sysctl_forward),
2523		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2524		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2525
2526		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2527		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2528		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2529		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2530		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2531		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2532					"accept_source_route"),
2533		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2534		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2535		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2536		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2537		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2538		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2539		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2540		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2541		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2542		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2543		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2544		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2545		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2546					"arp_evict_nocarrier"),
2547		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2548		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2549					"force_igmp_version"),
2550		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2551					"igmpv2_unsolicited_report_interval"),
2552		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2553					"igmpv3_unsolicited_report_interval"),
2554		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2555					"ignore_routes_with_linkdown"),
2556		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2557					"drop_gratuitous_arp"),
2558
2559		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2560		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2561		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2562					      "promote_secondaries"),
2563		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2564					      "route_localnet"),
2565		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2566					      "drop_unicast_in_l2_multicast"),
2567	},
2568};
2569
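/* Register the per-device sysctl tree under net/ipv4/conf/<dev_name>/ (for
 * example /proc/sys/net/ipv4/conf/eth0/rp_filter; device name illustrative)
 * by cloning the template table and pointing each entry at @p's data.
 */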
2570static int __devinet_sysctl_register(struct net *net, char *dev_name,
2571				     int ifindex, struct ipv4_devconf *p)
2572{
2573	int i;
2574	struct devinet_sysctl_table *t;
2575	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2576
2577	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2578	if (!t)
2579		goto out;
2580
2581	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2582		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2583		t->devinet_vars[i].extra1 = p;
2584		t->devinet_vars[i].extra2 = net;
2585	}
2586
2587	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2588
2589	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2590	if (!t->sysctl_header)
2591		goto free;
2592
2593	p->sysctl = t;
2594
2595	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2596				    ifindex, p);
2597	return 0;
2598
2599free:
2600	kfree(t);
2601out:
2602	return -ENOMEM;
2603}
2604
2605static void __devinet_sysctl_unregister(struct net *net,
2606					struct ipv4_devconf *cnf, int ifindex)
2607{
2608	struct devinet_sysctl_table *t = cnf->sysctl;
2609
2610	if (t) {
2611		cnf->sysctl = NULL;
2612		unregister_net_sysctl_table(t->sysctl_header);
2613		kfree(t);
2614	}
2615
2616	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2617}
2618
2619static int devinet_sysctl_register(struct in_device *idev)
2620{
2621	int err;
2622
2623	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2624		return -EINVAL;
2625
2626	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2627	if (err)
2628		return err;
2629	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2630					idev->dev->ifindex, &idev->cnf);
2631	if (err)
2632		neigh_sysctl_unregister(idev->arp_parms);
2633	return err;
2634}
2635
2636static void devinet_sysctl_unregister(struct in_device *idev)
2637{
2638	struct net *net = dev_net(idev->dev);
2639
2640	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2641	neigh_sysctl_unregister(idev->arp_parms);
2642}
2643
2644static struct ctl_table ctl_forward_entry[] = {
2645	{
2646		.procname	= "ip_forward",
2647		.data		= &ipv4_devconf.data[
2648					IPV4_DEVCONF_FORWARDING - 1],
2649		.maxlen		= sizeof(int),
2650		.mode		= 0644,
2651		.proc_handler	= devinet_sysctl_forward,
2652		.extra1		= &ipv4_devconf,
2653		.extra2		= &init_net,
2654	},
2655	{ },
2656};
2657#endif
2658
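/* Per-netns setup: allocate the "all" and "default" devconf copies, inherit
 * their contents from init_net or the current netns depending on
 * net_inherit_devconf(), and register their sysctls together with the
 * net/ipv4/ip_forward entry.
 */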
2659static __net_init int devinet_init_net(struct net *net)
2660{
2661	int err;
2662	struct ipv4_devconf *all, *dflt;
2663#ifdef CONFIG_SYSCTL
2664	struct ctl_table *tbl;
2665	struct ctl_table_header *forw_hdr;
2666#endif
2667
2668	err = -ENOMEM;
2669	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2670	if (!all)
2671		goto err_alloc_all;
2672
2673	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2674	if (!dflt)
2675		goto err_alloc_dflt;
2676
2677#ifdef CONFIG_SYSCTL
2678	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2679	if (!tbl)
2680		goto err_alloc_ctl;
2681
2682	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2683	tbl[0].extra1 = all;
2684	tbl[0].extra2 = net;
2685#endif
2686
2687	if (!net_eq(net, &init_net)) {
2688		switch (net_inherit_devconf()) {
2689		case 3:
2690			/* copy from the current netns */
2691			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2692			       sizeof(ipv4_devconf));
2693			memcpy(dflt,
2694			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2695			       sizeof(ipv4_devconf_dflt));
2696			break;
2697		case 0:
2698		case 1:
2699			/* copy from init_net */
2700			memcpy(all, init_net.ipv4.devconf_all,
2701			       sizeof(ipv4_devconf));
2702			memcpy(dflt, init_net.ipv4.devconf_dflt,
2703			       sizeof(ipv4_devconf_dflt));
2704			break;
2705		case 2:
2706			/* use compiled values */
2707			break;
2708		}
2709	}
2710
2711#ifdef CONFIG_SYSCTL
2712	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2713	if (err < 0)
2714		goto err_reg_all;
2715
2716	err = __devinet_sysctl_register(net, "default",
2717					NETCONFA_IFINDEX_DEFAULT, dflt);
2718	if (err < 0)
2719		goto err_reg_dflt;
2720
2721	err = -ENOMEM;
2722	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2723					  ARRAY_SIZE(ctl_forward_entry));
2724	if (!forw_hdr)
2725		goto err_reg_ctl;
2726	net->ipv4.forw_hdr = forw_hdr;
2727#endif
2728
2729	net->ipv4.devconf_all = all;
2730	net->ipv4.devconf_dflt = dflt;
2731	return 0;
2732
2733#ifdef CONFIG_SYSCTL
2734err_reg_ctl:
2735	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2736err_reg_dflt:
2737	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2738err_reg_all:
2739	kfree(tbl);
2740err_alloc_ctl:
2741#endif
2742	kfree(dflt);
2743err_alloc_dflt:
2744	kfree(all);
2745err_alloc_all:
2746	return err;
2747}
2748
2749static __net_exit void devinet_exit_net(struct net *net)
2750{
2751#ifdef CONFIG_SYSCTL
2752	struct ctl_table *tbl;
2753
2754	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2755	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2756	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2757				    NETCONFA_IFINDEX_DEFAULT);
2758	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2759				    NETCONFA_IFINDEX_ALL);
2760	kfree(tbl);
2761#endif
2762	kfree(net->ipv4.devconf_dflt);
2763	kfree(net->ipv4.devconf_all);
2764}
2765
2766static __net_initdata struct pernet_operations devinet_ops = {
2767	.init = devinet_init_net,
2768	.exit = devinet_exit_net,
2769};
2770
2771static struct rtnl_af_ops inet_af_ops __read_mostly = {
2772	.family		  = AF_INET,
2773	.fill_link_af	  = inet_fill_link_af,
2774	.get_link_af_size = inet_get_link_af_size,
2775	.validate_link_af = inet_validate_link_af,
2776	.set_link_af	  = inet_set_link_af,
2777};
2778
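/* Subsystem init: address hash table, pernet ops, netdevice notifier, the
 * address lifetime worker and the rtnetlink handlers for RTM_*ADDR and
 * RTM_*NETCONF.  Illustrative user-space counterparts: "ip addr add" issues
 * RTM_NEWADDR, "ip monitor netconf" listens for RTM_NEWNETCONF events.
 */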
2779void __init devinet_init(void)
2780{
2781	int i;
2782
2783	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2784		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2785
2786	register_pernet_subsys(&devinet_ops);
2787	register_netdevice_notifier(&ip_netdev_notifier);
2788
2789	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2790
2791	rtnl_af_register(&inet_af_ops);
2792
2793	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2794	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2795	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
2796		      RTNL_FLAG_DUMP_UNLOCKED);
2797	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2798		      inet_netconf_dump_devconf,
2799		      RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
2800}
2801