1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 *		This program is free software; you can redistribute it and/or
11 *		modify it under the terms of the GNU General Public License
12 *		as published by the Free Software Foundation; either version
13 *		2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
20#include <linux/capability.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
30#include <linux/inetdevice.h>
31#include <linux/netdevice.h>
32#include <linux/if_addr.h>
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
35#include <linux/init.h>
36#include <linux/list.h>
37#include <linux/slab.h>
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/arp.h>
45#include <net/ip_fib.h>
46#include <net/rtnetlink.h>
47
48#ifndef CONFIG_IP_MULTIPLE_TABLES
49
50static int __net_init fib4_rules_init(struct net *net)
51{
52	struct fib_table *local_table, *main_table;
53
54	local_table = fib_hash_table(RT_TABLE_LOCAL);
55	if (local_table == NULL)
56		return -ENOMEM;
57
58	main_table  = fib_hash_table(RT_TABLE_MAIN);
59	if (main_table == NULL)
60		goto fail;
61
62	hlist_add_head_rcu(&local_table->tb_hlist,
63				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
64	hlist_add_head_rcu(&main_table->tb_hlist,
65				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
66	return 0;
67
68fail:
69	kfree(local_table);
70	return -ENOMEM;
71}
72#else
73
74struct fib_table *fib_new_table(struct net *net, u32 id)
75{
76	struct fib_table *tb;
77	unsigned int h;
78
79	if (id == 0)
80		id = RT_TABLE_MAIN;
81	tb = fib_get_table(net, id);
82	if (tb)
83		return tb;
84
85	tb = fib_hash_table(id);
86	if (!tb)
87		return NULL;
88	h = id & (FIB_TABLE_HASHSZ - 1);
89	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
90	return tb;
91}
92
93struct fib_table *fib_get_table(struct net *net, u32 id)
94{
95	struct fib_table *tb;
96	struct hlist_node *node;
97	struct hlist_head *head;
98	unsigned int h;
99
100	if (id == 0)
101		id = RT_TABLE_MAIN;
102	h = id & (FIB_TABLE_HASHSZ - 1);
103
104	rcu_read_lock();
105	head = &net->ipv4.fib_table_hash[h];
106	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
107		if (tb->tb_id == id) {
108			rcu_read_unlock();
109			return tb;
110		}
111	}
112	rcu_read_unlock();
113	return NULL;
114}
115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
117void fib_select_default(struct net *net,
118			const struct flowi *flp, struct fib_result *res)
119{
120	struct fib_table *tb;
121	int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124		return;
125	table = res->r->table;
126#endif
127	tb = fib_get_table(net, table);
128	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129		fib_table_select_default(tb, flp, res);
130}
131
132static void fib_flush(struct net *net)
133{
134	int flushed = 0;
135	struct fib_table *tb;
136	struct hlist_node *node;
137	struct hlist_head *head;
138	unsigned int h;
139
140	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
141		head = &net->ipv4.fib_table_hash[h];
142		hlist_for_each_entry(tb, node, head, tb_hlist)
143			flushed += fib_table_flush(tb);
144	}
145
146	if (flushed)
147		rt_cache_flush(net, -1);
148}
149
150/*
151 *	Find the first device with a given source address.
152 */
153
154struct net_device * ip_dev_find(struct net *net, __be32 addr)
155{
156	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157	struct fib_result res;
158	struct net_device *dev = NULL;
159	struct fib_table *local_table;
160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162	res.r = NULL;
163#endif
164
165	local_table = fib_get_table(net, RT_TABLE_LOCAL);
166	if (!local_table || fib_table_lookup(local_table, &fl, &res))
167		return NULL;
168	if (res.type != RTN_LOCAL)
169		goto out;
170	dev = FIB_RES_DEV(res);
171
172	if (dev)
173		dev_hold(dev);
174out:
175	fib_res_put(&res);
176	return dev;
177}
178EXPORT_SYMBOL(ip_dev_find);
179
180/*
181 * Find address type as if only "dev" was present in the system. If
182 * on_dev is NULL then all interfaces are taken into consideration.
183 */
184static inline unsigned __inet_dev_addr_type(struct net *net,
185					    const struct net_device *dev,
186					    __be32 addr)
187{
188	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
189	struct fib_result	res;
190	unsigned ret = RTN_BROADCAST;
191	struct fib_table *local_table;
192
193	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
194		return RTN_BROADCAST;
195	if (ipv4_is_multicast(addr))
196		return RTN_MULTICAST;
197
198#ifdef CONFIG_IP_MULTIPLE_TABLES
199	res.r = NULL;
200#endif
201
202	local_table = fib_get_table(net, RT_TABLE_LOCAL);
203	if (local_table) {
204		ret = RTN_UNICAST;
205		if (!fib_table_lookup(local_table, &fl, &res)) {
206			if (!dev || dev == res.fi->fib_dev)
207				ret = res.type;
208			fib_res_put(&res);
209		}
210	}
211	return ret;
212}
213
214unsigned int inet_addr_type(struct net *net, __be32 addr)
215{
216	return __inet_dev_addr_type(net, NULL, addr);
217}
218EXPORT_SYMBOL(inet_addr_type);
219
220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221				__be32 addr)
222{
223       return __inet_dev_addr_type(net, dev, addr);
224}
225EXPORT_SYMBOL(inet_dev_addr_type);
226
227/* Given (packet source, input interface) and optional (dst, oif, tos):
228   - (main) check, that source is valid i.e. not broadcast or our local
229     address.
230   - figure out what "logical" interface this packet arrived
231     and calculate "specific destination" address.
232   - check, that packet arrived from expected physical interface.
233 */
234
235int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
236			struct net_device *dev, __be32 *spec_dst,
237			u32 *itag, u32 mark)
238{
239	struct in_device *in_dev;
240	struct flowi fl = { .nl_u = { .ip4_u =
241				      { .daddr = src,
242					.saddr = dst,
243					.tos = tos } },
244			    .mark = mark,
245			    .iif = oif };
246
247	struct fib_result res;
248	int no_addr, rpf, accept_local;
249	bool dev_match;
250	int ret;
251	struct net *net;
252
253	no_addr = rpf = accept_local = 0;
254	rcu_read_lock();
255	in_dev = __in_dev_get_rcu(dev);
256	if (in_dev) {
257		no_addr = in_dev->ifa_list == NULL;
258		rpf = IN_DEV_RPFILTER(in_dev);
259		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
260		if (mark && !IN_DEV_SRC_VMARK(in_dev))
261			fl.mark = 0;
262	}
263	rcu_read_unlock();
264
265	if (in_dev == NULL)
266		goto e_inval;
267
268	net = dev_net(dev);
269	if (fib_lookup(net, &fl, &res))
270		goto last_resort;
271	if (res.type != RTN_UNICAST) {
272		if (res.type != RTN_LOCAL || !accept_local)
273			goto e_inval_res;
274	}
275	*spec_dst = FIB_RES_PREFSRC(res);
276	fib_combine_itag(itag, &res);
277	dev_match = false;
278
279#ifdef CONFIG_IP_ROUTE_MULTIPATH
280	for (ret = 0; ret < res.fi->fib_nhs; ret++) {
281		struct fib_nh *nh = &res.fi->fib_nh[ret];
282
283		if (nh->nh_dev == dev) {
284			dev_match = true;
285			break;
286		}
287	}
288#else
289	if (FIB_RES_DEV(res) == dev)
290		dev_match = true;
291#endif
292	if (dev_match) {
293		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
294		fib_res_put(&res);
295		return ret;
296	}
297	fib_res_put(&res);
298	if (no_addr)
299		goto last_resort;
300	if (rpf == 1)
301		goto e_rpf;
302	fl.oif = dev->ifindex;
303
304	ret = 0;
305	if (fib_lookup(net, &fl, &res) == 0) {
306		if (res.type == RTN_UNICAST) {
307			*spec_dst = FIB_RES_PREFSRC(res);
308			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
309		}
310		fib_res_put(&res);
311	}
312	return ret;
313
314last_resort:
315	if (rpf)
316		goto e_rpf;
317	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
318	*itag = 0;
319	return 0;
320
321e_inval_res:
322	fib_res_put(&res);
323e_inval:
324	return -EINVAL;
325e_rpf:
326	return -EXDEV;
327}
328
329static inline __be32 sk_extract_addr(struct sockaddr *addr)
330{
331	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
332}
333
334static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
335{
336	struct nlattr *nla;
337
338	nla = (struct nlattr *) ((char *) mx + len);
339	nla->nla_type = type;
340	nla->nla_len = nla_attr_size(4);
341	*(u32 *) nla_data(nla) = value;
342
343	return len + nla_total_size(4);
344}
345
346static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
347				 struct fib_config *cfg)
348{
349	__be32 addr;
350	int plen;
351
352	memset(cfg, 0, sizeof(*cfg));
353	cfg->fc_nlinfo.nl_net = net;
354
355	if (rt->rt_dst.sa_family != AF_INET)
356		return -EAFNOSUPPORT;
357
358	/*
359	 * Check mask for validity:
360	 * a) it must be contiguous.
361	 * b) destination must have all host bits clear.
362	 * c) if application forgot to set correct family (AF_INET),
363	 *    reject request unless it is absolutely clear i.e.
364	 *    both family and mask are zero.
365	 */
366	plen = 32;
367	addr = sk_extract_addr(&rt->rt_dst);
368	if (!(rt->rt_flags & RTF_HOST)) {
369		__be32 mask = sk_extract_addr(&rt->rt_genmask);
370
371		if (rt->rt_genmask.sa_family != AF_INET) {
372			if (mask || rt->rt_genmask.sa_family)
373				return -EAFNOSUPPORT;
374		}
375
376		if (bad_mask(mask, addr))
377			return -EINVAL;
378
379		plen = inet_mask_len(mask);
380	}
381
382	cfg->fc_dst_len = plen;
383	cfg->fc_dst = addr;
384
385	if (cmd != SIOCDELRT) {
386		cfg->fc_nlflags = NLM_F_CREATE;
387		cfg->fc_protocol = RTPROT_BOOT;
388	}
389
390	if (rt->rt_metric)
391		cfg->fc_priority = rt->rt_metric - 1;
392
393	if (rt->rt_flags & RTF_REJECT) {
394		cfg->fc_scope = RT_SCOPE_HOST;
395		cfg->fc_type = RTN_UNREACHABLE;
396		return 0;
397	}
398
399	cfg->fc_scope = RT_SCOPE_NOWHERE;
400	cfg->fc_type = RTN_UNICAST;
401
402	if (rt->rt_dev) {
403		char *colon;
404		struct net_device *dev;
405		char devname[IFNAMSIZ];
406
407		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
408			return -EFAULT;
409
410		devname[IFNAMSIZ-1] = 0;
411		colon = strchr(devname, ':');
412		if (colon)
413			*colon = 0;
414		dev = __dev_get_by_name(net, devname);
415		if (!dev)
416			return -ENODEV;
417		cfg->fc_oif = dev->ifindex;
418		if (colon) {
419			struct in_ifaddr *ifa;
420			struct in_device *in_dev = __in_dev_get_rtnl(dev);
421			if (!in_dev)
422				return -ENODEV;
423			*colon = ':';
424			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
425				if (strcmp(ifa->ifa_label, devname) == 0)
426					break;
427			if (ifa == NULL)
428				return -ENODEV;
429			cfg->fc_prefsrc = ifa->ifa_local;
430		}
431	}
432
433	addr = sk_extract_addr(&rt->rt_gateway);
434	if (rt->rt_gateway.sa_family == AF_INET && addr) {
435		cfg->fc_gw = addr;
436		if (rt->rt_flags & RTF_GATEWAY &&
437		    inet_addr_type(net, addr) == RTN_UNICAST)
438			cfg->fc_scope = RT_SCOPE_UNIVERSE;
439	}
440
441	if (cmd == SIOCDELRT)
442		return 0;
443
444	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
445		return -EINVAL;
446
447	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
448		cfg->fc_scope = RT_SCOPE_LINK;
449
450	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
451		struct nlattr *mx;
452		int len = 0;
453
454		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
455		if (mx == NULL)
456			return -ENOMEM;
457
458		if (rt->rt_flags & RTF_MTU)
459			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
460
461		if (rt->rt_flags & RTF_WINDOW)
462			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
463
464		if (rt->rt_flags & RTF_IRTT)
465			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
466
467		cfg->fc_mx = mx;
468		cfg->fc_mx_len = len;
469	}
470
471	return 0;
472}
473
474/*
475 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
476 */
477
478int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
479{
480	struct fib_config cfg;
481	struct rtentry rt;
482	int err;
483
484	switch (cmd) {
485	case SIOCADDRT:		/* Add a route */
486	case SIOCDELRT:		/* Delete a route */
487		if (!capable(CAP_NET_ADMIN))
488			return -EPERM;
489
490		if (copy_from_user(&rt, arg, sizeof(rt)))
491			return -EFAULT;
492
493		rtnl_lock();
494		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
495		if (err == 0) {
496			struct fib_table *tb;
497
498			if (cmd == SIOCDELRT) {
499				tb = fib_get_table(net, cfg.fc_table);
500				if (tb)
501					err = fib_table_delete(tb, &cfg);
502				else
503					err = -ESRCH;
504			} else {
505				tb = fib_new_table(net, cfg.fc_table);
506				if (tb)
507					err = fib_table_insert(tb, &cfg);
508				else
509					err = -ENOBUFS;
510			}
511
512			/* allocated by rtentry_to_fib_config() */
513			kfree(cfg.fc_mx);
514		}
515		rtnl_unlock();
516		return err;
517	}
518	return -EINVAL;
519}
520
521const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
522	[RTA_DST]		= { .type = NLA_U32 },
523	[RTA_SRC]		= { .type = NLA_U32 },
524	[RTA_IIF]		= { .type = NLA_U32 },
525	[RTA_OIF]		= { .type = NLA_U32 },
526	[RTA_GATEWAY]		= { .type = NLA_U32 },
527	[RTA_PRIORITY]		= { .type = NLA_U32 },
528	[RTA_PREFSRC]		= { .type = NLA_U32 },
529	[RTA_METRICS]		= { .type = NLA_NESTED },
530	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
531	[RTA_FLOW]		= { .type = NLA_U32 },
532};
533
534static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
535			    struct nlmsghdr *nlh, struct fib_config *cfg)
536{
537	struct nlattr *attr;
538	int err, remaining;
539	struct rtmsg *rtm;
540
541	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
542	if (err < 0)
543		goto errout;
544
545	memset(cfg, 0, sizeof(*cfg));
546
547	rtm = nlmsg_data(nlh);
548	cfg->fc_dst_len = rtm->rtm_dst_len;
549	cfg->fc_tos = rtm->rtm_tos;
550	cfg->fc_table = rtm->rtm_table;
551	cfg->fc_protocol = rtm->rtm_protocol;
552	cfg->fc_scope = rtm->rtm_scope;
553	cfg->fc_type = rtm->rtm_type;
554	cfg->fc_flags = rtm->rtm_flags;
555	cfg->fc_nlflags = nlh->nlmsg_flags;
556
557	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
558	cfg->fc_nlinfo.nlh = nlh;
559	cfg->fc_nlinfo.nl_net = net;
560
561	if (cfg->fc_type > RTN_MAX) {
562		err = -EINVAL;
563		goto errout;
564	}
565
566	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
567		switch (nla_type(attr)) {
568		case RTA_DST:
569			cfg->fc_dst = nla_get_be32(attr);
570			break;
571		case RTA_OIF:
572			cfg->fc_oif = nla_get_u32(attr);
573			break;
574		case RTA_GATEWAY:
575			cfg->fc_gw = nla_get_be32(attr);
576			break;
577		case RTA_PRIORITY:
578			cfg->fc_priority = nla_get_u32(attr);
579			break;
580		case RTA_PREFSRC:
581			cfg->fc_prefsrc = nla_get_be32(attr);
582			break;
583		case RTA_METRICS:
584			cfg->fc_mx = nla_data(attr);
585			cfg->fc_mx_len = nla_len(attr);
586			break;
587		case RTA_MULTIPATH:
588			cfg->fc_mp = nla_data(attr);
589			cfg->fc_mp_len = nla_len(attr);
590			break;
591		case RTA_FLOW:
592			cfg->fc_flow = nla_get_u32(attr);
593			break;
594		case RTA_TABLE:
595			cfg->fc_table = nla_get_u32(attr);
596			break;
597		}
598	}
599
600	return 0;
601errout:
602	return err;
603}
604
605static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
606{
607	struct net *net = sock_net(skb->sk);
608	struct fib_config cfg;
609	struct fib_table *tb;
610	int err;
611
612	err = rtm_to_fib_config(net, skb, nlh, &cfg);
613	if (err < 0)
614		goto errout;
615
616	tb = fib_get_table(net, cfg.fc_table);
617	if (tb == NULL) {
618		err = -ESRCH;
619		goto errout;
620	}
621
622	err = fib_table_delete(tb, &cfg);
623errout:
624	return err;
625}
626
627static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
628{
629	struct net *net = sock_net(skb->sk);
630	struct fib_config cfg;
631	struct fib_table *tb;
632	int err;
633
634	err = rtm_to_fib_config(net, skb, nlh, &cfg);
635	if (err < 0)
636		goto errout;
637
638	tb = fib_new_table(net, cfg.fc_table);
639	if (tb == NULL) {
640		err = -ENOBUFS;
641		goto errout;
642	}
643
644	err = fib_table_insert(tb, &cfg);
645errout:
646	return err;
647}
648
649static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
650{
651	struct net *net = sock_net(skb->sk);
652	unsigned int h, s_h;
653	unsigned int e = 0, s_e;
654	struct fib_table *tb;
655	struct hlist_node *node;
656	struct hlist_head *head;
657	int dumped = 0;
658
659	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
660	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
661		return ip_rt_dump(skb, cb);
662
663	s_h = cb->args[0];
664	s_e = cb->args[1];
665
666	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
667		e = 0;
668		head = &net->ipv4.fib_table_hash[h];
669		hlist_for_each_entry(tb, node, head, tb_hlist) {
670			if (e < s_e)
671				goto next;
672			if (dumped)
673				memset(&cb->args[2], 0, sizeof(cb->args) -
674						 2 * sizeof(cb->args[0]));
675			if (fib_table_dump(tb, skb, cb) < 0)
676				goto out;
677			dumped = 1;
678next:
679			e++;
680		}
681	}
682out:
683	cb->args[1] = e;
684	cb->args[0] = h;
685
686	return skb->len;
687}
688
689/* Prepare and feed intra-kernel routing request.
690   Really, it should be netlink message, but :-( netlink
691   can be not configured, so that we feed it directly
692   to fib engine. It is legal, because all events occur
693   only when netlink is already locked.
694 */
695
696static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
697{
698	struct net *net = dev_net(ifa->ifa_dev->dev);
699	struct fib_table *tb;
700	struct fib_config cfg = {
701		.fc_protocol = RTPROT_KERNEL,
702		.fc_type = type,
703		.fc_dst = dst,
704		.fc_dst_len = dst_len,
705		.fc_prefsrc = ifa->ifa_local,
706		.fc_oif = ifa->ifa_dev->dev->ifindex,
707		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
708		.fc_nlinfo = {
709			.nl_net = net,
710		},
711	};
712
713	if (type == RTN_UNICAST)
714		tb = fib_new_table(net, RT_TABLE_MAIN);
715	else
716		tb = fib_new_table(net, RT_TABLE_LOCAL);
717
718	if (tb == NULL)
719		return;
720
721	cfg.fc_table = tb->tb_id;
722
723	if (type != RTN_LOCAL)
724		cfg.fc_scope = RT_SCOPE_LINK;
725	else
726		cfg.fc_scope = RT_SCOPE_HOST;
727
728	if (cmd == RTM_NEWROUTE)
729		fib_table_insert(tb, &cfg);
730	else
731		fib_table_delete(tb, &cfg);
732}
733
734void fib_add_ifaddr(struct in_ifaddr *ifa)
735{
736	struct in_device *in_dev = ifa->ifa_dev;
737	struct net_device *dev = in_dev->dev;
738	struct in_ifaddr *prim = ifa;
739	__be32 mask = ifa->ifa_mask;
740	__be32 addr = ifa->ifa_local;
741	__be32 prefix = ifa->ifa_address&mask;
742
743	if (ifa->ifa_flags&IFA_F_SECONDARY) {
744		prim = inet_ifa_byprefix(in_dev, prefix, mask);
745		if (prim == NULL) {
746			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
747			return;
748		}
749	}
750
751	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
752
753	if (!(dev->flags&IFF_UP))
754		return;
755
756	/* Add broadcast address, if it is explicitly assigned. */
757	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
758		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
759
760	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
761	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
762		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
763			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
764
765		/* Add network specific broadcasts, when it takes a sense */
766		if (ifa->ifa_prefixlen < 31) {
767			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
768			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
769		}
770	}
771}
772
773static void fib_del_ifaddr(struct in_ifaddr *ifa)
774{
775	struct in_device *in_dev = ifa->ifa_dev;
776	struct net_device *dev = in_dev->dev;
777	struct in_ifaddr *ifa1;
778	struct in_ifaddr *prim = ifa;
779	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
780	__be32 any = ifa->ifa_address&ifa->ifa_mask;
781#define LOCAL_OK	1
782#define BRD_OK		2
783#define BRD0_OK		4
784#define BRD1_OK		8
785	unsigned ok = 0;
786
787	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
788		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
789			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
790	else {
791		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
792		if (prim == NULL) {
793			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
794			return;
795		}
796	}
797
798	/* Deletion is more complicated than add.
799	   We should take care of not to delete too much :-)
800
801	   Scan address list to be sure that addresses are really gone.
802	 */
803
804	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
805		if (ifa->ifa_local == ifa1->ifa_local)
806			ok |= LOCAL_OK;
807		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
808			ok |= BRD_OK;
809		if (brd == ifa1->ifa_broadcast)
810			ok |= BRD1_OK;
811		if (any == ifa1->ifa_broadcast)
812			ok |= BRD0_OK;
813	}
814
815	if (!(ok&BRD_OK))
816		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
817	if (!(ok&BRD1_OK))
818		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
819	if (!(ok&BRD0_OK))
820		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
821	if (!(ok&LOCAL_OK)) {
822		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
823
824		/* Check, that this local address finally disappeared. */
825		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
826			/* And the last, but not the least thing.
827			   We must flush stray FIB entries.
828
829			   First of all, we scan fib_info list searching
830			   for stray nexthop entries, then ignite fib_flush.
831			*/
832			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
833				fib_flush(dev_net(dev));
834		}
835	}
836#undef LOCAL_OK
837#undef BRD_OK
838#undef BRD0_OK
839#undef BRD1_OK
840}
841
842static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
843{
844
845	struct fib_result       res;
846	struct flowi            fl = { .mark = frn->fl_mark,
847				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
848							    .tos = frn->fl_tos,
849							    .scope = frn->fl_scope } } };
850
851#ifdef CONFIG_IP_MULTIPLE_TABLES
852	res.r = NULL;
853#endif
854
855	frn->err = -ENOENT;
856	if (tb) {
857		local_bh_disable();
858
859		frn->tb_id = tb->tb_id;
860		frn->err = fib_table_lookup(tb, &fl, &res);
861
862		if (!frn->err) {
863			frn->prefixlen = res.prefixlen;
864			frn->nh_sel = res.nh_sel;
865			frn->type = res.type;
866			frn->scope = res.scope;
867			fib_res_put(&res);
868		}
869		local_bh_enable();
870	}
871}
872
873static void nl_fib_input(struct sk_buff *skb)
874{
875	struct net *net;
876	struct fib_result_nl *frn;
877	struct nlmsghdr *nlh;
878	struct fib_table *tb;
879	u32 pid;
880
881	net = sock_net(skb->sk);
882	nlh = nlmsg_hdr(skb);
883	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
884	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
885		return;
886
887	skb = skb_clone(skb, GFP_KERNEL);
888	if (skb == NULL)
889		return;
890	nlh = nlmsg_hdr(skb);
891
892	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
893	tb = fib_get_table(net, frn->tb_id_in);
894
895	nl_fib_lookup(frn, tb);
896
897	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
898	NETLINK_CB(skb).pid = 0;         /* from kernel */
899	NETLINK_CB(skb).dst_group = 0;  /* unicast */
900	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
901}
902
903static int __net_init nl_fib_lookup_init(struct net *net)
904{
905	struct sock *sk;
906	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
907				   nl_fib_input, NULL, THIS_MODULE);
908	if (sk == NULL)
909		return -EAFNOSUPPORT;
910	net->ipv4.fibnl = sk;
911	return 0;
912}
913
914static void nl_fib_lookup_exit(struct net *net)
915{
916	netlink_kernel_release(net->ipv4.fibnl);
917	net->ipv4.fibnl = NULL;
918}
919
/*
 * Take IPv4 routing down on @dev: sync down the routes using the device
 * (flushing the tables if that reports anything), flush the routing cache
 * with the given @delay and run arp_ifdown() for the device.
 * @force is passed through to fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force))
		fib_flush(net);
	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
927
928static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
929{
930	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
931	struct net_device *dev = ifa->ifa_dev->dev;
932
933	switch (event) {
934	case NETDEV_UP:
935		fib_add_ifaddr(ifa);
936#ifdef CONFIG_IP_ROUTE_MULTIPATH
937		fib_sync_up(dev);
938#endif
939		rt_cache_flush(dev_net(dev), -1);
940		break;
941	case NETDEV_DOWN:
942		fib_del_ifaddr(ifa);
943		if (ifa->ifa_dev->ifa_list == NULL) {
944			/* Last address was deleted from this interface.
945			   Disable IP.
946			 */
947			fib_disable_ip(dev, 1, 0);
948		} else {
949			rt_cache_flush(dev_net(dev), -1);
950		}
951		break;
952	}
953	return NOTIFY_DONE;
954}
955
956static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
957{
958	struct net_device *dev = ptr;
959	struct in_device *in_dev = __in_dev_get_rtnl(dev);
960
961	if (event == NETDEV_UNREGISTER) {
962		fib_disable_ip(dev, 2, -1);
963		return NOTIFY_DONE;
964	}
965
966	if (!in_dev)
967		return NOTIFY_DONE;
968
969	switch (event) {
970	case NETDEV_UP:
971		for_ifa(in_dev) {
972			fib_add_ifaddr(ifa);
973		} endfor_ifa(in_dev);
974#ifdef CONFIG_IP_ROUTE_MULTIPATH
975		fib_sync_up(dev);
976#endif
977		rt_cache_flush(dev_net(dev), -1);
978		break;
979	case NETDEV_DOWN:
980		fib_disable_ip(dev, 0, 0);
981		break;
982	case NETDEV_CHANGEMTU:
983	case NETDEV_CHANGE:
984		rt_cache_flush(dev_net(dev), 0);
985		break;
986	case NETDEV_UNREGISTER_BATCH:
987		rt_cache_flush_batch();
988		break;
989	}
990	return NOTIFY_DONE;
991}
992
993static struct notifier_block fib_inetaddr_notifier = {
994	.notifier_call = fib_inetaddr_event,
995};
996
997static struct notifier_block fib_netdev_notifier = {
998	.notifier_call = fib_netdev_event,
999};
1000
1001static int __net_init ip_fib_net_init(struct net *net)
1002{
1003	int err;
1004	unsigned int i;
1005
1006	net->ipv4.fib_table_hash = kzalloc(
1007			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
1008	if (net->ipv4.fib_table_hash == NULL)
1009		return -ENOMEM;
1010
1011	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
1012		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
1013
1014	err = fib4_rules_init(net);
1015	if (err < 0)
1016		goto fail;
1017	return 0;
1018
1019fail:
1020	kfree(net->ipv4.fib_table_hash);
1021	return err;
1022}
1023
1024static void ip_fib_net_exit(struct net *net)
1025{
1026	unsigned int i;
1027
1028#ifdef CONFIG_IP_MULTIPLE_TABLES
1029	fib4_rules_exit(net);
1030#endif
1031
1032	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1033		struct fib_table *tb;
1034		struct hlist_head *head;
1035		struct hlist_node *node, *tmp;
1036
1037		head = &net->ipv4.fib_table_hash[i];
1038		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1039			hlist_del(node);
1040			fib_table_flush(tb);
1041			kfree(tb);
1042		}
1043	}
1044	kfree(net->ipv4.fib_table_hash);
1045}
1046
1047static int __net_init fib_net_init(struct net *net)
1048{
1049	int error;
1050
1051	error = ip_fib_net_init(net);
1052	if (error < 0)
1053		goto out;
1054	error = nl_fib_lookup_init(net);
1055	if (error < 0)
1056		goto out_nlfl;
1057	error = fib_proc_init(net);
1058	if (error < 0)
1059		goto out_proc;
1060out:
1061	return error;
1062
1063out_proc:
1064	nl_fib_lookup_exit(net);
1065out_nlfl:
1066	ip_fib_net_exit(net);
1067	goto out;
1068}
1069
1070static void __net_exit fib_net_exit(struct net *net)
1071{
1072	fib_proc_exit(net);
1073	nl_fib_lookup_exit(net);
1074	ip_fib_net_exit(net);
1075}
1076
1077static struct pernet_operations fib_net_ops = {
1078	.init = fib_net_init,
1079	.exit = fib_net_exit,
1080};
1081
1082void __init ip_fib_init(void)
1083{
1084	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1085	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1086	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1087
1088	register_pernet_subsys(&fib_net_ops);
1089	register_netdevice_notifier(&fib_netdev_notifier);
1090	register_inetaddr_notifier(&fib_inetaddr_notifier);
1091
1092	fib_hash_init();
1093}
1094