/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: FIB frontend.
 *
 * Version:	$Id: fib_frontend.c,v 1.1.1.1 2007/08/03 18:53:51 Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/list.h>

#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>

#define FFprint(a...) printk(KERN_DEBUG a)

#ifndef CONFIG_IP_MULTIPLE_TABLES

struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;

#define FIB_TABLE_HASHSZ 1
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];

#else

#define FIB_TABLE_HASHSZ 256
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];

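/*
 * Return the table with the given id, creating it with fib_hash_init()
 * and linking it into fib_table_hash if it does not exist yet.
 * An id of 0 is treated as RT_TABLE_MAIN.
 */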
struct fib_table *fib_new_table(u32 id)
{
	struct fib_table *tb;
	unsigned int h;

	if (id == 0)
		id = RT_TABLE_MAIN;
	tb = fib_get_table(id);
	if (tb)
		return tb;
	tb = fib_hash_init(id);
	if (!tb)
		return NULL;
	h = id & (FIB_TABLE_HASHSZ - 1);
	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
	return tb;
}

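/*
 * Look up an existing table by id under rcu_read_lock(); returns NULL
 * if the table has not been created.  An id of 0 maps to RT_TABLE_MAIN.
 */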
struct fib_table *fib_get_table(u32 id)
{
	struct fib_table *tb;
	struct hlist_node *node;
	unsigned int h;

	if (id == 0)
		id = RT_TABLE_MAIN;
	h = id & (FIB_TABLE_HASHSZ - 1);
	rcu_read_lock();
	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
		if (tb->tb_id == id) {
			rcu_read_unlock();
			return tb;
		}
	}
	rcu_read_unlock();
	return NULL;
}
#endif /* CONFIG_IP_MULTIPLE_TABLES */

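/*
 * Flush all tables and, if any entries were actually removed,
 * invalidate the routing cache.
 */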
static void fib_flush(void)
{
	int flushed = 0;
	struct fib_table *tb;
	struct hlist_node *node;
	unsigned int h;

	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
			flushed += tb->tb_flush(tb);
	}

	if (flushed)
		rt_cache_flush(-1);
}

/*
 *	Find the first device with a given source address.
 */

struct net_device * ip_dev_find(__be32 addr)
{
	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
	struct fib_result res;
	struct net_device *dev = NULL;

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	if (!ip_fib_local_table ||
	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
		return NULL;
	if (res.type != RTN_LOCAL)
		goto out;
	dev = FIB_RES_DEV(res);

	if (dev)
		dev_hold(dev);
out:
	fib_res_put(&res);
	return dev;
}

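/*
 * Classify an address (RTN_LOCAL, RTN_UNICAST, RTN_BROADCAST,
 * RTN_MULTICAST, ...) by looking it up in the local table.
 */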
unsigned inet_addr_type(__be32 addr)
{
	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
	struct fib_result	res;
	unsigned ret = RTN_BROADCAST;

	if (ZERONET(addr) || BADCLASS(addr))
		return RTN_BROADCAST;
	if (MULTICAST(addr))
		return RTN_MULTICAST;

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	if (ip_fib_local_table) {
		ret = RTN_UNICAST;
		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
						   &fl, &res)) {
			ret = res.type;
			fib_res_put(&res);
		}
	}
	return ret;
}

/* Given (packet source, input interface) and optional (dst, oif, tos):
 * - (main) check that the source is valid, i.e. not broadcast and not one
 *   of our local addresses.
 * - figure out which "logical" interface this packet arrived on
 *   and calculate the "specific destination" address.
 * - check that the packet arrived from the expected physical interface.
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst, u32 *itag)
{
	struct in_device *in_dev;
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .iif = oif };
	struct fib_result res;
	int no_addr, rpf;
	int ret;

	no_addr = rpf = 0;
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
	}
	rcu_read_unlock();

	if (in_dev == NULL)
		goto e_inval;

	if (fib_lookup(&fl, &res))
		goto last_resort;
	if (res.type != RTN_UNICAST)
		goto e_inval_res;
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
	if (FIB_RES_DEV(res) == dev)
#endif
	{
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	if (rpf)
		goto e_inval;
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(&fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	if (rpf)
		goto e_inval;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
}

static inline __be32 sk_extract_addr(struct sockaddr *addr)
{
	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
}

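/*
 * Append one 32-bit RTAX_* metric to the raw attribute buffer used for
 * cfg->fc_mx and return the new length.
 */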
static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
{
	struct nlattr *nla;

	nla = (struct nlattr *) ((char *) mx + len);
	nla->nla_type = type;
	nla->nla_len = nla_attr_size(4);
	*(u32 *) nla_data(nla) = value;

	return len + nla_total_size(4);
}

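/*
 * Convert the old-style struct rtentry passed by SIOCADDRT/SIOCDELRT
 * into a struct fib_config.  The metrics buffer (cfg->fc_mx), if any,
 * is allocated here and must be freed by the caller.
 */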
static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check the mask for validity:
	 * a) it must be contiguous.
	 * b) the destination must have all host bits clear.
	 * c) if the application forgot to set the correct family (AF_INET),
	 *    reject the request unless it is absolutely unambiguous, i.e.
	 *    both family and mask are zero.
	 */
	plen = 32;
	addr = sk_extract_addr(&rt->rt_dst);
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		devname[IFNAMSIZ-1] = 0;
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;

		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}

/*
 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables.
 */

int ip_rt_ioctl(unsigned int cmd, void __user *arg)
{
	struct fib_config cfg;
	struct rtentry rt;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;

		if (copy_from_user(&rt, arg, sizeof(rt)))
			return -EFAULT;

		rtnl_lock();
		err = rtentry_to_fib_config(cmd, &rt, &cfg);
		if (err == 0) {
			struct fib_table *tb;

			if (cmd == SIOCDELRT) {
				tb = fib_get_table(cfg.fc_table);
				if (tb)
					err = tb->tb_delete(tb, &cfg);
				else
					err = -ESRCH;
			} else {
				tb = fib_new_table(cfg.fc_table);
				if (tb)
					err = tb->tb_insert(tb, &cfg);
				else
					err = -ENOBUFS;
			}

			/* allocated by rtentry_to_fib_config() */
			kfree(cfg.fc_mx);
		}
		rtnl_unlock();
		return err;
	}
	return -EINVAL;
}

const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
	[RTA_DST]		= { .type = NLA_U32 },
	[RTA_SRC]		= { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_GATEWAY]		= { .type = NLA_U32 },
	[RTA_PRIORITY]		= { .type = NLA_U32 },
	[RTA_PREFSRC]		= { .type = NLA_U32 },
	[RTA_METRICS]		= { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PROTOINFO]		= { .type = NLA_U32 },
	[RTA_FLOW]		= { .type = NLA_U32 },
	[RTA_MP_ALGO]		= { .type = NLA_U32 },
};

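/*
 * Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into a
 * struct fib_config.  Attribute payloads (metrics, multipath) are
 * referenced in place, not copied.
 */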
static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			     struct fib_config *cfg)
{
	struct nlattr *attr;
	int err, remaining;
	struct rtmsg *rtm;

	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	memset(cfg, 0, sizeof(*cfg));

	rtm = nlmsg_data(nlh);
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_tos = rtm->rtm_tos;
	cfg->fc_table = rtm->rtm_table;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_scope = rtm->rtm_scope;
	cfg->fc_type = rtm->rtm_type;
	cfg->fc_flags = rtm->rtm_flags;
	cfg->fc_nlflags = nlh->nlmsg_flags;

	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh = nlh;

	if (cfg->fc_type > RTN_MAX) {
		err = -EINVAL;
		goto errout;
	}

	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
		switch (attr->nla_type) {
		case RTA_DST:
			cfg->fc_dst = nla_get_be32(attr);
			break;
		case RTA_OIF:
			cfg->fc_oif = nla_get_u32(attr);
			break;
		case RTA_GATEWAY:
			cfg->fc_gw = nla_get_be32(attr);
			break;
		case RTA_PRIORITY:
			cfg->fc_priority = nla_get_u32(attr);
			break;
		case RTA_PREFSRC:
			cfg->fc_prefsrc = nla_get_be32(attr);
			break;
		case RTA_METRICS:
			cfg->fc_mx = nla_data(attr);
			cfg->fc_mx_len = nla_len(attr);
			break;
		case RTA_MULTIPATH:
			cfg->fc_mp = nla_data(attr);
			cfg->fc_mp_len = nla_len(attr);
			break;
		case RTA_FLOW:
			cfg->fc_flow = nla_get_u32(attr);
			break;
		case RTA_MP_ALGO:
			cfg->fc_mp_alg = nla_get_u32(attr);
			break;
		case RTA_TABLE:
			cfg->fc_table = nla_get_u32(attr);
			break;
		}
	}

	return 0;
errout:
	return err;
}

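/* RTM_DELROUTE handler: remove a route from an existing table. */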
static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct fib_config cfg;
	struct fib_table *tb;
	int err;

	err = rtm_to_fib_config(skb, nlh, &cfg);
	if (err < 0)
		goto errout;

	tb = fib_get_table(cfg.fc_table);
	if (tb == NULL) {
		err = -ESRCH;
		goto errout;
	}

	err = tb->tb_delete(tb, &cfg);
errout:
	return err;
}

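/* RTM_NEWROUTE handler: insert a route, creating the table if needed. */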
static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct fib_config cfg;
	struct fib_table *tb;
	int err;

	err = rtm_to_fib_config(skb, nlh, &cfg);
	if (err < 0)
		goto errout;

	tb = fib_new_table(cfg.fc_table);
	if (tb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	err = tb->tb_insert(tb, &cfg);
errout:
	return err;
}

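/*
 * RTM_GETROUTE dump handler: walk every table and dump its entries,
 * resuming from the hash bucket and table index saved in cb->args.
 */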
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	unsigned int h, s_h;
	unsigned int e = 0, s_e;
	struct fib_table *tb;
	struct hlist_node *node;
	int dumped = 0;

	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
		return ip_rt_dump(skb, cb);

	s_h = cb->args[0];
	s_e = cb->args[1];

	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
		e = 0;
		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
			if (e < s_e)
				goto next;
			if (dumped)
				memset(&cb->args[2], 0, sizeof(cb->args) -
						 2 * sizeof(cb->args[0]));
			if (tb->tb_dump(tb, skb, cb) < 0)
				goto out;
			dumped = 1;
next:
			e++;
		}
	}
out:
	cb->args[1] = e;
	cb->args[0] = h;

	return skb->len;
}

/* Prepare and feed an intra-kernel routing request.
 * Really, it should be a netlink message, but :-( netlink
 * may not be configured, so we feed the request directly
 * to the fib engine. This is legal, because all events occur
 * only while netlink is already locked.
 */

static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
	struct fib_table *tb;
	struct fib_config cfg = {
		.fc_protocol = RTPROT_KERNEL,
		.fc_type = type,
		.fc_dst = dst,
		.fc_dst_len = dst_len,
		.fc_prefsrc = ifa->ifa_local,
		.fc_oif = ifa->ifa_dev->dev->ifindex,
		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
	};

	if (type == RTN_UNICAST)
		tb = fib_new_table(RT_TABLE_MAIN);
	else
		tb = fib_new_table(RT_TABLE_LOCAL);

	if (tb == NULL)
		return;

	cfg.fc_table = tb->tb_id;

	if (type != RTN_LOCAL)
		cfg.fc_scope = RT_SCOPE_LINK;
	else
		cfg.fc_scope = RT_SCOPE_HOST;

	if (cmd == RTM_NEWROUTE)
		tb->tb_insert(tb, &cfg);
	else
		tb->tb_delete(tb, &cfg);
}

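/*
 * Install the routes implied by a newly added interface address:
 * the local/32 route for the address itself, the broadcast addresses
 * and, for a primary address, the network route.
 */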
void fib_add_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *prim = ifa;
	__be32 mask = ifa->ifa_mask;
	__be32 addr = ifa->ifa_local;
	__be32 prefix = ifa->ifa_address&mask;

	if (ifa->ifa_flags&IFA_F_SECONDARY) {
		prim = inet_ifa_byprefix(in_dev, prefix, mask);
		if (prim == NULL) {
			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);

	if (!(dev->flags&IFF_UP))
		return;

	/* Add broadcast address, if it is explicitly assigned. */
	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);

	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);

		/* Add network-specific broadcasts, where it makes sense. */
		if (ifa->ifa_prefixlen < 31) {
			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
		}
	}
}

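/*
 * Remove the routes that were installed for an interface address,
 * keeping any that are still justified by the remaining addresses
 * on the device.
 */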
static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa;
	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
	__be32 any = ifa->ifa_address&ifa->ifa_mask;
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned ok = 0;

	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
	else {
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (prim == NULL) {
			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	/* Deletion is more complicated than add.
	 * We have to take care not to delete too much :-)
	 *
	 * Scan the address list to be sure that the addresses are really gone.
	 */

	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
	}

	if (!(ok&BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
	if (!(ok&BRD1_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
	if (!(ok&BRD0_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
	if (!(ok&LOCAL_OK)) {
		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

		/* Check that this local address has finally disappeared. */
		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
			/* And last, but not least, we must flush
			 * stray FIB entries.
			 *
			 * First of all, we scan the fib_info list searching
			 * for stray nexthop entries, then ignite fib_flush.
			 */
			if (fib_sync_down(ifa->ifa_local, NULL, 0))
				fib_flush();
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}

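/*
 * Resolve a single NETLINK_FIB_LOOKUP query against the given table
 * and fill the result back into *frn.
 */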
static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
{
	struct fib_result       res;
	struct flowi            fl = { .mark = frn->fl_mark,
				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
							    .tos = frn->fl_tos,
							    .scope = frn->fl_scope } } };

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	frn->err = -ENOENT;
	if (tb) {
		local_bh_disable();

		frn->tb_id = tb->tb_id;
		frn->err = tb->tb_lookup(tb, &fl, &res);

		if (!frn->err) {
			frn->prefixlen = res.prefixlen;
			frn->nh_sel = res.nh_sel;
			frn->type = res.type;
			frn->scope = res.scope;
			fib_res_put(&res);
		}
		local_bh_enable();
	}
}

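/*
 * Receive handler for the NETLINK_FIB_LOOKUP kernel socket: validate
 * the request, perform the lookup and unicast the result back to the
 * sender.
 */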
static void nl_fib_input(struct sock *sk, int len)
{
	struct sk_buff *skb = NULL;
	struct nlmsghdr *nlh = NULL;
	struct fib_result_nl *frn;
	u32 pid;
	struct fib_table *tb;

	skb = skb_dequeue(&sk->sk_receive_queue);
	if (skb == NULL)
		return;

	nlh = nlmsg_hdr(skb);
	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
		kfree_skb(skb);
		return;
	}

	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
	tb = fib_get_table(frn->tb_id_in);

	nl_fib_lookup(frn, tb);

	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
	NETLINK_CB(skb).pid = 0;         /* from kernel */
	NETLINK_CB(skb).dst_group = 0;   /* unicast */
	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
}

static void nl_fib_lookup_init(void)
{
	netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
			      THIS_MODULE);
}

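/*
 * Tear down IPv4 routing state for a device that is going down or
 * unregistering: drop affected routes, flush the route cache and
 * remove the device's ARP entries.
 */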
static void fib_disable_ip(struct net_device *dev, int force)
{
	if (fib_sync_down(0, dev, force))
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}

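/*
 * inetaddr notifier: keep the FIB in sync when interface addresses
 * are added or removed.
 */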
static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;

	switch (event) {
	case NETDEV_UP:
		fib_add_ifaddr(ifa);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		fib_sync_up(ifa->ifa_dev->dev);
#endif
		rt_cache_flush(-1);
		break;
	case NETDEV_DOWN:
		fib_del_ifaddr(ifa);
		if (ifa->ifa_dev->ifa_list == NULL) {
			/* Last address was deleted from this interface.
			   Disable IP.
			 */
			fib_disable_ip(ifa->ifa_dev->dev, 1);
		} else {
			rt_cache_flush(-1);
		}
		break;
	}
	return NOTIFY_DONE;
}

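/*
 * netdevice notifier: update the FIB and flush caches when a network
 * device changes state.
 */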
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	if (event == NETDEV_UNREGISTER) {
		fib_disable_ip(dev, 2);
		return NOTIFY_DONE;
	}

	if (!in_dev)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		for_ifa(in_dev) {
			fib_add_ifaddr(ifa);
		} endfor_ifa(in_dev);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		fib_sync_up(dev);
#endif
		rt_cache_flush(-1);
		break;
	case NETDEV_DOWN:
		fib_disable_ip(dev, 0);
		break;
	case NETDEV_CHANGEMTU:
	case NETDEV_CHANGE:
		rt_cache_flush(0);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block fib_inetaddr_notifier = {
	.notifier_call = fib_inetaddr_event,
};

static struct notifier_block fib_netdev_notifier = {
	.notifier_call = fib_netdev_event,
};

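/*
 * Set up the FIB at boot: initialize the table hash, create the default
 * tables (or the fib rules engine), and register the notifiers and
 * rtnetlink handlers.
 */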
void __init ip_fib_init(void)
{
	unsigned int i;

	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
		INIT_HLIST_HEAD(&fib_table_hash[i]);
#ifndef CONFIG_IP_MULTIPLE_TABLES
	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
#else
	fib4_rules_init();
#endif

	register_netdevice_notifier(&fib_netdev_notifier);
	register_inetaddr_notifier(&fib_inetaddr_notifier);
	nl_fib_lookup_init();

	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
}

EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(ip_dev_find);