1/*
2 *	IP multicast routing support for mrouted 3.6/3.8
3 *
4 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 *	  Linux Consultancy and Custom Driver Development
6 *
7 *	This program is free software; you can redistribute it and/or
8 *	modify it under the terms of the GNU General Public License
9 *	as published by the Free Software Foundation; either version
10 *	2 of the License, or (at your option) any later version.
11 *
12 *	Fixes:
13 *	Michael Chastain	:	Incorrect size of copying.
14 *	Alan Cox		:	Added the cache manager code
15 *	Alan Cox		:	Fixed the clone/copy bug and device race.
16 *	Mike McLagan		:	Routing by source
17 *	Malcolm Beattie		:	Buffer handling fixes.
18 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
19 *	SVR Anand		:	Fixed several multicast bugs and problems.
20 *	Alexey Kuznetsov	:	Status, optimisations and more.
21 *	Brad Parker		:	Better behaviour on mrouted upcall
22 *					overflow.
23 *      Carlos Picoto           :       PIMv1 Support
24 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
25 *					Relax this requirement to work with older peers.
26 *
27 */
28
29#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
32#include <linux/capability.h>
33#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
49#include <linux/if_ether.h>
50#include <linux/slab.h>
51#include <net/net_namespace.h>
52#include <net/ip.h>
53#include <net/protocol.h>
54#include <linux/skbuff.h>
55#include <net/route.h>
56#include <net/sock.h>
57#include <net/icmp.h>
58#include <net/udp.h>
59#include <net/raw.h>
60#include <linux/notifier.h>
61#include <linux/if_arp.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ipip.h>
64#include <net/checksum.h>
65#include <net/netlink.h>
66#include <net/fib_rules.h>
67
68#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69#define CONFIG_IP_PIMSM	1
70#endif
71
72struct mr_table {
73	struct list_head	list;
74#ifdef CONFIG_NET_NS
75	struct net		*net;
76#endif
77	u32			id;
78	struct sock		*mroute_sk;
79	struct timer_list	ipmr_expire_timer;
80	struct list_head	mfc_unres_queue;
81	struct list_head	mfc_cache_array[MFC_LINES];
82	struct vif_device	vif_table[MAXVIFS];
83	int			maxvif;
84	atomic_t		cache_resolve_queue_len;
85	int			mroute_do_assert;
86	int			mroute_do_pim;
87#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88	int			mroute_reg_vif_num;
89#endif
90};
91
92struct ipmr_rule {
93	struct fib_rule		common;
94};
95
96struct ipmr_result {
97	struct mr_table		*mrt;
98};
99
100/* Big lock, protecting vif table, mrt cache and mroute socket state.
101   Note that changes are serialized via rtnl_lock.
102 */
103
104static DEFINE_RWLOCK(mrt_lock);
105
106/*
107 *	Multicast router control variables
108 */
109
110#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
111
112/* Special spinlock for queue of unresolved entries */
113static DEFINE_SPINLOCK(mfc_unres_lock);
114
115/* We return to Alan's original scheme. The hash table of resolved
116   entries is changed only in process context and is protected
117   by the weak lock mrt_lock. The queue of unresolved entries is
118   protected by the strong spinlock mfc_unres_lock.
119
120   This keeps the data path entirely free of exclusive locks.
121 */
122
123static struct kmem_cache *mrt_cachep __read_mostly;
124
125static struct mr_table *ipmr_new_table(struct net *net, u32 id);
126static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127			 struct sk_buff *skb, struct mfc_cache *cache,
128			 int local);
129static int ipmr_cache_report(struct mr_table *mrt,
130			     struct sk_buff *pkt, vifi_t vifi, int assert);
131static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132			      struct mfc_cache *c, struct rtmsg *rtm);
133static void ipmr_expire_process(unsigned long arg);
134
135#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136#define ipmr_for_each_table(mrt, net) \
137	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
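/* Find the multicast routing table with the given id, or NULL if none exists. */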
139static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140{
141	struct mr_table *mrt;
142
143	ipmr_for_each_table(mrt, net) {
144		if (mrt->id == id)
145			return mrt;
146	}
147	return NULL;
148}
149
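/* Resolve which mr_table handles a flow by consulting the IPMR fib rules. */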
150static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151			   struct mr_table **mrt)
152{
153	struct ipmr_result res;
154	struct fib_lookup_arg arg = { .result = &res, };
155	int err;
156
157	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158	if (err < 0)
159		return err;
160	*mrt = res.mrt;
161	return 0;
162}
163
164static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165			    int flags, struct fib_lookup_arg *arg)
166{
167	struct ipmr_result *res = arg->result;
168	struct mr_table *mrt;
169
170	switch (rule->action) {
171	case FR_ACT_TO_TBL:
172		break;
173	case FR_ACT_UNREACHABLE:
174		return -ENETUNREACH;
175	case FR_ACT_PROHIBIT:
176		return -EACCES;
177	case FR_ACT_BLACKHOLE:
178	default:
179		return -EINVAL;
180	}
181
182	mrt = ipmr_get_table(rule->fr_net, rule->table);
183	if (mrt == NULL)
184		return -EAGAIN;
185	res->mrt = mrt;
186	return 0;
187}
188
189static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
190{
191	return 1;
192}
193
194static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195	FRA_GENERIC_POLICY,
196};
197
198static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199			       struct fib_rule_hdr *frh, struct nlattr **tb)
200{
201	return 0;
202}
203
204static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205			     struct nlattr **tb)
206{
207	return 1;
208}
209
210static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211			  struct fib_rule_hdr *frh)
212{
213	frh->dst_len = 0;
214	frh->src_len = 0;
215	frh->tos     = 0;
216	return 0;
217}
218
219static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
220	.family		= RTNL_FAMILY_IPMR,
221	.rule_size	= sizeof(struct ipmr_rule),
222	.addr_size	= sizeof(u32),
223	.action		= ipmr_rule_action,
224	.match		= ipmr_rule_match,
225	.configure	= ipmr_rule_configure,
226	.compare	= ipmr_rule_compare,
227	.default_pref	= fib_default_rule_pref,
228	.fill		= ipmr_rule_fill,
229	.nlgroup	= RTNLGRP_IPV4_RULE,
230	.policy		= ipmr_rule_policy,
231	.owner		= THIS_MODULE,
232};
233
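/*
 * Per-namespace setup: register the IPMR fib rules ops, create the
 * default table and install the catch-all rule that points at it.
 */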
234static int __net_init ipmr_rules_init(struct net *net)
235{
236	struct fib_rules_ops *ops;
237	struct mr_table *mrt;
238	int err;
239
240	ops = fib_rules_register(&ipmr_rules_ops_template, net);
241	if (IS_ERR(ops))
242		return PTR_ERR(ops);
243
244	INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247	if (mrt == NULL) {
248		err = -ENOMEM;
249		goto err1;
250	}
251
252	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253	if (err < 0)
254		goto err2;
255
256	net->ipv4.mr_rules_ops = ops;
257	return 0;
258
259err2:
260	kfree(mrt);
261err1:
262	fib_rules_unregister(ops);
263	return err;
264}
265
266static void __net_exit ipmr_rules_exit(struct net *net)
267{
268	struct mr_table *mrt, *next;
269
270	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
271		list_del(&mrt->list);
272		kfree(mrt);
273	}
274	fib_rules_unregister(net->ipv4.mr_rules_ops);
275}
276#else
277#define ipmr_for_each_table(mrt, net) \
278	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
279
280static struct mr_table *ipmr_get_table(struct net *net, u32 id)
281{
282	return net->ipv4.mrt;
283}
284
285static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
286			   struct mr_table **mrt)
287{
288	*mrt = net->ipv4.mrt;
289	return 0;
290}
291
292static int __net_init ipmr_rules_init(struct net *net)
293{
294	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
295	return net->ipv4.mrt ? 0 : -ENOMEM;
296}
297
298static void __net_exit ipmr_rules_exit(struct net *net)
299{
300	kfree(net->ipv4.mrt);
301}
302#endif
303
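/* Return the table with the given id, creating and initializing it if it does not exist yet. */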
304static struct mr_table *ipmr_new_table(struct net *net, u32 id)
305{
306	struct mr_table *mrt;
307	unsigned int i;
308
309	mrt = ipmr_get_table(net, id);
310	if (mrt != NULL)
311		return mrt;
312
313	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
314	if (mrt == NULL)
315		return NULL;
316	write_pnet(&mrt->net, net);
317	mrt->id = id;
318
319	/* Forwarding cache */
320	for (i = 0; i < MFC_LINES; i++)
321		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
322
323	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
324
325	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
326		    (unsigned long)mrt);
327
328#ifdef CONFIG_IP_PIMSM
329	mrt->mroute_reg_vif_num = -1;
330#endif
331#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
332	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
333#endif
334	return mrt;
335}
336
337/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
338
339static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
340{
341	struct net *net = dev_net(dev);
342
343	dev_close(dev);
344
345	dev = __dev_get_by_name(net, "tunl0");
346	if (dev) {
347		const struct net_device_ops *ops = dev->netdev_ops;
348		struct ifreq ifr;
349		struct ip_tunnel_parm p;
350
351		memset(&p, 0, sizeof(p));
352		p.iph.daddr = v->vifc_rmt_addr.s_addr;
353		p.iph.saddr = v->vifc_lcl_addr.s_addr;
354		p.iph.version = 4;
355		p.iph.ihl = 5;
356		p.iph.protocol = IPPROTO_IPIP;
357		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
358		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
359
360		if (ops->ndo_do_ioctl) {
361			mm_segment_t oldfs = get_fs();
362
363			set_fs(KERNEL_DS);
364			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
365			set_fs(oldfs);
366		}
367	}
368}
369
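/*
 * Create a DVMRP tunnel for a VIF by issuing SIOCADDTUNNEL through the
 * tunl0 device, then bring the resulting dvmrp%d device up.
 */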
370static
371struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
372{
373	struct net_device  *dev;
374
375	dev = __dev_get_by_name(net, "tunl0");
376
377	if (dev) {
378		const struct net_device_ops *ops = dev->netdev_ops;
379		int err;
380		struct ifreq ifr;
381		struct ip_tunnel_parm p;
382		struct in_device  *in_dev;
383
384		memset(&p, 0, sizeof(p));
385		p.iph.daddr = v->vifc_rmt_addr.s_addr;
386		p.iph.saddr = v->vifc_lcl_addr.s_addr;
387		p.iph.version = 4;
388		p.iph.ihl = 5;
389		p.iph.protocol = IPPROTO_IPIP;
390		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
391		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
392
393		if (ops->ndo_do_ioctl) {
394			mm_segment_t oldfs = get_fs();
395
396			set_fs(KERNEL_DS);
397			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
398			set_fs(oldfs);
399		} else
400			err = -EOPNOTSUPP;
401
402		dev = NULL;
403
404		if (err == 0 &&
405		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
406			dev->flags |= IFF_MULTICAST;
407
408			in_dev = __in_dev_get_rtnl(dev);
409			if (in_dev == NULL)
410				goto failure;
411
412			ipv4_devconf_setall(in_dev);
413			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
414
415			if (dev_open(dev))
416				goto failure;
417			dev_hold(dev);
418		}
419	}
420	return dev;
421
422failure:
423	/* allow the register to be completed before unregistering. */
424	rtnl_unlock();
425	rtnl_lock();
426
427	unregister_netdevice(dev);
428	return NULL;
429}
430
431#ifdef CONFIG_IP_PIMSM
432
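/*
 * Packets transmitted on the register VIF are bounced up to the daemon
 * as IGMPMSG_WHOLEPKT reports rather than sent on the wire.
 */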
433static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
434{
435	struct net *net = dev_net(dev);
436	struct mr_table *mrt;
437	struct flowi fl = {
438		.oif		= dev->ifindex,
439		.iif		= skb->skb_iif,
440		.mark		= skb->mark,
441	};
442	int err;
443
444	err = ipmr_fib_lookup(net, &fl, &mrt);
445	if (err < 0) {
446		kfree_skb(skb);
447		return err;
448	}
449
450	read_lock(&mrt_lock);
451	dev->stats.tx_bytes += skb->len;
452	dev->stats.tx_packets++;
453	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
454	read_unlock(&mrt_lock);
455	kfree_skb(skb);
456	return NETDEV_TX_OK;
457}
458
459static const struct net_device_ops reg_vif_netdev_ops = {
460	.ndo_start_xmit	= reg_vif_xmit,
461};
462
463static void reg_vif_setup(struct net_device *dev)
464{
465	dev->type		= ARPHRD_PIMREG;
466	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
467	dev->flags		= IFF_NOARP;
468	dev->netdev_ops		= &reg_vif_netdev_ops;
469	dev->destructor		= free_netdev;
470	dev->features		|= NETIF_F_NETNS_LOCAL;
471}
472
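/* Allocate and register the pimreg device that acts as the PIM register VIF for this table. */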
473static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
474{
475	struct net_device *dev;
476	struct in_device *in_dev;
477	char name[IFNAMSIZ];
478
479	if (mrt->id == RT_TABLE_DEFAULT)
480		sprintf(name, "pimreg");
481	else
482		sprintf(name, "pimreg%u", mrt->id);
483
484	dev = alloc_netdev(0, name, reg_vif_setup);
485
486	if (dev == NULL)
487		return NULL;
488
489	dev_net_set(dev, net);
490
491	if (register_netdevice(dev)) {
492		free_netdev(dev);
493		return NULL;
494	}
495	dev->iflink = 0;
496
497	rcu_read_lock();
498	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
499		rcu_read_unlock();
500		goto failure;
501	}
502
503	ipv4_devconf_setall(in_dev);
504	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
505	rcu_read_unlock();
506
507	if (dev_open(dev))
508		goto failure;
509
510	dev_hold(dev);
511
512	return dev;
513
514failure:
515	/* allow the register to be completed before unregistering. */
516	rtnl_unlock();
517	rtnl_lock();
518
519	unregister_netdevice(dev);
520	return NULL;
521}
522#endif
523
524/*
525 *	Delete a VIF entry
526 *	@notify: Set to 1 if the caller is a notifier_call
527 */
528
529static int vif_delete(struct mr_table *mrt, int vifi, int notify,
530		      struct list_head *head)
531{
532	struct vif_device *v;
533	struct net_device *dev;
534	struct in_device *in_dev;
535
536	if (vifi < 0 || vifi >= mrt->maxvif)
537		return -EADDRNOTAVAIL;
538
539	v = &mrt->vif_table[vifi];
540
541	write_lock_bh(&mrt_lock);
542	dev = v->dev;
543	v->dev = NULL;
544
545	if (!dev) {
546		write_unlock_bh(&mrt_lock);
547		return -EADDRNOTAVAIL;
548	}
549
550#ifdef CONFIG_IP_PIMSM
551	if (vifi == mrt->mroute_reg_vif_num)
552		mrt->mroute_reg_vif_num = -1;
553#endif
554
555	if (vifi+1 == mrt->maxvif) {
556		int tmp;
557		for (tmp=vifi-1; tmp>=0; tmp--) {
558			if (VIF_EXISTS(mrt, tmp))
559				break;
560		}
561		mrt->maxvif = tmp+1;
562	}
563
564	write_unlock_bh(&mrt_lock);
565
566	dev_set_allmulti(dev, -1);
567
568	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
569		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
570		ip_rt_multicast_event(in_dev);
571	}
572
573	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
574		unregister_netdevice_queue(dev, head);
575
576	dev_put(dev);
577	return 0;
578}
579
580static inline void ipmr_cache_free(struct mfc_cache *c)
581{
582	kmem_cache_free(mrt_cachep, c);
583}
584
585/* Destroy an unresolved cache entry, killing queued skbs
586   and reporting error to netlink readers.
587 */
588
589static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
590{
591	struct net *net = read_pnet(&mrt->net);
592	struct sk_buff *skb;
593	struct nlmsgerr *e;
594
595	atomic_dec(&mrt->cache_resolve_queue_len);
596
597	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
598		if (ip_hdr(skb)->version == 0) {
599			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
600			nlh->nlmsg_type = NLMSG_ERROR;
601			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
602			skb_trim(skb, nlh->nlmsg_len);
603			e = NLMSG_DATA(nlh);
604			e->error = -ETIMEDOUT;
605			memset(&e->msg, 0, sizeof(e->msg));
606
607			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
608		} else
609			kfree_skb(skb);
610	}
611
612	ipmr_cache_free(c);
613}
614
615
616/* Timer process for the unresolved queue. */
617
618static void ipmr_expire_process(unsigned long arg)
619{
620	struct mr_table *mrt = (struct mr_table *)arg;
621	unsigned long now;
622	unsigned long expires;
623	struct mfc_cache *c, *next;
624
625	if (!spin_trylock(&mfc_unres_lock)) {
626		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
627		return;
628	}
629
630	if (list_empty(&mrt->mfc_unres_queue))
631		goto out;
632
633	now = jiffies;
634	expires = 10*HZ;
635
636	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
637		if (time_after(c->mfc_un.unres.expires, now)) {
638			unsigned long interval = c->mfc_un.unres.expires - now;
639			if (interval < expires)
640				expires = interval;
641			continue;
642		}
643
644		list_del(&c->list);
645		ipmr_destroy_unres(mrt, c);
646	}
647
648	if (!list_empty(&mrt->mfc_unres_queue))
649		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
650
651out:
652	spin_unlock(&mfc_unres_lock);
653}
654
655/* Fill the oif list. Called with mrt_lock held for writing. */
656
657static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
658				   unsigned char *ttls)
659{
660	int vifi;
661
662	cache->mfc_un.res.minvif = MAXVIFS;
663	cache->mfc_un.res.maxvif = 0;
664	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
665
666	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
667		if (VIF_EXISTS(mrt, vifi) &&
668		    ttls[vifi] && ttls[vifi] < 255) {
669			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
670			if (cache->mfc_un.res.minvif > vifi)
671				cache->mfc_un.res.minvif = vifi;
672			if (cache->mfc_un.res.maxvif <= vifi)
673				cache->mfc_un.res.maxvif = vifi + 1;
674		}
675	}
676}
677
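/*
 * Add a virtual interface. Depending on vifc_flags this may create a
 * register device or a DVMRP tunnel; the underlying device is put into
 * allmulti mode and multicast forwarding is enabled on it.
 */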
678static int vif_add(struct net *net, struct mr_table *mrt,
679		   struct vifctl *vifc, int mrtsock)
680{
681	int vifi = vifc->vifc_vifi;
682	struct vif_device *v = &mrt->vif_table[vifi];
683	struct net_device *dev;
684	struct in_device *in_dev;
685	int err;
686
687	/* Is vif busy ? */
688	if (VIF_EXISTS(mrt, vifi))
689		return -EADDRINUSE;
690
691	switch (vifc->vifc_flags) {
692#ifdef CONFIG_IP_PIMSM
693	case VIFF_REGISTER:
694		/*
695		 * Special Purpose VIF in PIM
696		 * All the packets will be sent to the daemon
697		 */
698		if (mrt->mroute_reg_vif_num >= 0)
699			return -EADDRINUSE;
700		dev = ipmr_reg_vif(net, mrt);
701		if (!dev)
702			return -ENOBUFS;
703		err = dev_set_allmulti(dev, 1);
704		if (err) {
705			unregister_netdevice(dev);
706			dev_put(dev);
707			return err;
708		}
709		break;
710#endif
711	case VIFF_TUNNEL:
712		dev = ipmr_new_tunnel(net, vifc);
713		if (!dev)
714			return -ENOBUFS;
715		err = dev_set_allmulti(dev, 1);
716		if (err) {
717			ipmr_del_tunnel(dev, vifc);
718			dev_put(dev);
719			return err;
720		}
721		break;
722
723	case VIFF_USE_IFINDEX:
724	case 0:
725		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
726			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
727			if (dev && dev->ip_ptr == NULL) {
728				dev_put(dev);
729				return -EADDRNOTAVAIL;
730			}
731		} else
732			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
733
734		if (!dev)
735			return -EADDRNOTAVAIL;
736		err = dev_set_allmulti(dev, 1);
737		if (err) {
738			dev_put(dev);
739			return err;
740		}
741		break;
742	default:
743		return -EINVAL;
744	}
745
746	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
747		dev_put(dev);
748		return -EADDRNOTAVAIL;
749	}
750	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
751	ip_rt_multicast_event(in_dev);
752
753	/*
754	 *	Fill in the VIF structures
755	 */
756	v->rate_limit = vifc->vifc_rate_limit;
757	v->local = vifc->vifc_lcl_addr.s_addr;
758	v->remote = vifc->vifc_rmt_addr.s_addr;
759	v->flags = vifc->vifc_flags;
760	if (!mrtsock)
761		v->flags |= VIFF_STATIC;
762	v->threshold = vifc->vifc_threshold;
763	v->bytes_in = 0;
764	v->bytes_out = 0;
765	v->pkt_in = 0;
766	v->pkt_out = 0;
767	v->link = dev->ifindex;
768	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
769		v->link = dev->iflink;
770
771	/* And finish update writing critical data */
772	write_lock_bh(&mrt_lock);
773	v->dev = dev;
774#ifdef CONFIG_IP_PIMSM
775	if (v->flags&VIFF_REGISTER)
776		mrt->mroute_reg_vif_num = vifi;
777#endif
778	if (vifi+1 > mrt->maxvif)
779		mrt->maxvif = vifi+1;
780	write_unlock_bh(&mrt_lock);
781	return 0;
782}
783
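/* Hash-table lookup of a resolved (origin, group) cache entry; called with mrt_lock held. */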
784static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
785					 __be32 origin,
786					 __be32 mcastgrp)
787{
788	int line = MFC_HASH(mcastgrp, origin);
789	struct mfc_cache *c;
790
791	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
792		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
793			return c;
794	}
795	return NULL;
796}
797
798/*
799 *	Allocate a multicast cache entry
800 */
801static struct mfc_cache *ipmr_cache_alloc(void)
802{
803	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
804	if (c == NULL)
805		return NULL;
806	c->mfc_un.res.minvif = MAXVIFS;
807	return c;
808}
809
810static struct mfc_cache *ipmr_cache_alloc_unres(void)
811{
812	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
813	if (c == NULL)
814		return NULL;
815	skb_queue_head_init(&c->mfc_un.unres.unresolved);
816	c->mfc_un.unres.expires = jiffies + 10*HZ;
817	return c;
818}
819
820/*
821 *	A cache entry has gone from the unresolved queue into a resolved state
822 */
823
824static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
825			       struct mfc_cache *uc, struct mfc_cache *c)
826{
827	struct sk_buff *skb;
828	struct nlmsgerr *e;
829
830	/*
831	 *	Play the pending entries through our router
832	 */
833
834	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
835		if (ip_hdr(skb)->version == 0) {
836			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
837
838			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
839				nlh->nlmsg_len = (skb_tail_pointer(skb) -
840						  (u8 *)nlh);
841			} else {
842				nlh->nlmsg_type = NLMSG_ERROR;
843				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
844				skb_trim(skb, nlh->nlmsg_len);
845				e = NLMSG_DATA(nlh);
846				e->error = -EMSGSIZE;
847				memset(&e->msg, 0, sizeof(e->msg));
848			}
849
850			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
851		} else
852			ip_mr_forward(net, mrt, skb, c, 0);
853	}
854}
855
856/*
857 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
858 *	expects the following bizarre scheme.
859 *
860 *	Called under mrt_lock.
861 */
862
863static int ipmr_cache_report(struct mr_table *mrt,
864			     struct sk_buff *pkt, vifi_t vifi, int assert)
865{
866	struct sk_buff *skb;
867	const int ihl = ip_hdrlen(pkt);
868	struct igmphdr *igmp;
869	struct igmpmsg *msg;
870	int ret;
871
872#ifdef CONFIG_IP_PIMSM
873	if (assert == IGMPMSG_WHOLEPKT)
874		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
875	else
876#endif
877		skb = alloc_skb(128, GFP_ATOMIC);
878
879	if (!skb)
880		return -ENOBUFS;
881
882#ifdef CONFIG_IP_PIMSM
883	if (assert == IGMPMSG_WHOLEPKT) {
884		/* Ugly, but we have no choice with this interface.
885		   Duplicate old header, fix ihl, length etc.
886		   And all this only to mangle msg->im_msgtype and
887		   to set msg->im_mbz to "mbz" :-)
888		 */
889		skb_push(skb, sizeof(struct iphdr));
890		skb_reset_network_header(skb);
891		skb_reset_transport_header(skb);
892		msg = (struct igmpmsg *)skb_network_header(skb);
893		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
894		msg->im_msgtype = IGMPMSG_WHOLEPKT;
895		msg->im_mbz = 0;
896		msg->im_vif = mrt->mroute_reg_vif_num;
897		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
898		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
899					     sizeof(struct iphdr));
900	} else
901#endif
902	{
903
904	/*
905	 *	Copy the IP header
906	 */
907
908	skb->network_header = skb->tail;
909	skb_put(skb, ihl);
910	skb_copy_to_linear_data(skb, pkt->data, ihl);
911	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
912	msg = (struct igmpmsg *)skb_network_header(skb);
913	msg->im_vif = vifi;
914	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
915
916	/*
917	 *	Add our header
918	 */
919
920	igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
921	igmp->type	=
922	msg->im_msgtype = assert;
923	igmp->code 	=	0;
924	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
925	skb->transport_header = skb->network_header;
926	}
927
928	if (mrt->mroute_sk == NULL) {
929		kfree_skb(skb);
930		return -EINVAL;
931	}
932
933	/*
934	 *	Deliver to mrouted
935	 */
936	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
937	if (ret < 0) {
938		if (net_ratelimit())
939			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
940		kfree_skb(skb);
941	}
942
943	return ret;
944}
945
946/*
947 *	Queue a packet awaiting resolution; the unresolved cache entry is handled under mfc_unres_lock.
948 */
949
950static int
951ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
952{
953	bool found = false;
954	int err;
955	struct mfc_cache *c;
956	const struct iphdr *iph = ip_hdr(skb);
957
958	spin_lock_bh(&mfc_unres_lock);
959	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
960		if (c->mfc_mcastgrp == iph->daddr &&
961		    c->mfc_origin == iph->saddr) {
962			found = true;
963			break;
964		}
965	}
966
967	if (!found) {
968		/*
969		 *	Create a new entry if allowable
970		 */
971
972		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
973		    (c = ipmr_cache_alloc_unres()) == NULL) {
974			spin_unlock_bh(&mfc_unres_lock);
975
976			kfree_skb(skb);
977			return -ENOBUFS;
978		}
979
980		/*
981		 *	Fill in the new cache entry
982		 */
983		c->mfc_parent	= -1;
984		c->mfc_origin	= iph->saddr;
985		c->mfc_mcastgrp	= iph->daddr;
986
987		/*
988		 *	Report the first packet up to mrouted.
989		 */
990		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
991		if (err < 0) {
992			/* If the report failed throw the cache entry
993			   out - Brad Parker
994			 */
995			spin_unlock_bh(&mfc_unres_lock);
996
997			ipmr_cache_free(c);
998			kfree_skb(skb);
999			return err;
1000		}
1001
1002		atomic_inc(&mrt->cache_resolve_queue_len);
1003		list_add(&c->list, &mrt->mfc_unres_queue);
1004
1005		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1006			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1007	}
1008
1009	/*
1010	 *	See if we can append the packet
1011	 */
1012	if (c->mfc_un.unres.unresolved.qlen>3) {
1013		kfree_skb(skb);
1014		err = -ENOBUFS;
1015	} else {
1016		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1017		err = 0;
1018	}
1019
1020	spin_unlock_bh(&mfc_unres_lock);
1021	return err;
1022}
1023
1024/*
1025 *	MFC cache manipulation by user space mroute daemon
1026 */
1027
1028static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1029{
1030	int line;
1031	struct mfc_cache *c, *next;
1032
1033	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1034
1035	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1036		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1037		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1038			write_lock_bh(&mrt_lock);
1039			list_del(&c->list);
1040			write_unlock_bh(&mrt_lock);
1041
1042			ipmr_cache_free(c);
1043			return 0;
1044		}
1045	}
1046	return -ENOENT;
1047}
1048
1049static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1050			struct mfcctl *mfc, int mrtsock)
1051{
1052	bool found = false;
1053	int line;
1054	struct mfc_cache *uc, *c;
1055
1056	if (mfc->mfcc_parent >= MAXVIFS)
1057		return -ENFILE;
1058
1059	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1060
1061	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
1062		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1063		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1064			found = true;
1065			break;
1066		}
1067	}
1068
1069	if (found) {
1070		write_lock_bh(&mrt_lock);
1071		c->mfc_parent = mfc->mfcc_parent;
1072		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1073		if (!mrtsock)
1074			c->mfc_flags |= MFC_STATIC;
1075		write_unlock_bh(&mrt_lock);
1076		return 0;
1077	}
1078
1079	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1080		return -EINVAL;
1081
1082	c = ipmr_cache_alloc();
1083	if (c == NULL)
1084		return -ENOMEM;
1085
1086	c->mfc_origin = mfc->mfcc_origin.s_addr;
1087	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1088	c->mfc_parent = mfc->mfcc_parent;
1089	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1090	if (!mrtsock)
1091		c->mfc_flags |= MFC_STATIC;
1092
1093	write_lock_bh(&mrt_lock);
1094	list_add(&c->list, &mrt->mfc_cache_array[line]);
1095	write_unlock_bh(&mrt_lock);
1096
1097	/*
1098	 *	Check to see if we resolved a queued list. If so we
1099	 *	need to send on the frames and tidy up.
1100	 */
1101	found = false;
1102	spin_lock_bh(&mfc_unres_lock);
1103	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
1104		if (uc->mfc_origin == c->mfc_origin &&
1105		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
1106			list_del(&uc->list);
1107			atomic_dec(&mrt->cache_resolve_queue_len);
1108			found = true;
1109			break;
1110		}
1111	}
1112	if (list_empty(&mrt->mfc_unres_queue))
1113		del_timer(&mrt->ipmr_expire_timer);
1114	spin_unlock_bh(&mfc_unres_lock);
1115
1116	if (found) {
1117		ipmr_cache_resolve(net, mrt, uc, c);
1118		ipmr_cache_free(uc);
1119	}
1120	return 0;
1121}
1122
1123/*
1124 *	Close the multicast socket, and clear the vif tables etc
1125 */
1126
1127static void mroute_clean_tables(struct mr_table *mrt)
1128{
1129	int i;
1130	LIST_HEAD(list);
1131	struct mfc_cache *c, *next;
1132
1133	/*
1134	 *	Shut down all active vif entries
1135	 */
1136	for (i = 0; i < mrt->maxvif; i++) {
1137		if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1138			vif_delete(mrt, i, 0, &list);
1139	}
1140	unregister_netdevice_many(&list);
1141
1142	/*
1143	 *	Wipe the cache
1144	 */
1145	for (i = 0; i < MFC_LINES; i++) {
1146		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1147			if (c->mfc_flags&MFC_STATIC)
1148				continue;
1149			write_lock_bh(&mrt_lock);
1150			list_del(&c->list);
1151			write_unlock_bh(&mrt_lock);
1152
1153			ipmr_cache_free(c);
1154		}
1155	}
1156
1157	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1158		spin_lock_bh(&mfc_unres_lock);
1159		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1160			list_del(&c->list);
1161			ipmr_destroy_unres(mrt, c);
1162		}
1163		spin_unlock_bh(&mfc_unres_lock);
1164	}
1165}
1166
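/*
 * Called when the mroute control socket goes away: disable multicast
 * forwarding and clean up the tables it owned.
 */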
1167static void mrtsock_destruct(struct sock *sk)
1168{
1169	struct net *net = sock_net(sk);
1170	struct mr_table *mrt;
1171
1172	rtnl_lock();
1173	ipmr_for_each_table(mrt, net) {
1174		if (sk == mrt->mroute_sk) {
1175			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1176
1177			write_lock_bh(&mrt_lock);
1178			mrt->mroute_sk = NULL;
1179			write_unlock_bh(&mrt_lock);
1180
1181			mroute_clean_tables(mrt);
1182		}
1183	}
1184	rtnl_unlock();
1185}
1186
1187/*
1188 *	Socket options and virtual interface manipulation. The whole
1189 *	virtual interface system is a complete heap, but unfortunately
1190 *	that's how BSD mrouted happens to think. Maybe one day with a proper
1191 *	MOSPF/PIM router set up we can clean this up.
1192 */
1193
1194int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1195{
1196	int ret;
1197	struct vifctl vif;
1198	struct mfcctl mfc;
1199	struct net *net = sock_net(sk);
1200	struct mr_table *mrt;
1201
1202	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1203	if (mrt == NULL)
1204		return -ENOENT;
1205
1206	if (optname != MRT_INIT) {
1207		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
1208			return -EACCES;
1209	}
1210
1211	switch (optname) {
1212	case MRT_INIT:
1213		if (sk->sk_type != SOCK_RAW ||
1214		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
1215			return -EOPNOTSUPP;
1216		if (optlen != sizeof(int))
1217			return -ENOPROTOOPT;
1218
1219		rtnl_lock();
1220		if (mrt->mroute_sk) {
1221			rtnl_unlock();
1222			return -EADDRINUSE;
1223		}
1224
1225		ret = ip_ra_control(sk, 1, mrtsock_destruct);
1226		if (ret == 0) {
1227			write_lock_bh(&mrt_lock);
1228			mrt->mroute_sk = sk;
1229			write_unlock_bh(&mrt_lock);
1230
1231			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1232		}
1233		rtnl_unlock();
1234		return ret;
1235	case MRT_DONE:
1236		if (sk != mrt->mroute_sk)
1237			return -EACCES;
1238		return ip_ra_control(sk, 0, NULL);
1239	case MRT_ADD_VIF:
1240	case MRT_DEL_VIF:
1241		if (optlen != sizeof(vif))
1242			return -EINVAL;
1243		if (copy_from_user(&vif, optval, sizeof(vif)))
1244			return -EFAULT;
1245		if (vif.vifc_vifi >= MAXVIFS)
1246			return -ENFILE;
1247		rtnl_lock();
1248		if (optname == MRT_ADD_VIF) {
1249			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1250		} else {
1251			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1252		}
1253		rtnl_unlock();
1254		return ret;
1255
1256		/*
1257		 *	Manipulate the forwarding caches. These live
1258		 *	in a sort of kernel/user symbiosis.
1259		 */
1260	case MRT_ADD_MFC:
1261	case MRT_DEL_MFC:
1262		if (optlen != sizeof(mfc))
1263			return -EINVAL;
1264		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1265			return -EFAULT;
1266		rtnl_lock();
1267		if (optname == MRT_DEL_MFC)
1268			ret = ipmr_mfc_delete(mrt, &mfc);
1269		else
1270			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1271		rtnl_unlock();
1272		return ret;
1273		/*
1274		 *	Control PIM assert.
1275		 */
1276	case MRT_ASSERT:
1277	{
1278		int v;
1279		if (get_user(v,(int __user *)optval))
1280			return -EFAULT;
1281		mrt->mroute_do_assert = (v) ? 1 : 0;
1282		return 0;
1283	}
1284#ifdef CONFIG_IP_PIMSM
1285	case MRT_PIM:
1286	{
1287		int v;
1288
1289		if (get_user(v,(int __user *)optval))
1290			return -EFAULT;
1291		v = (v) ? 1 : 0;
1292
1293		rtnl_lock();
1294		ret = 0;
1295		if (v != mrt->mroute_do_pim) {
1296			mrt->mroute_do_pim = v;
1297			mrt->mroute_do_assert = v;
1298		}
1299		rtnl_unlock();
1300		return ret;
1301	}
1302#endif
1303#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1304	case MRT_TABLE:
1305	{
1306		u32 v;
1307
1308		if (optlen != sizeof(u32))
1309			return -EINVAL;
1310		if (get_user(v, (u32 __user *)optval))
1311			return -EFAULT;
1312		if (sk == mrt->mroute_sk)
1313			return -EBUSY;
1314
1315		rtnl_lock();
1316		ret = 0;
1317		if (!ipmr_new_table(net, v))
1318			ret = -ENOMEM;
1319		raw_sk(sk)->ipmr_table = v;
1320		rtnl_unlock();
1321		return ret;
1322	}
1323#endif
1324	/*
1325	 *	Spurious command, or MRT_VERSION which you cannot
1326	 *	set.
1327	 */
1328	default:
1329		return -ENOPROTOOPT;
1330	}
1331}
1332
1333/*
1334 *	Getsock opt support for the multicast routing system.
1335 */
1336
1337int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1338{
1339	int olr;
1340	int val;
1341	struct net *net = sock_net(sk);
1342	struct mr_table *mrt;
1343
1344	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1345	if (mrt == NULL)
1346		return -ENOENT;
1347
1348	if (optname != MRT_VERSION &&
1349#ifdef CONFIG_IP_PIMSM
1350	   optname!=MRT_PIM &&
1351#endif
1352	   optname!=MRT_ASSERT)
1353		return -ENOPROTOOPT;
1354
1355	if (get_user(olr, optlen))
1356		return -EFAULT;
1357
1358	olr = min_t(unsigned int, olr, sizeof(int));
1359	if (olr < 0)
1360		return -EINVAL;
1361
1362	if (put_user(olr, optlen))
1363		return -EFAULT;
1364	if (optname == MRT_VERSION)
1365		val = 0x0305;
1366#ifdef CONFIG_IP_PIMSM
1367	else if (optname == MRT_PIM)
1368		val = mrt->mroute_do_pim;
1369#endif
1370	else
1371		val = mrt->mroute_do_assert;
1372	if (copy_to_user(optval, &val, olr))
1373		return -EFAULT;
1374	return 0;
1375}
1376
1377/*
1378 *	The IP multicast ioctl support routines.
1379 */
1380
1381int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1382{
1383	struct sioc_sg_req sr;
1384	struct sioc_vif_req vr;
1385	struct vif_device *vif;
1386	struct mfc_cache *c;
1387	struct net *net = sock_net(sk);
1388	struct mr_table *mrt;
1389
1390	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1391	if (mrt == NULL)
1392		return -ENOENT;
1393
1394	switch (cmd) {
1395	case SIOCGETVIFCNT:
1396		if (copy_from_user(&vr, arg, sizeof(vr)))
1397			return -EFAULT;
1398		if (vr.vifi >= mrt->maxvif)
1399			return -EINVAL;
1400		read_lock(&mrt_lock);
1401		vif = &mrt->vif_table[vr.vifi];
1402		if (VIF_EXISTS(mrt, vr.vifi)) {
1403			vr.icount = vif->pkt_in;
1404			vr.ocount = vif->pkt_out;
1405			vr.ibytes = vif->bytes_in;
1406			vr.obytes = vif->bytes_out;
1407			read_unlock(&mrt_lock);
1408
1409			if (copy_to_user(arg, &vr, sizeof(vr)))
1410				return -EFAULT;
1411			return 0;
1412		}
1413		read_unlock(&mrt_lock);
1414		return -EADDRNOTAVAIL;
1415	case SIOCGETSGCNT:
1416		if (copy_from_user(&sr, arg, sizeof(sr)))
1417			return -EFAULT;
1418
1419		read_lock(&mrt_lock);
1420		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1421		if (c) {
1422			sr.pktcnt = c->mfc_un.res.pkt;
1423			sr.bytecnt = c->mfc_un.res.bytes;
1424			sr.wrong_if = c->mfc_un.res.wrong_if;
1425			read_unlock(&mrt_lock);
1426
1427			if (copy_to_user(arg, &sr, sizeof(sr)))
1428				return -EFAULT;
1429			return 0;
1430		}
1431		read_unlock(&mrt_lock);
1432		return -EADDRNOTAVAIL;
1433	default:
1434		return -ENOIOCTLCMD;
1435	}
1436}
1437
1438
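/* Netdevice notifier: when a device unregisters, drop every VIF that was bound to it. */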
1439static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1440{
1441	struct net_device *dev = ptr;
1442	struct net *net = dev_net(dev);
1443	struct mr_table *mrt;
1444	struct vif_device *v;
1445	int ct;
1446	LIST_HEAD(list);
1447
1448	if (event != NETDEV_UNREGISTER)
1449		return NOTIFY_DONE;
1450
1451	ipmr_for_each_table(mrt, net) {
1452		v = &mrt->vif_table[0];
1453		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1454			if (v->dev == dev)
1455				vif_delete(mrt, ct, 1, &list);
1456		}
1457	}
1458	unregister_netdevice_many(&list);
1459	return NOTIFY_DONE;
1460}
1461
1462
1463static struct notifier_block ip_mr_notifier = {
1464	.notifier_call = ipmr_device_event,
1465};
1466
1467/*
1468 * 	Encapsulate a packet by attaching a valid IPIP header to it.
1469 *	This avoids tunnel drivers and other mess and gives us the speed so
1470 *	important for multicast video.
1471 */
1472
1473static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1474{
1475	struct iphdr *iph;
1476	struct iphdr *old_iph = ip_hdr(skb);
1477
1478	skb_push(skb, sizeof(struct iphdr));
1479	skb->transport_header = skb->network_header;
1480	skb_reset_network_header(skb);
1481	iph = ip_hdr(skb);
1482
1483	iph->version	= 	4;
1484	iph->tos	=	old_iph->tos;
1485	iph->ttl	=	old_iph->ttl;
1486	iph->frag_off	=	0;
1487	iph->daddr	=	daddr;
1488	iph->saddr	=	saddr;
1489	iph->protocol	=	IPPROTO_IPIP;
1490	iph->ihl	=	5;
1491	iph->tot_len	=	htons(skb->len);
1492	ip_select_ident(iph, skb_dst(skb), NULL);
1493	ip_send_check(iph);
1494
1495	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1496	nf_reset(skb);
1497}
1498
1499static inline int ipmr_forward_finish(struct sk_buff *skb)
1500{
1501	struct ip_options * opt	= &(IPCB(skb)->opt);
1502
1503	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1504
1505	if (unlikely(opt->optlen))
1506		ip_forward_options(skb);
1507
1508	return dst_output(skb);
1509}
1510
1511/*
1512 *	Processing handlers for ipmr_forward
1513 */
1514
1515static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1516			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
1517{
1518	const struct iphdr *iph = ip_hdr(skb);
1519	struct vif_device *vif = &mrt->vif_table[vifi];
1520	struct net_device *dev;
1521	struct rtable *rt;
1522	int    encap = 0;
1523
1524	if (vif->dev == NULL)
1525		goto out_free;
1526
1527#ifdef CONFIG_IP_PIMSM
1528	if (vif->flags & VIFF_REGISTER) {
1529		vif->pkt_out++;
1530		vif->bytes_out += skb->len;
1531		vif->dev->stats.tx_bytes += skb->len;
1532		vif->dev->stats.tx_packets++;
1533		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1534		goto out_free;
1535	}
1536#endif
1537
1538	if (vif->flags&VIFF_TUNNEL) {
1539		struct flowi fl = { .oif = vif->link,
1540				    .nl_u = { .ip4_u =
1541					      { .daddr = vif->remote,
1542						.saddr = vif->local,
1543						.tos = RT_TOS(iph->tos) } },
1544				    .proto = IPPROTO_IPIP };
1545		if (ip_route_output_key(net, &rt, &fl))
1546			goto out_free;
1547		encap = sizeof(struct iphdr);
1548	} else {
1549		struct flowi fl = { .oif = vif->link,
1550				    .nl_u = { .ip4_u =
1551					      { .daddr = iph->daddr,
1552						.tos = RT_TOS(iph->tos) } },
1553				    .proto = IPPROTO_IPIP };
1554		if (ip_route_output_key(net, &rt, &fl))
1555			goto out_free;
1556	}
1557
1558	dev = rt->dst.dev;
1559
1560	if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1561		/* Do not fragment multicasts. Alas, IPv4 does not
1562		   allow us to send ICMP here, so oversized packets
1563		   simply disappear into a blackhole.
1564		 */
1565
1566		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1567		ip_rt_put(rt);
1568		goto out_free;
1569	}
1570
1571	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
1572
1573	if (skb_cow(skb, encap)) {
1574		ip_rt_put(rt);
1575		goto out_free;
1576	}
1577
1578	vif->pkt_out++;
1579	vif->bytes_out += skb->len;
1580
1581	skb_dst_drop(skb);
1582	skb_dst_set(skb, &rt->dst);
1583	ip_decrease_ttl(ip_hdr(skb));
1584
1585	if (vif->flags & VIFF_TUNNEL) {
1586		ip_encap(skb, vif->local, vif->remote);
1587		vif->dev->stats.tx_packets++;
1588		vif->dev->stats.tx_bytes += skb->len;
1589	}
1590
1591	IPCB(skb)->flags |= IPSKB_FORWARDED;
1592
1593	/*
1594	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1595	 * not only before forwarding, but also after forwarding on all output
1596	 * interfaces. Clearly, if the mrouter runs a multicasting
1597	 * program, that program should receive packets regardless of which
1598	 * interface it joined on.
1599	 * If we did not do this, the program would have to join on all
1600	 * interfaces. On the other hand, a multihomed host (or router, but
1601	 * not an mrouter) cannot join on more than one interface - that would
1602	 * result in receiving duplicate packets.
1603	 */
1604	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
1605		ipmr_forward_finish);
1606	return;
1607
1608out_free:
1609	kfree_skb(skb);
1610}
1611
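/* Return the index of the VIF using @dev, or -1 if the device is not a VIF. */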
1612static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1613{
1614	int ct;
1615
1616	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1617		if (mrt->vif_table[ct].dev == dev)
1618			break;
1619	}
1620	return ct;
1621}
1622
1623/* "local" means that we should preserve one skb (for local delivery) */
1624
1625static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1626			 struct sk_buff *skb, struct mfc_cache *cache,
1627			 int local)
1628{
1629	int psend = -1;
1630	int vif, ct;
1631
1632	vif = cache->mfc_parent;
1633	cache->mfc_un.res.pkt++;
1634	cache->mfc_un.res.bytes += skb->len;
1635
1636	/*
1637	 * Wrong interface: drop packet and (maybe) send PIM assert.
1638	 */
1639	if (mrt->vif_table[vif].dev != skb->dev) {
1640		int true_vifi;
1641
1642		if (skb_rtable(skb)->fl.iif == 0) {
1643			goto dont_forward;
1644		}
1645
1646		cache->mfc_un.res.wrong_if++;
1647		true_vifi = ipmr_find_vif(mrt, skb->dev);
1648
1649		if (true_vifi >= 0 && mrt->mroute_do_assert &&
1650		    /* PIM-SM uses asserts when switching from RPT to SPT,
1651		       so we cannot insist that the packet arrived on an oif.
1652		       That is unfortunate, but otherwise we would need to move
1653		       a pretty large chunk of pimd into the kernel. Ough... --ANK
1654		     */
1655		    (mrt->mroute_do_pim ||
1656		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1657		    time_after(jiffies,
1658			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1659			cache->mfc_un.res.last_assert = jiffies;
1660			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1661		}
1662		goto dont_forward;
1663	}
1664
1665	mrt->vif_table[vif].pkt_in++;
1666	mrt->vif_table[vif].bytes_in += skb->len;
1667
1668	/*
1669	 *	Forward the frame
1670	 */
1671	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1672		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1673			if (psend != -1) {
1674				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1675				if (skb2)
1676					ipmr_queue_xmit(net, mrt, skb2, cache,
1677							psend);
1678			}
1679			psend = ct;
1680		}
1681	}
1682	if (psend != -1) {
1683		if (local) {
1684			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1685			if (skb2)
1686				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1687		} else {
1688			ipmr_queue_xmit(net, mrt, skb, cache, psend);
1689			return 0;
1690		}
1691	}
1692
1693dont_forward:
1694	if (!local)
1695		kfree_skb(skb);
1696	return 0;
1697}
1698
1699
1700/*
1701 *	Multicast packets for forwarding arrive here
1702 */
1703
1704int ip_mr_input(struct sk_buff *skb)
1705{
1706	struct mfc_cache *cache;
1707	struct net *net = dev_net(skb->dev);
1708	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1709	struct mr_table *mrt;
1710	int err;
1711
1712	/* The packet was looped back after forwarding; it must not be
1713	   forwarded a second time, but it can still be delivered locally.
1714	 */
1715	if (IPCB(skb)->flags&IPSKB_FORWARDED)
1716		goto dont_forward;
1717
1718	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1719	if (err < 0) {
1720		kfree_skb(skb);
1721		return err;
1722	}
1723
1724	if (!local) {
1725		    if (IPCB(skb)->opt.router_alert) {
1726			    if (ip_call_ra_chain(skb))
1727				    return 0;
1728		    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1729			    /* IGMPv1 (and broken IGMPv2 implementations such as
1730			       Cisco IOS <= 11.2(8)) do not put the router alert
1731			       option in IGMP packets destined for routable
1732			       groups. That is very bad, because it means
1733			       that we can forward NO IGMP messages.
1734			     */
1735			    read_lock(&mrt_lock);
1736			    if (mrt->mroute_sk) {
1737				    nf_reset(skb);
1738				    raw_rcv(mrt->mroute_sk, skb);
1739				    read_unlock(&mrt_lock);
1740				    return 0;
1741			    }
1742			    read_unlock(&mrt_lock);
1743		    }
1744	}
1745
1746	read_lock(&mrt_lock);
1747	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1748
1749	/*
1750	 *	No usable cache entry
1751	 */
1752	if (cache == NULL) {
1753		int vif;
1754
1755		if (local) {
1756			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1757			ip_local_deliver(skb);
1758			if (skb2 == NULL) {
1759				read_unlock(&mrt_lock);
1760				return -ENOBUFS;
1761			}
1762			skb = skb2;
1763		}
1764
1765		vif = ipmr_find_vif(mrt, skb->dev);
1766		if (vif >= 0) {
1767			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
1768			read_unlock(&mrt_lock);
1769
1770			return err2;
1771		}
1772		read_unlock(&mrt_lock);
1773		kfree_skb(skb);
1774		return -ENODEV;
1775	}
1776
1777	ip_mr_forward(net, mrt, skb, cache, local);
1778
1779	read_unlock(&mrt_lock);
1780
1781	if (local)
1782		return ip_local_deliver(skb);
1783
1784	return 0;
1785
1786dont_forward:
1787	if (local)
1788		return ip_local_deliver(skb);
1789	kfree_skb(skb);
1790	return 0;
1791}
1792
1793#ifdef CONFIG_IP_PIMSM
1794static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1795		     unsigned int pimlen)
1796{
1797	struct net_device *reg_dev = NULL;
1798	struct iphdr *encap;
1799
1800	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1801	/*
1802	   Check that:
1803	   a. packet is really destined to a multicast group
1804	   b. packet is not a NULL-REGISTER
1805	   c. packet is not truncated
1806	 */
1807	if (!ipv4_is_multicast(encap->daddr) ||
1808	    encap->tot_len == 0 ||
1809	    ntohs(encap->tot_len) + pimlen > skb->len)
1810		return 1;
1811
1812	read_lock(&mrt_lock);
1813	if (mrt->mroute_reg_vif_num >= 0)
1814		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1815	if (reg_dev)
1816		dev_hold(reg_dev);
1817	read_unlock(&mrt_lock);
1818
1819	if (reg_dev == NULL)
1820		return 1;
1821
1822	skb->mac_header = skb->network_header;
1823	skb_pull(skb, (u8*)encap - skb->data);
1824	skb_reset_network_header(skb);
1825	skb->protocol = htons(ETH_P_IP);
1826	skb->ip_summed = 0;
1827	skb->pkt_type = PACKET_HOST;
1828
1829	skb_tunnel_rx(skb, reg_dev);
1830
1831	netif_rx(skb);
1832	dev_put(reg_dev);
1833
1834	return 0;
1835}
1836#endif
1837
1838#ifdef CONFIG_IP_PIMSM_V1
1839/*
1840 * Handle IGMP messages of PIMv1
1841 */
1842
1843int pim_rcv_v1(struct sk_buff * skb)
1844{
1845	struct igmphdr *pim;
1846	struct net *net = dev_net(skb->dev);
1847	struct mr_table *mrt;
1848
1849	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1850		goto drop;
1851
1852	pim = igmp_hdr(skb);
1853
1854	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1855		goto drop;
1856
1857	if (!mrt->mroute_do_pim ||
1858	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1859		goto drop;
1860
1861	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1862drop:
1863		kfree_skb(skb);
1864	}
1865	return 0;
1866}
1867#endif
1868
1869#ifdef CONFIG_IP_PIMSM_V2
1870static int pim_rcv(struct sk_buff * skb)
1871{
1872	struct pimreghdr *pim;
1873	struct net *net = dev_net(skb->dev);
1874	struct mr_table *mrt;
1875
1876	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1877		goto drop;
1878
1879	pim = (struct pimreghdr *)skb_transport_header(skb);
1880	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1881	    (pim->flags&PIM_NULL_REGISTER) ||
1882	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1883	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1884		goto drop;
1885
1886	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1887		goto drop;
1888
1889	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1890drop:
1891		kfree_skb(skb);
1892	}
1893	return 0;
1894}
1895#endif
1896
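/*
 * Append the RTA_IIF and RTA_MULTIPATH attributes describing @c to an
 * rtnetlink message; returns a negative error if the entry is still
 * unresolved or the message runs out of room.
 */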
1897static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1898			      struct mfc_cache *c, struct rtmsg *rtm)
1899{
1900	int ct;
1901	struct rtnexthop *nhp;
1902	u8 *b = skb_tail_pointer(skb);
1903	struct rtattr *mp_head;
1904
1905	/* If cache is unresolved, don't try to parse IIF and OIF */
1906	if (c->mfc_parent >= MAXVIFS)
1907		return -ENOENT;
1908
1909	if (VIF_EXISTS(mrt, c->mfc_parent))
1910		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1911
1912	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1913
1914	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1915		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1916			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1917				goto rtattr_failure;
1918			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1919			nhp->rtnh_flags = 0;
1920			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1921			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1922			nhp->rtnh_len = sizeof(*nhp);
1923		}
1924	}
1925	mp_head->rta_type = RTA_MULTIPATH;
1926	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1927	rtm->rtm_type = RTN_MULTICAST;
1928	return 1;
1929
1930rtattr_failure:
1931	nlmsg_trim(skb, b);
1932	return -EMSGSIZE;
1933}
1934
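/*
 * Fill in multicast route information for an rtnetlink route query.
 * If no cache entry exists yet, queue a copy of the packet for
 * resolution (unless @nowait is set).
 */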
1935int ipmr_get_route(struct net *net,
1936		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1937{
1938	int err;
1939	struct mr_table *mrt;
1940	struct mfc_cache *cache;
1941	struct rtable *rt = skb_rtable(skb);
1942
1943	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1944	if (mrt == NULL)
1945		return -ENOENT;
1946
1947	read_lock(&mrt_lock);
1948	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1949
1950	if (cache == NULL) {
1951		struct sk_buff *skb2;
1952		struct iphdr *iph;
1953		struct net_device *dev;
1954		int vif;
1955
1956		if (nowait) {
1957			read_unlock(&mrt_lock);
1958			return -EAGAIN;
1959		}
1960
1961		dev = skb->dev;
1962		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1963			read_unlock(&mrt_lock);
1964			return -ENODEV;
1965		}
1966		skb2 = skb_clone(skb, GFP_ATOMIC);
1967		if (!skb2) {
1968			read_unlock(&mrt_lock);
1969			return -ENOMEM;
1970		}
1971
1972		skb_push(skb2, sizeof(struct iphdr));
1973		skb_reset_network_header(skb2);
1974		iph = ip_hdr(skb2);
1975		iph->ihl = sizeof(struct iphdr) >> 2;
1976		iph->saddr = rt->rt_src;
1977		iph->daddr = rt->rt_dst;
1978		iph->version = 0;
1979		err = ipmr_cache_unresolved(mrt, vif, skb2);
1980		read_unlock(&mrt_lock);
1981		return err;
1982	}
1983
1984	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1985		cache->mfc_flags |= MFC_NOTIFY;
1986	err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
1987	read_unlock(&mrt_lock);
1988	return err;
1989}
1990
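/* Build one RTM_NEWROUTE message describing a cache entry for a netlink dump. */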
1991static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1992			    u32 pid, u32 seq, struct mfc_cache *c)
1993{
1994	struct nlmsghdr *nlh;
1995	struct rtmsg *rtm;
1996
1997	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
1998	if (nlh == NULL)
1999		return -EMSGSIZE;
2000
2001	rtm = nlmsg_data(nlh);
2002	rtm->rtm_family   = RTNL_FAMILY_IPMR;
2003	rtm->rtm_dst_len  = 32;
2004	rtm->rtm_src_len  = 32;
2005	rtm->rtm_tos      = 0;
2006	rtm->rtm_table    = mrt->id;
2007	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2008	rtm->rtm_type     = RTN_MULTICAST;
2009	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2010	rtm->rtm_protocol = RTPROT_UNSPEC;
2011	rtm->rtm_flags    = 0;
2012
2013	NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2014	NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2015
2016	if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2017		goto nla_put_failure;
2018
2019	return nlmsg_end(skb, nlh);
2020
2021nla_put_failure:
2022	nlmsg_cancel(skb, nlh);
2023	return -EMSGSIZE;
2024}
2025
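/*
 * Netlink dump callback: walk all tables and hash chains, resuming from
 * the position saved in cb->args.
 */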
2026static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2027{
2028	struct net *net = sock_net(skb->sk);
2029	struct mr_table *mrt;
2030	struct mfc_cache *mfc;
2031	unsigned int t = 0, s_t;
2032	unsigned int h = 0, s_h;
2033	unsigned int e = 0, s_e;
2034
2035	s_t = cb->args[0];
2036	s_h = cb->args[1];
2037	s_e = cb->args[2];
2038
2039	read_lock(&mrt_lock);
2040	ipmr_for_each_table(mrt, net) {
2041		if (t < s_t)
2042			goto next_table;
2043		if (t > s_t)
2044			s_h = 0;
2045		for (h = s_h; h < MFC_LINES; h++) {
2046			list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2047				if (e < s_e)
2048					goto next_entry;
2049				if (ipmr_fill_mroute(mrt, skb,
2050						     NETLINK_CB(cb->skb).pid,
2051						     cb->nlh->nlmsg_seq,
2052						     mfc) < 0)
2053					goto done;
2054next_entry:
2055				e++;
2056			}
2057			e = s_e = 0;
2058		}
2059		s_h = 0;
2060next_table:
2061		t++;
2062	}
2063done:
2064	read_unlock(&mrt_lock);
2065
2066	cb->args[2] = e;
2067	cb->args[1] = h;
2068	cb->args[0] = t;
2069
2070	return skb->len;
2071}
2072
2073#ifdef CONFIG_PROC_FS
2074/*
2075 *	The /proc interfaces to multicast routing: /proc/ip_mr_cache and /proc/ip_mr_vif
2076 */
2077struct ipmr_vif_iter {
2078	struct seq_net_private p;
2079	struct mr_table *mrt;
2080	int ct;
2081};
2082
2083static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2084					   struct ipmr_vif_iter *iter,
2085					   loff_t pos)
2086{
2087	struct mr_table *mrt = iter->mrt;
2088
2089	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2090		if (!VIF_EXISTS(mrt, iter->ct))
2091			continue;
2092		if (pos-- == 0)
2093			return &mrt->vif_table[iter->ct];
2094	}
2095	return NULL;
2096}
2097
2098static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
2099	__acquires(mrt_lock)
2100{
2101	struct ipmr_vif_iter *iter = seq->private;
2102	struct net *net = seq_file_net(seq);
2103	struct mr_table *mrt;
2104
2105	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2106	if (mrt == NULL)
2107		return ERR_PTR(-ENOENT);
2108
2109	iter->mrt = mrt;
2110
2111	read_lock(&mrt_lock);
2112	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
2113		: SEQ_START_TOKEN;
2114}
2115
2116static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2117{
2118	struct ipmr_vif_iter *iter = seq->private;
2119	struct net *net = seq_file_net(seq);
2120	struct mr_table *mrt = iter->mrt;
2121
2122	++*pos;
2123	if (v == SEQ_START_TOKEN)
2124		return ipmr_vif_seq_idx(net, iter, 0);
2125
2126	while (++iter->ct < mrt->maxvif) {
2127		if (!VIF_EXISTS(mrt, iter->ct))
2128			continue;
2129		return &mrt->vif_table[iter->ct];
2130	}
2131	return NULL;
2132}
2133
2134static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
2135	__releases(mrt_lock)
2136{
2137	read_unlock(&mrt_lock);
2138}
2139
2140static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2141{
2142	struct ipmr_vif_iter *iter = seq->private;
2143	struct mr_table *mrt = iter->mrt;
2144
2145	if (v == SEQ_START_TOKEN) {
2146		seq_puts(seq,
2147			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
2148	} else {
2149		const struct vif_device *vif = v;
2150		const char *name =  vif->dev ? vif->dev->name : "none";
2151
2152		seq_printf(seq,
2153			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
2154			   vif - mrt->vif_table,
2155			   name, vif->bytes_in, vif->pkt_in,
2156			   vif->bytes_out, vif->pkt_out,
2157			   vif->flags, vif->local, vif->remote);
2158	}
2159	return 0;
2160}
2161
2162static const struct seq_operations ipmr_vif_seq_ops = {
2163	.start = ipmr_vif_seq_start,
2164	.next  = ipmr_vif_seq_next,
2165	.stop  = ipmr_vif_seq_stop,
2166	.show  = ipmr_vif_seq_show,
2167};
2168
2169static int ipmr_vif_open(struct inode *inode, struct file *file)
2170{
2171	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2172			    sizeof(struct ipmr_vif_iter));
2173}
2174
2175static const struct file_operations ipmr_vif_fops = {
2176	.owner	 = THIS_MODULE,
2177	.open    = ipmr_vif_open,
2178	.read    = seq_read,
2179	.llseek  = seq_lseek,
2180	.release = seq_release_net,
2181};
2182
2183struct ipmr_mfc_iter {
2184	struct seq_net_private p;
2185	struct mr_table *mrt;
2186	struct list_head *cache;
2187	int ct;
2188};
2189
2190
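/*
 *	seq_file iterator for /proc/net/ip_mr_cache.  The walk spans two
 *	lists protected by different locks: the mrt->mfc_cache_array[]
 *	buckets under read_lock(&mrt_lock), then mrt->mfc_unres_queue under
 *	spin_lock_bh(&mfc_unres_lock).  it->cache records which list we are
 *	currently in, so ipmr_mfc_seq_next() knows when to hand over from
 *	one lock to the other and ipmr_mfc_seq_stop() knows which lock is
 *	still held.
 */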
2191static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2192					  struct ipmr_mfc_iter *it, loff_t pos)
2193{
2194	struct mr_table *mrt = it->mrt;
2195	struct mfc_cache *mfc;
2196
2197	read_lock(&mrt_lock);
2198	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
2199		it->cache = &mrt->mfc_cache_array[it->ct];
2200		list_for_each_entry(mfc, it->cache, list)
2201			if (pos-- == 0)
2202				return mfc;
2203	}
2204	read_unlock(&mrt_lock);
2205
2206	spin_lock_bh(&mfc_unres_lock);
2207	it->cache = &mrt->mfc_unres_queue;
2208	list_for_each_entry(mfc, it->cache, list)
2209		if (pos-- == 0)
2210			return mfc;
2211	spin_unlock_bh(&mfc_unres_lock);
2212
2213	it->cache = NULL;
2214	return NULL;
2215}
2216
2217
2218static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2219{
2220	struct ipmr_mfc_iter *it = seq->private;
2221	struct net *net = seq_file_net(seq);
2222	struct mr_table *mrt;
2223
2224	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2225	if (mrt == NULL)
2226		return ERR_PTR(-ENOENT);
2227
2228	it->mrt = mrt;
2229	it->cache = NULL;
2230	it->ct = 0;
2231	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
2232		: SEQ_START_TOKEN;
2233}
2234
2235static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2236{
2237	struct mfc_cache *mfc = v;
2238	struct ipmr_mfc_iter *it = seq->private;
2239	struct net *net = seq_file_net(seq);
2240	struct mr_table *mrt = it->mrt;
2241
2242	++*pos;
2243
2244	if (v == SEQ_START_TOKEN)
2245		return ipmr_mfc_seq_idx(net, seq->private, 0);
2246
2247	if (mfc->list.next != it->cache)
2248		return list_entry(mfc->list.next, struct mfc_cache, list);
2249
2250	if (it->cache == &mrt->mfc_unres_queue)
2251		goto end_of_list;
2252
2253	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
2254
2255	while (++it->ct < MFC_LINES) {
2256		it->cache = &mrt->mfc_cache_array[it->ct];
2257		if (list_empty(it->cache))
2258			continue;
2259		return list_first_entry(it->cache, struct mfc_cache, list);
2260	}
2261
2262	/* exhausted cache_array, show unresolved */
2263	read_unlock(&mrt_lock);
2264	it->cache = &mrt->mfc_unres_queue;
2265	it->ct = 0;
2266
2267	spin_lock_bh(&mfc_unres_lock);
2268	if (!list_empty(it->cache))
2269		return list_first_entry(it->cache, struct mfc_cache, list);
2270
2271 end_of_list:
2272	spin_unlock_bh(&mfc_unres_lock);
2273	it->cache = NULL;
2274
2275	return NULL;
2276}
2277
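/*
 *	Drop whichever lock the iterator still holds (see the comment above
 *	ipmr_mfc_seq_idx()); if the walk already finished, it->cache is NULL
 *	and no lock is held.
 */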
2278static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2279{
2280	struct ipmr_mfc_iter *it = seq->private;
2281	struct mr_table *mrt = it->mrt;
2282
2283	if (it->cache == &mrt->mfc_unres_queue)
2284		spin_unlock_bh(&mfc_unres_lock);
2285	else if (it->cache == &mrt->mfc_cache_array[it->ct])
2286		read_unlock(&mrt_lock);
2287}
2288
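/*
 *	One line per (group, origin) cache entry.  Resolved entries show
 *	their packet/byte/wrong-interface counters followed by "oif:ttl"
 *	pairs for every output interface whose TTL threshold is below 255;
 *	unresolved entries show zero counters and no interface list.  An
 *	illustrative resolved line (values made up):
 *
 *	E0000001 C0A80001 1          10     1500        0  2:1    3:1
 */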
2289static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2290{
2291	int n;
2292
2293	if (v == SEQ_START_TOKEN) {
2294		seq_puts(seq,
2295		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
2296	} else {
2297		const struct mfc_cache *mfc = v;
2298		const struct ipmr_mfc_iter *it = seq->private;
2299		const struct mr_table *mrt = it->mrt;
2300
2301		seq_printf(seq, "%08X %08X %-3hd",
2302			   (__force u32) mfc->mfc_mcastgrp,
2303			   (__force u32) mfc->mfc_origin,
2304			   mfc->mfc_parent);
2305
2306		if (it->cache != &mrt->mfc_unres_queue) {
2307			seq_printf(seq, " %8lu %8lu %8lu",
2308				   mfc->mfc_un.res.pkt,
2309				   mfc->mfc_un.res.bytes,
2310				   mfc->mfc_un.res.wrong_if);
2311			for (n = mfc->mfc_un.res.minvif;
2312			     n < mfc->mfc_un.res.maxvif; n++ ) {
2313				if (VIF_EXISTS(mrt, n) &&
2314				    mfc->mfc_un.res.ttls[n] < 255)
2315					seq_printf(seq,
2316					   " %2d:%-3d",
2317					   n, mfc->mfc_un.res.ttls[n]);
2318			}
2319		} else {
2320			/* unresolved mfc_caches don't contain
2321			 * pkt, bytes and wrong_if values
2322			 */
2323			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
2324		}
2325		seq_putc(seq, '\n');
2326	}
2327	return 0;
2328}
2329
2330static const struct seq_operations ipmr_mfc_seq_ops = {
2331	.start = ipmr_mfc_seq_start,
2332	.next  = ipmr_mfc_seq_next,
2333	.stop  = ipmr_mfc_seq_stop,
2334	.show  = ipmr_mfc_seq_show,
2335};
2336
2337static int ipmr_mfc_open(struct inode *inode, struct file *file)
2338{
2339	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2340			    sizeof(struct ipmr_mfc_iter));
2341}
2342
2343static const struct file_operations ipmr_mfc_fops = {
2344	.owner	 = THIS_MODULE,
2345	.open    = ipmr_mfc_open,
2346	.read    = seq_read,
2347	.llseek  = seq_lseek,
2348	.release = seq_release_net,
2349};
2350#endif
2351
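/*
 *	Incoming IPPROTO_PIM packets are delivered to pim_rcv() through this
 *	handler; it is registered in ip_mr_init() below.
 */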
2352#ifdef CONFIG_IP_PIMSM_V2
2353static const struct net_protocol pim_protocol = {
2354	.handler	=	pim_rcv,
2355	.netns_ok	=	1,
2356};
2357#endif
2358
2359
2360/*
2361 *	Setup for IP multicast routing
2362 */
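/*
 *	Per-namespace init: set up the routing table state via
 *	ipmr_rules_init() first, then the two /proc/net entries; failures
 *	unwind in the reverse order.
 */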
2363static int __net_init ipmr_net_init(struct net *net)
2364{
2365	int err;
2366
2367	err = ipmr_rules_init(net);
2368	if (err < 0)
2369		goto fail;
2370
2371#ifdef CONFIG_PROC_FS
2372	err = -ENOMEM;
2373	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2374		goto proc_vif_fail;
2375	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2376		goto proc_cache_fail;
2377#endif
2378	return 0;
2379
2380#ifdef CONFIG_PROC_FS
2381proc_cache_fail:
2382	proc_net_remove(net, "ip_mr_vif");
2383proc_vif_fail:
2384	ipmr_rules_exit(net);
2385#endif
2386fail:
2387	return err;
2388}
2389
2390static void __net_exit ipmr_net_exit(struct net *net)
2391{
2392#ifdef CONFIG_PROC_FS
2393	proc_net_remove(net, "ip_mr_cache");
2394	proc_net_remove(net, "ip_mr_vif");
2395#endif
2396	ipmr_rules_exit(net);
2397}
2398
2399static struct pernet_operations ipmr_net_ops = {
2400	.init = ipmr_net_init,
2401	.exit = ipmr_net_exit,
2402};
2403
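/*
 *	Module init: allocate the mfc_cache slab, then register the pernet
 *	ops, the netdevice notifier, the IPPROTO_PIM handler (when PIMv2 is
 *	configured) and finally the RTNL_FAMILY_IPMR dump handler.  Each
 *	error label unwinds exactly what was registered before it, in
 *	reverse order.
 */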
2404int __init ip_mr_init(void)
2405{
2406	int err;
2407
2408	mrt_cachep = kmem_cache_create("ip_mrt_cache",
2409				       sizeof(struct mfc_cache),
2410				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2411				       NULL);
2412	if (!mrt_cachep)
2413		return -ENOMEM;
2414
2415	err = register_pernet_subsys(&ipmr_net_ops);
2416	if (err)
2417		goto reg_pernet_fail;
2418
2419	err = register_netdevice_notifier(&ip_mr_notifier);
2420	if (err)
2421		goto reg_notif_fail;
2422#ifdef CONFIG_IP_PIMSM_V2
2423	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2424		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2425		err = -EAGAIN;
2426		goto add_proto_fail;
2427	}
2428#endif
2429	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
2430	return 0;
2431
2432#ifdef CONFIG_IP_PIMSM_V2
2433add_proto_fail:
2434	unregister_netdevice_notifier(&ip_mr_notifier);
2435#endif
2436reg_notif_fail:
2437	unregister_pernet_subsys(&ipmr_net_ops);
2438reg_pernet_fail:
2439	kmem_cache_destroy(mrt_cachep);
2440	return err;
2441}
2442