/*
 *	Linux NET3:	IP/IP protocol decoder.
 *
 *	Version: $Id: ipip.c,v 1.1.1.1 2007/08/03 18:53:51 Exp $
 *
 *	Authors:
 *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
 *
 *	Fixes:
 *		Alan Cox	:	Merged and made usable non modular (it's so tiny it's silly as
 *					a module taking up 2 pages).
 *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
 *					to keep ip_forward happy.
 *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
 *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
 *              David Woodhouse :       Perform some basic ICMP handling.
 *                                      IPIP Routing without decapsulation.
 *              Carlos Picoto   :       GRE over IP support
 *		Alexey Kuznetsov:	Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
 *					I do not want to merge them together.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

/* tunnel.c: an IP tunnel driver

	The purpose of this driver is to provide an IP tunnel through
	which you can tunnel network traffic transparently across subnets.

	This was written by looking at Nick Holloway's dummy driver.
	Thanks for the great code!

		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95

	Minor tweaks:
		Cleaned up the code a little and added some pre-1.3.0 tweaks.
		dev->hard_header/hard_header_len changed to use no headers.
		Comments/bracketing tweaked.
		Made the tunnels use dev->name, not "tunnel:", when error reporting.
		Added tx_dropped stat.

		-Alan Cox	(Alan.Cox@linux.org) 21 March 95

	Reworked:
		Changed to tunnel to the destination gateway in addition to the
			tunnel's pointopoint address.
		Almost completely rewritten.
		Note: there is currently no firewall or ICMP handling done.

		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96

*/

/* Things I wish I had known when writing the tunnel driver:

	When the tunnel_xmit() function is called, the skb contains the
	packet to be sent (plus a great deal of extra info), and dev
	contains the tunnel device that _we_ are.

	When we are passed a packet, we are expected to fill in the
	source address with our source IP address.

	What is the proper way to allocate, copy and free a buffer?
	After you allocate it, it is a "0 length" chunk of memory
	starting at zero.  If you want to add headers to the buffer
	later, you'll have to call "skb_reserve(skb, amount)" with
	the amount of memory you want reserved.  Then, you call
	"skb_put(skb, amount)" with the amount of space you want in
	the buffer.  skb_put() returns a pointer to the top (#0) of
	that buffer.  skb->len is set to the amount of space you have
	"allocated" with skb_put().  You can then write up to skb->len
	bytes to that buffer.  If you need more, you can call skb_put()
	again with the additional amount of space you need.  You can
	find out how much more space you can allocate by calling
	"skb_tailroom(skb)".
	Now, to add header space, call "skb_push(skb, header_len)".
	This creates space at the beginning of the buffer and returns
	a pointer to this new space.  If later you need to strip a
	header from a buffer, call "skb_pull(skb, header_len)".
	skb_headroom() will return how much space is left at the top
	of the buffer (before the main data).  Remember, this headroom
	space must be reserved before the skb_put() function is called.
	*/
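
/*
 * A minimal illustrative sketch of the skb sequence described above.  It is
 * not part of the driver and is kept under "#if 0" so it is never built; the
 * function name and the 20/100 byte sizes are made up for the example.
 */
#if 0
static struct sk_buff *skb_usage_example(void)
{
	struct sk_buff *skb;

	/* Allocate room for a 20 byte header plus 100 bytes of payload. */
	skb = alloc_skb(20 + 100, GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	skb_reserve(skb, 20);			/* reserve headroom first       */
	memset(skb_put(skb, 100), 0, 100);	/* payload; skb->len is now 100 */
	skb_push(skb, 20);			/* prepend header; len is now 120 */

	return skb;				/* caller frees with kfree_skb() */
}
#endif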

/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments look at net/ipv4/ip_gre.c --ANK
 */


#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>

#define HASH_SIZE  16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

static struct net_device *ipip_fb_tunnel_dev;

static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

static DEFINE_RWLOCK(ipip_lock);

static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(local);
	struct ip_tunnel *t;

	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	}
	for (t = tunnels_r[h0]; t; t = t->next) {
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	}
	for (t = tunnels_l[h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;
	}
	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
		return t;
	return NULL;
}

static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	unsigned h = 0;
	int prio = 0;

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	return &tunnels[prio][h];
}

static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
{
	return __ipip_bucket(&t->parms);
}

static void ipip_tunnel_unlink(struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipip_lock);
			*tp = t->next;
			write_unlock_bh(&ipip_lock);
			break;
		}
	}
}

static void ipip_tunnel_link(struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipip_bucket(t);

	t->next = *tp;
	write_lock_bh(&ipip_lock);
	*tp = t;
	write_unlock_bh(&ipip_lock);
}

static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, **tp, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];

	for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		int i;
		for (i=1; i<100; i++) {
			sprintf(name, "tunl%d", i);
			if (__dev_get_by_name(name) == NULL)
				break;
		}
		if (i==100)
			goto failed;
	}

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	nt = netdev_priv(dev);
	SET_MODULE_OWNER(dev);
	dev->init = ipip_tunnel_init;
	nt->parms = *parms;

	if (register_netdevice(dev) < 0) {
		free_netdev(dev);
		goto failed;
	}

	dev_hold(dev);
	ipip_tunnel_link(nt);
	return nt;

failed:
	return NULL;
}

static void ipip_tunnel_uninit(struct net_device *dev)
{
	if (dev == ipip_fb_tunnel_dev) {
		write_lock_bh(&ipip_lock);
		tunnels_wc[0] = NULL;
		write_unlock_bh(&ipip_lock);
	} else
		ipip_tunnel_unlink(netdev_priv(dev));
	dev_put(dev);
}

static int ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload.  This means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	struct iphdr *iph = (struct iphdr*)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	read_lock(&ipip_lock);
	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipip_lock);
	return err;
#else
	struct iphdr *iph = (struct iphdr*)skb->data;
	int hlen = iph->ihl<<2;
	struct iphdr *eiph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int rel_type = 0;
	int rel_code = 0;
	__be32 rel_info = 0;
	__u32 n = 0;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (skb->len < hlen + sizeof(struct iphdr))
		return 0;
	eiph = (struct iphdr*)(skb->data + hlen);

	switch (type) {
	default:
		return 0;
	case ICMP_PARAMETERPROB:
		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
		if (n < hlen)
			return 0;

		/* So... This guy found something strange INSIDE the
		   encapsulated packet. Well, he is a fool, but what can we do?
		 */
		rel_type = ICMP_PARAMETERPROB;
		rel_info = htonl((n - hlen) << 24);
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
			if (n < hlen+68)
				return 0;
			n -= hlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (n > ntohs(eiph->tot_len))
				return 0;
			rel_info = htonl(n);
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	/* Prepare a fake skb to feed to icmp_send(). */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return 0;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb_reset_network_header(skb2);

	/* Try to guess the incoming interface. */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_dst = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_IPIP;
	if (ip_route_output_key(&rt, &fl)) {
		kfree_skb(skb2);
		return 0;
	}
	skb2->dev = rt->u.dst.dev;

	/* route the "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_dst = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(&rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return 0;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
			kfree_skb(skb2);
			return 0;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (n > dst_mtu(skb2->dst)) {
			kfree_skb(skb2);
			return 0;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, n);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = netdev_priv(skb2->dev);
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
	return 0;
#endif
}

static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
					struct sk_buff *skb)
{
	struct iphdr *inner_iph = ip_hdr(skb);

	if (INET_ECN_is_ce(outer_iph->tos))
		IP_ECN_set_ce(inner_iph);
}

static int ipip_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);

	read_lock(&ipip_lock);
	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			read_unlock(&ipip_lock);
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		ipip_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipip_lock);
		return 0;
	}
	read_unlock(&ipip_lock);

	return -1;
}

/*
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 */

static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *iph;			/* Our new IP header */
	int    max_headroom;			/* The extra header space needed */
	__be32 dst = tiph->daddr;
	int    mtu;

	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	if (tos&1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		if ((rt = (struct rtable*)skb->dst) == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (tiph->frag_off)
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (mtu < 68) {
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	df |= (old_iph->frag_off&htons(IP_DF));

	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}

static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipip_fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(&p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				ipip_tunnel_unlink(t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipip_fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t->dev == ipip_fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
{
	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
}

static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

static void ipip_tunnel_setup(struct net_device *dev)
{
	SET_MODULE_OWNER(dev);
	dev->uninit		= ipip_tunnel_uninit;
	dev->hard_start_xmit	= ipip_tunnel_xmit;
	dev->get_stats		= ipip_tunnel_get_stats;
	dev->do_ioctl		= ipip_tunnel_ioctl;
	dev->change_mtu		= ipip_tunnel_change_mtu;
	dev->destructor		= free_netdev;

	dev->type		= ARPHRD_TUNNEL;
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
}

static int ipip_tunnel_init(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		struct rtable *rt;
		if (!ip_route_output_key(&rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->parms.link);

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;

	return 0;
}

static int __init ipip_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	dev_hold(dev);
	tunnels_wc[0]		= tunnel;
	return 0;
}

static struct xfrm_tunnel ipip_handler = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};

static char banner[] __initdata =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";

static int __init ipip_init(void)
{
	int err;

	printk(banner);

	if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
		printk(KERN_INFO "ipip init: can't register tunnel\n");
		return -EAGAIN;
	}

	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					  "tunl0",
					  ipip_tunnel_setup);
	if (!ipip_fb_tunnel_dev) {
		err = -ENOMEM;
		goto err1;
	}

	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;

	if ((err = register_netdev(ipip_fb_tunnel_dev)))
		goto err2;
 out:
	return err;
 err2:
	free_netdev(ipip_fb_tunnel_dev);
 err1:
	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
	goto out;
}

static void __exit ipip_destroy_tunnels(void)
{
	int prio;

	for (prio = 1; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;
			while ((t = tunnels[prio][h]) != NULL)
				unregister_netdevice(t->dev);
		}
	}
}

static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
		printk(KERN_INFO "ipip close: can't deregister tunnel\n");

	rtnl_lock();
	ipip_destroy_tunnels();
	unregister_netdevice(ipip_fb_tunnel_dev);
	rtnl_unlock();
}

module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");