• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/
1/*
2 *	Linux NET3:	GRE over IP protocol decoder.
3 *
4 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 *	This program is free software; you can redistribute it and/or
7 *	modify it under the terms of the GNU General Public License
8 *	as published by the Free Software Foundation; either version
9 *	2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <linux/capability.h>
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
17#include <linux/slab.h>
18#include <asm/uaccess.h>
19#include <linux/skbuff.h>
20#include <linux/netdevice.h>
21#include <linux/in.h>
22#include <linux/tcp.h>
23#include <linux/udp.h>
24#include <linux/if_arp.h>
25#include <linux/mroute.h>
26#include <linux/init.h>
27#include <linux/in6.h>
28#include <linux/inetdevice.h>
29#include <linux/igmp.h>
30#include <linux/netfilter_ipv4.h>
31#include <linux/etherdevice.h>
32#include <linux/if_ether.h>
33
34#include <net/sock.h>
35#include <net/ip.h>
36#include <net/icmp.h>
37#include <net/protocol.h>
38#include <net/ipip.h>
39#include <net/arp.h>
40#include <net/checksum.h>
41#include <net/dsfield.h>
42#include <net/inet_ecn.h>
43#include <net/xfrm.h>
44#include <net/net_namespace.h>
45#include <net/netns/generic.h>
46#include <net/rtnetlink.h>
47
48#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#endif
53
54/*
55   Problems & solutions
56   --------------------
57
58   1. The most important issue is detecting local dead loops.
59   They would cause complete host lockup in transmit, which
60   would be "resolved" by stack overflow or, if queueing is enabled,
61   with infinite looping in net_bh.
62
63   We cannot track such dead loops during route installation,
64   it is infeasible task. The most general solutions would be
65   to keep skb->encapsulation counter (sort of local ttl),
66   and silently drop packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
68   skb, even if no tunneling is used.
69
70   Current solution: HARD_TX_LOCK lock breaks dead loops.
71
72
73
74   2. Networking dead loops would not kill routers, but would really
75   kill network. IP hop limit plays role of "t->recursion" in this case,
76   if we copy it from packet being encapsulated to upper header.
77   It is very good solution, but it introduces two problems:
78
79   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80     do not work over tunnels.
81   - traceroute does not work. I planned to relay ICMP from tunnel,
82     so that this problem would be solved and traceroute output
83     would even more informative. This idea appeared to be wrong:
84     only Linux complies to rfc1812 now (yes, guys, Linux is the only
85     true router now :-)), all routers (at least, in neighbourhood of mine)
86     return only 8 bytes of payload. It is the end.
87
88   Hence, if we want that OSPF worked or traceroute said something reasonable,
89   we should search for another solution.
90
91   One of them is to parse packet trying to detect inner encapsulation
92   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, it is not a solution at all.
94
95   Current solution: The solution was UNEXPECTEDLY SIMPLE.
96   We force DF flag on tunnels with preconfigured hop limit,
97   that is ALL. :-) Well, it does not remove the problem completely,
98   but exponential growth of network traffic is changed to linear
99   (branches, that exceed pmtu are pruned) and tunnel mtu
100   fastly degrades to value <68, where looping stops.
101   Yes, it is not good if there exists a router in the loop,
102   which does not force DF, even when encapsulating packets have DF set.
103   But it is not our problem! Nobody could accuse us, we made
104   all that we could make. Even if it is your gated who injected
105   fatal route to network, even if it were you who configured
106   fatal static route: you are innocent. :-)
107
108
109
110   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111   practically identical code. It would be good to glue them
112   together, but it is not very evident, how to make them modular.
113   sit is integral part of IPv6, ipip and gre are naturally modular.
114   We could extract common parts (hash table, ioctl etc)
115   to a separate module (ip_tunnel.c).
116
117   Alexey Kuznetsov.
118 */
119
/* Link ops attached to every GRE device created by ipgre_tunnel_locate(). */
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
/* Forward declarations: these routines are referenced before they are defined. */
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);
124
125/* Fallback tunnel: no source, no destination, no key, no options */
126
/* Number of buckets in each tunnel hash table. */
#define HASH_SIZE  16

/* Key handed to net_generic() to fetch this module's per-namespace state. */
static int ipgre_net_id __read_mostly;
/* Per-network-namespace GRE state. */
struct ipgre_net {
	/* Hash tables indexed as tunnels[prio][hash]; see __ipgre_bucket(). */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* Device whose tunnel is returned by lookup when nothing else matches. */
	struct net_device *fb_tunnel_dev;
};
135
136/* Tunnel hash table */
137
138/*
139   4 hash tables:
140
141   3: (remote,local)
142   2: (remote,*)
143   1: (*,local)
144   0: (*,*)
145
146   We require exact key match i.e. if a key is present in packet
147   it will match only tunnel with the same key; if it is not present,
148   it will match only keyless tunnel.
149
   All keyless packets, if not matched configured keyless tunnels
151   will match fallback tunnel.
152 */
153
/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 * The argument is parenthesized so that expression arguments (e.g. "a | b")
 * are hashed as a whole, and the mask is derived from HASH_SIZE so the two
 * cannot drift apart. HASH_SIZE must be a power of two.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
155
/* Shorthand names for the four tables, from most to least specific. */
#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]
/*
 * Locking : hash tables are protected by RCU and a spinlock
 * (the spinlock is taken around list updates in ipgre_tunnel_link() and
 * ipgre_tunnel_unlink()).
 */
static DEFINE_SPINLOCK(ipgre_lock);

/*
 * Walk one hash chain using rcu_dereference() on every link.
 * The iterator is a variable named "t" that must already be declared in
 * the enclosing scope.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
167
/*
 * Given src, dst and key, find the tunnel that should receive a packet.
 *
 * @dev:       device the packet arrived on; supplies the namespace and the
 *             ifindex compared against each tunnel's parms.link.
 * @remote:    address matched against the tunnel's configured daddr.
 * @local:     address matched against the tunnel's configured saddr.
 * @key:       GRE key from the packet (0 when the packet carries none).
 * @gre_proto: GRE protocol field; ETH_P_TEB selects ARPHRD_ETHER devices,
 *             anything else selects ARPHRD_IPGRE.
 *
 * The four tables are searched from most to least specific. Within each
 * chain a tunnel that is up and matches addresses/key is scored:
 * bit 0 set if its link differs from the input ifindex, bit 1 set if its
 * device type differs from the wanted one. A score of 0 is returned at
 * once; otherwise the lowest-scoring candidate seen across all tables is
 * kept. If there is no candidate, the fallback tunnel is returned when its
 * device is up, else NULL.
 *
 * The chains are walked with rcu_dereference(); callers in this file hold
 * rcu_read_lock() around the call.
 */

static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
					      __be32 remote, __be32 local,
					      __be32 key, __be16 gre_proto)
{
	struct net *net = dev_net(dev);
	int link = dev->ifindex;
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(key);
	struct ip_tunnel *t, *cand = NULL;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
		       ARPHRD_ETHER : ARPHRD_IPGRE;
	/* 4 is larger than any possible score (0..3), so any match beats it. */
	int score, cand_score = 4;

	/* Table 3: both remote and local configured. */
	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		/* An ARPHRD_IPGRE device is accepted for either wanted type. */
		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	/* Table 2: remote configured, any local. */
	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
		if (remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	/*
	 * Table 1: hashed by key only. Matches when "local" equals the
	 * tunnel's saddr, or when "local" is multicast and equals the
	 * tunnel's daddr.
	 */
	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	/* Table 0: wildcard addresses, only the key has to match. */
	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	if (cand != NULL)
		return cand;

	/* Nothing configured matched: fall back to the fallback device if up. */
	dev = ign->fb_tunnel_dev;
	if (dev->flags & IFF_UP)
		return netdev_priv(dev);

	return NULL;
}
291
292static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
293		struct ip_tunnel_parm *parms)
294{
295	__be32 remote = parms->iph.daddr;
296	__be32 local = parms->iph.saddr;
297	__be32 key = parms->i_key;
298	unsigned h = HASH(key);
299	int prio = 0;
300
301	if (local)
302		prio |= 1;
303	if (remote && !ipv4_is_multicast(remote)) {
304		prio |= 2;
305		h ^= HASH(remote);
306	}
307
308	return &ign->tunnels[prio][h];
309}
310
311static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
312		struct ip_tunnel *t)
313{
314	return __ipgre_bucket(ign, &t->parms);
315}
316
317static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
318{
319	struct ip_tunnel **tp = ipgre_bucket(ign, t);
320
321	spin_lock_bh(&ipgre_lock);
322	t->next = *tp;
323	rcu_assign_pointer(*tp, t);
324	spin_unlock_bh(&ipgre_lock);
325}
326
327static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
328{
329	struct ip_tunnel **tp;
330
331	for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
332		if (t == *tp) {
333			spin_lock_bh(&ipgre_lock);
334			*tp = t->next;
335			spin_unlock_bh(&ipgre_lock);
336			break;
337		}
338	}
339}
340
341static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
342					   struct ip_tunnel_parm *parms,
343					   int type)
344{
345	__be32 remote = parms->iph.daddr;
346	__be32 local = parms->iph.saddr;
347	__be32 key = parms->i_key;
348	int link = parms->link;
349	struct ip_tunnel *t, **tp;
350	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
351
352	for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
353		if (local == t->parms.iph.saddr &&
354		    remote == t->parms.iph.daddr &&
355		    key == t->parms.i_key &&
356		    link == t->parms.link &&
357		    type == t->dev->type)
358			break;
359
360	return t;
361}
362
363static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
364		struct ip_tunnel_parm *parms, int create)
365{
366	struct ip_tunnel *t, *nt;
367	struct net_device *dev;
368	char name[IFNAMSIZ];
369	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
370
371	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
372	if (t || !create)
373		return t;
374
375	if (parms->name[0])
376		strlcpy(name, parms->name, IFNAMSIZ);
377	else
378		sprintf(name, "gre%%d");
379
380	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
381	if (!dev)
382	  return NULL;
383
384	dev_net_set(dev, net);
385
386	if (strchr(name, '%')) {
387		if (dev_alloc_name(dev, name) < 0)
388			goto failed_free;
389	}
390
391	nt = netdev_priv(dev);
392	nt->parms = *parms;
393	dev->rtnl_link_ops = &ipgre_link_ops;
394
395	dev->mtu = ipgre_tunnel_bind_dev(dev);
396
397	if (register_netdevice(dev) < 0)
398		goto failed_free;
399
400	dev_hold(dev);
401	ipgre_tunnel_link(ign, nt);
402	return nt;
403
404failed_free:
405	free_netdev(dev);
406	return NULL;
407}
408
409static void ipgre_tunnel_uninit(struct net_device *dev)
410{
411	struct net *net = dev_net(dev);
412	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
413
414	ipgre_tunnel_unlink(ign, netdev_priv(dev));
415	dev_put(dev);
416}
417
418
419static void ipgre_err(struct sk_buff *skb, u32 info)
420{
421
422/* All the routers (except for Linux) return only
423   8 bytes of packet payload. It means, that precise relaying of
424   ICMP in the real Internet is absolutely infeasible.
425
426   Moreover, Cisco "wise men" put GRE key to the third word
427   in GRE header. It makes impossible maintaining even soft state for keyed
428   GRE tunnels with enabled checksum. Tell them "thank you".
429
430   Well, I wonder, rfc1812 was written by Cisco employee,
431   what the hell these idiots break standrads established
432   by themself???
433 */
434
435	struct iphdr *iph = (struct iphdr *)skb->data;
436	__be16	     *p = (__be16*)(skb->data+(iph->ihl<<2));
437	int grehlen = (iph->ihl<<2) + 4;
438	const int type = icmp_hdr(skb)->type;
439	const int code = icmp_hdr(skb)->code;
440	struct ip_tunnel *t;
441	__be16 flags;
442
443	flags = p[0];
444	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
445		if (flags&(GRE_VERSION|GRE_ROUTING))
446			return;
447		if (flags&GRE_KEY) {
448			grehlen += 4;
449			if (flags&GRE_CSUM)
450				grehlen += 4;
451		}
452	}
453
454	/* If only 8 bytes returned, keyed message will be dropped here */
455	if (skb_headlen(skb) < grehlen)
456		return;
457
458	switch (type) {
459	default:
460	case ICMP_PARAMETERPROB:
461		return;
462
463	case ICMP_DEST_UNREACH:
464		switch (code) {
465		case ICMP_SR_FAILED:
466		case ICMP_PORT_UNREACH:
467			/* Impossible event. */
468			return;
469		case ICMP_FRAG_NEEDED:
470			/* Soft state for pmtu is maintained by IP core. */
471			return;
472		default:
473			/* All others are translated to HOST_UNREACH.
474			   rfc2003 contains "deep thoughts" about NET_UNREACH,
475			   I believe they are just ether pollution. --ANK
476			 */
477			break;
478		}
479		break;
480	case ICMP_TIME_EXCEEDED:
481		if (code != ICMP_EXC_TTL)
482			return;
483		break;
484	}
485
486	rcu_read_lock();
487	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
488				flags & GRE_KEY ?
489				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
490				p[1]);
491	if (t == NULL || t->parms.iph.daddr == 0 ||
492	    ipv4_is_multicast(t->parms.iph.daddr))
493		goto out;
494
495	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
496		goto out;
497
498	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
499		t->err_count++;
500	else
501		t->err_count = 1;
502	t->err_time = jiffies;
503out:
504	rcu_read_unlock();
505}
506
507static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
508{
509	if (INET_ECN_is_ce(iph->tos)) {
510		if (skb->protocol == htons(ETH_P_IP)) {
511			IP_ECN_set_ce(ip_hdr(skb));
512		} else if (skb->protocol == htons(ETH_P_IPV6)) {
513			IP6_ECN_set_ce(ipv6_hdr(skb));
514		}
515	}
516}
517
518static inline u8
519ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
520{
521	u8 inner = 0;
522	if (skb->protocol == htons(ETH_P_IP))
523		inner = old_iph->tos;
524	else if (skb->protocol == htons(ETH_P_IPV6))
525		inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
526	return INET_ECN_encapsulate(tos, inner);
527}
528
/*
 * Receive handler for incoming GRE packets.
 *
 * Parses the GRE header at skb->data, finds the receiving tunnel, checks
 * checksum and sequence-number policy against the tunnel's i_flags, strips
 * the GRE header and passes the inner packet to netif_rx() on the tunnel
 * device.
 *
 * Always returns 0. The skb is either handed to netif_rx() or freed here.
 * When no tunnel matches, an ICMP port-unreachable is sent before freeing.
 */
static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;	/* size of the base GRE header */
	__be16 gre_proto;

	/*
	 * 16 bytes covers the 4-byte base header plus the three optional
	 * 4-byte fields (checksum, key, sequence) read below.
	 */
	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16*)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			/* csum ends up non-zero when verification failed. */
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32*)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32*)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	rcu_read_lock();
	/* The packet's source is the tunnel's remote, its destination the local. */
	if ((tunnel = ipgre_tunnel_lookup(skb->dev,
					  iph->saddr, iph->daddr, key,
					  gre_proto))) {
		struct net_device_stats *stats = &tunnel->dev->stats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			/* Not an IPv4 version nibble here: skip 4 more bytes. */
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (skb_rtable(skb)->fl.iif == 0)
				goto drop;
			stats->multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		/*
		 * Drop if the checksum was present and bad, or if the tunnel
		 * requires a checksum and the packet has none.
		 */
		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			stats->rx_crc_errors++;
			stats->rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			/*
			 * Sequencing required: drop packets without a sequence
			 * number or older than the next expected one.
			 */
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				stats->rx_fifo_errors++;
				stats->rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				stats->rx_length_errors++;
				stats->rx_errors++;
				goto drop;
			}

			/* Re-read: the pull above may have moved the headers. */
			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		skb_tunnel_rx(skb, tunnel->dev);

		skb_reset_network_header(skb);
		/* iph still refers to the outer header; propagate its CE mark. */
		ipgre_ecn_decapsulate(iph, skb);

		netif_rx(skb);
		rcu_read_unlock();
		return(0);
	}
	/* No tunnel (and no usable fallback) for this packet. */
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	rcu_read_unlock();
drop_nolock:
	kfree_skb(skb);
	return(0);
}
660
661static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
662{
663	struct ip_tunnel *tunnel = netdev_priv(dev);
664	struct net_device_stats *stats = &dev->stats;
665	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
666	struct iphdr  *old_iph = ip_hdr(skb);
667	struct iphdr  *tiph;
668	u8     tos;
669	__be16 df;
670	struct rtable *rt;     			/* Route to the other host */
671	struct net_device *tdev;			/* Device to other host */
672	struct iphdr  *iph;			/* Our new IP header */
673	unsigned int max_headroom;		/* The extra header space needed */
674	int    gre_hlen;
675	__be32 dst;
676	int    mtu;
677
678	if (dev->type == ARPHRD_ETHER)
679		IPCB(skb)->flags = 0;
680
681	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
682		gre_hlen = 0;
683		tiph = (struct iphdr *)skb->data;
684	} else {
685		gre_hlen = tunnel->hlen;
686		tiph = &tunnel->parms.iph;
687	}
688
689	if ((dst = tiph->daddr) == 0) {
690		/* NBMA tunnel */
691
692		if (skb_dst(skb) == NULL) {
693			stats->tx_fifo_errors++;
694			goto tx_error;
695		}
696
697		if (skb->protocol == htons(ETH_P_IP)) {
698			rt = skb_rtable(skb);
699			if ((dst = rt->rt_gateway) == 0)
700				goto tx_error_icmp;
701		}
702#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
703		else if (skb->protocol == htons(ETH_P_IPV6)) {
704			struct in6_addr *addr6;
705			int addr_type;
706			struct neighbour *neigh = skb_dst(skb)->neighbour;
707
708			if (neigh == NULL)
709				goto tx_error;
710
711			addr6 = (struct in6_addr *)&neigh->primary_key;
712			addr_type = ipv6_addr_type(addr6);
713
714			if (addr_type == IPV6_ADDR_ANY) {
715				addr6 = &ipv6_hdr(skb)->daddr;
716				addr_type = ipv6_addr_type(addr6);
717			}
718
719			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
720				goto tx_error_icmp;
721
722			dst = addr6->s6_addr32[3];
723		}
724#endif
725		else
726			goto tx_error;
727	}
728
729	tos = tiph->tos;
730	if (tos == 1) {
731		tos = 0;
732		if (skb->protocol == htons(ETH_P_IP))
733			tos = old_iph->tos;
734		else if (skb->protocol == htons(ETH_P_IPV6))
735			tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
736	}
737
738	{
739		struct flowi fl = { .oif = tunnel->parms.link,
740				    .nl_u = { .ip4_u =
741					      { .daddr = dst,
742						.saddr = tiph->saddr,
743						.tos = RT_TOS(tos) } },
744				    .proto = IPPROTO_GRE };
745		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
746			stats->tx_carrier_errors++;
747			goto tx_error;
748		}
749	}
750	tdev = rt->dst.dev;
751
752	if (tdev == dev) {
753		ip_rt_put(rt);
754		stats->collisions++;
755		goto tx_error;
756	}
757
758	df = tiph->frag_off;
759	if (df)
760		mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
761	else
762		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
763
764	if (skb_dst(skb))
765		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
766
767	if (skb->protocol == htons(ETH_P_IP)) {
768		df |= (old_iph->frag_off&htons(IP_DF));
769
770		if ((old_iph->frag_off&htons(IP_DF)) &&
771		    mtu < ntohs(old_iph->tot_len)) {
772			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
773			ip_rt_put(rt);
774			goto tx_error;
775		}
776	}
777#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
778	else if (skb->protocol == htons(ETH_P_IPV6)) {
779		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
780
781		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
782			if ((tunnel->parms.iph.daddr &&
783			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
784			    rt6->rt6i_dst.plen == 128) {
785				rt6->rt6i_flags |= RTF_MODIFIED;
786				skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
787			}
788		}
789
790		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
791			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
792			ip_rt_put(rt);
793			goto tx_error;
794		}
795	}
796#endif
797
798	if (tunnel->err_count > 0) {
799		if (time_before(jiffies,
800				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
801			tunnel->err_count--;
802
803			dst_link_failure(skb);
804		} else
805			tunnel->err_count = 0;
806	}
807
808	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
809
810	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
811	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
812		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
813		if (max_headroom > dev->needed_headroom)
814			dev->needed_headroom = max_headroom;
815		if (!new_skb) {
816			ip_rt_put(rt);
817			txq->tx_dropped++;
818			dev_kfree_skb(skb);
819			return NETDEV_TX_OK;
820		}
821		if (skb->sk)
822			skb_set_owner_w(new_skb, skb->sk);
823		dev_kfree_skb(skb);
824		skb = new_skb;
825		old_iph = ip_hdr(skb);
826	}
827
828	skb_reset_transport_header(skb);
829	skb_push(skb, gre_hlen);
830	skb_reset_network_header(skb);
831	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
832	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
833			      IPSKB_REROUTED);
834	skb_dst_drop(skb);
835	skb_dst_set(skb, &rt->dst);
836
837	/*
838	 *	Push down and install the IPIP header.
839	 */
840
841	iph 			=	ip_hdr(skb);
842	iph->version		=	4;
843	iph->ihl		=	sizeof(struct iphdr) >> 2;
844	iph->frag_off		=	df;
845	iph->protocol		=	IPPROTO_GRE;
846	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
847	iph->daddr		=	rt->rt_dst;
848	iph->saddr		=	rt->rt_src;
849
850	if ((iph->ttl = tiph->ttl) == 0) {
851		if (skb->protocol == htons(ETH_P_IP))
852			iph->ttl = old_iph->ttl;
853#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
854		else if (skb->protocol == htons(ETH_P_IPV6))
855			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
856#endif
857		else
858			iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
859	}
860
861	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
862	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
863				   htons(ETH_P_TEB) : skb->protocol;
864
865	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
866		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
867
868		if (tunnel->parms.o_flags&GRE_SEQ) {
869			++tunnel->o_seqno;
870			*ptr = htonl(tunnel->o_seqno);
871			ptr--;
872		}
873		if (tunnel->parms.o_flags&GRE_KEY) {
874			*ptr = tunnel->parms.o_key;
875			ptr--;
876		}
877		if (tunnel->parms.o_flags&GRE_CSUM) {
878			*ptr = 0;
879			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
880		}
881	}
882
883	nf_reset(skb);
884
885	IPTUNNEL_XMIT();
886	return NETDEV_TX_OK;
887
888tx_error_icmp:
889	dst_link_failure(skb);
890
891tx_error:
892	stats->tx_errors++;
893	dev_kfree_skb(skb);
894	return NETDEV_TX_OK;
895}
896
897static int ipgre_tunnel_bind_dev(struct net_device *dev)
898{
899	struct net_device *tdev = NULL;
900	struct ip_tunnel *tunnel;
901	struct iphdr *iph;
902	int hlen = LL_MAX_HEADER;
903	int mtu = ETH_DATA_LEN;
904	int addend = sizeof(struct iphdr) + 4;
905
906	tunnel = netdev_priv(dev);
907	iph = &tunnel->parms.iph;
908
909	/* Guess output device to choose reasonable mtu and needed_headroom */
910
911	if (iph->daddr) {
912		struct flowi fl = { .oif = tunnel->parms.link,
913				    .nl_u = { .ip4_u =
914					      { .daddr = iph->daddr,
915						.saddr = iph->saddr,
916						.tos = RT_TOS(iph->tos) } },
917				    .proto = IPPROTO_GRE };
918		struct rtable *rt;
919		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
920			tdev = rt->dst.dev;
921			ip_rt_put(rt);
922		}
923
924		if (dev->type != ARPHRD_ETHER)
925			dev->flags |= IFF_POINTOPOINT;
926	}
927
928	if (!tdev && tunnel->parms.link)
929		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
930
931	if (tdev) {
932		hlen = tdev->hard_header_len + tdev->needed_headroom;
933		mtu = tdev->mtu;
934	}
935	dev->iflink = tunnel->parms.link;
936
937	/* Precalculate GRE options length */
938	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
939		if (tunnel->parms.o_flags&GRE_CSUM)
940			addend += 4;
941		if (tunnel->parms.o_flags&GRE_KEY)
942			addend += 4;
943		if (tunnel->parms.o_flags&GRE_SEQ)
944			addend += 4;
945	}
946	dev->needed_headroom = addend + hlen;
947	mtu -= dev->hard_header_len + addend;
948
949	if (mtu < 68)
950		mtu = 68;
951
952	tunnel->hlen = addend;
953
954	return mtu;
955}
956
/*
 * Tunnel configuration ioctl for GRE devices.
 *
 * @dev: device the ioctl was issued on. Operations issued on the fallback
 *       device act on the tunnel described by the user's parameters.
 * @ifr: request; ifr_ifru.ifru_data points at a user-space
 *       struct ip_tunnel_parm that is read and/or written back.
 * @cmd: SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL or SIOCDELTUNNEL.
 *
 * Returns 0 on success or a negative errno: -EFAULT on user copy failure,
 * -EPERM without CAP_NET_ADMIN (or when deleting the fallback tunnel),
 * -EINVAL for bad parameters or unknown commands, -EEXIST when a change
 * would collide with another tunnel, -ENOBUFS / -ENOENT when a tunnel
 * could not be created / found.
 *
 * NOTE(review): p.name is copied from user space and is not explicitly
 * NUL-terminated here before being passed to ipgre_tunnel_locate() --
 * confirm that is acceptable.
 */
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, look the tunnel up by parameters. */
		if (dev == ign->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		/* Otherwise (or if not found) report this device's own tunnel. */
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Only plain IPv4/GRE headers without options, version or routing. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		/* A fixed TTL forces DF (see the notes at the top of the file). */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Ignore key values whose GRE_KEY flag is not set. */
		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* The new parameters already belong to another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* The change may not alter the device's p2p/broadcast kind. */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				/* Re-hash: addresses and key determine the bucket. */
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			/* Hand the resulting parameters back to user space. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			/* The fallback tunnel itself cannot be deleted this way. */
			err = -EPERM;
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
1086
1087static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1088{
1089	struct ip_tunnel *tunnel = netdev_priv(dev);
1090	if (new_mtu < 68 ||
1091	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1092		return -EINVAL;
1093	dev->mtu = new_mtu;
1094	return 0;
1095}
1096
1097/* Nice toy. Unfortunately, useless in real life :-)
1098   It allows to construct virtual multiprotocol broadcast "LAN"
1099   over the Internet, provided multicast routing is tuned.
1100
1101
1102   I have no idea was this bicycle invented before me,
1103   so that I had to set ARPHRD_IPGRE to a random value.
1104   I have an impression, that Cisco could make something similar,
1105   but this feature is apparently missing in IOS<=11.2(8).
1106
1107   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1108   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1109
1110   ping -t 255 224.66.66.66
1111
1112   If nobody answers, mbone does not work.
1113
1114   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1115   ip addr add 10.66.66.<somewhat>/24 dev Universe
1116   ifconfig Universe up
1117   ifconfig Universe add fe80::<Your_real_addr>/10
1118   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1119   ftp 10.66.66.66
1120   ...
1121   ftp fec0:6666:6666::193.233.7.65
1122   ...
1123
1124 */
1125
1126static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1127			unsigned short type,
1128			const void *daddr, const void *saddr, unsigned len)
1129{
1130	struct ip_tunnel *t = netdev_priv(dev);
1131	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1132	__be16 *p = (__be16*)(iph+1);
1133
1134	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1135	p[0]		= t->parms.o_flags;
1136	p[1]		= htons(type);
1137
1138	/*
1139	 *	Set the source hardware address.
1140	 */
1141
1142	if (saddr)
1143		memcpy(&iph->saddr, saddr, 4);
1144	if (daddr)
1145		memcpy(&iph->daddr, daddr, 4);
1146	if (iph->daddr)
1147		return t->hlen;
1148
1149	return -t->hlen;
1150}
1151
1152static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1153{
1154	struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1155	memcpy(haddr, &iph->saddr, 4);
1156	return 4;
1157}
1158
1159static const struct header_ops ipgre_header_ops = {
1160	.create	= ipgre_header,
1161	.parse	= ipgre_header_parse,
1162};
1163
1164#ifdef CONFIG_NET_IPGRE_BROADCAST
1165static int ipgre_open(struct net_device *dev)
1166{
1167	struct ip_tunnel *t = netdev_priv(dev);
1168
1169	if (ipv4_is_multicast(t->parms.iph.daddr)) {
1170		struct flowi fl = { .oif = t->parms.link,
1171				    .nl_u = { .ip4_u =
1172					      { .daddr = t->parms.iph.daddr,
1173						.saddr = t->parms.iph.saddr,
1174						.tos = RT_TOS(t->parms.iph.tos) } },
1175				    .proto = IPPROTO_GRE };
1176		struct rtable *rt;
1177		if (ip_route_output_key(dev_net(dev), &rt, &fl))
1178			return -EADDRNOTAVAIL;
1179		dev = rt->dst.dev;
1180		ip_rt_put(rt);
1181		if (__in_dev_get_rtnl(dev) == NULL)
1182			return -EADDRNOTAVAIL;
1183		t->mlink = dev->ifindex;
1184		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1185	}
1186	return 0;
1187}
1188
1189static int ipgre_close(struct net_device *dev)
1190{
1191	struct ip_tunnel *t = netdev_priv(dev);
1192
1193	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1194		struct in_device *in_dev;
1195		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1196		if (in_dev) {
1197			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1198			in_dev_put(in_dev);
1199		}
1200	}
1201	return 0;
1202}
1203
1204#endif
1205
1206static const struct net_device_ops ipgre_netdev_ops = {
1207	.ndo_init		= ipgre_tunnel_init,
1208	.ndo_uninit		= ipgre_tunnel_uninit,
1209#ifdef CONFIG_NET_IPGRE_BROADCAST
1210	.ndo_open		= ipgre_open,
1211	.ndo_stop		= ipgre_close,
1212#endif
1213	.ndo_start_xmit		= ipgre_tunnel_xmit,
1214	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
1215	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
1216};
1217
1218static void ipgre_tunnel_setup(struct net_device *dev)
1219{
1220	dev->netdev_ops		= &ipgre_netdev_ops;
1221	dev->destructor 	= free_netdev;
1222
1223	dev->type		= ARPHRD_IPGRE;
1224	dev->needed_headroom 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1225	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1226	dev->flags		= IFF_NOARP;
1227	dev->iflink		= 0;
1228	dev->addr_len		= 4;
1229	dev->features		|= NETIF_F_NETNS_LOCAL;
1230	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
1231}
1232
1233static int ipgre_tunnel_init(struct net_device *dev)
1234{
1235	struct ip_tunnel *tunnel;
1236	struct iphdr *iph;
1237
1238	tunnel = netdev_priv(dev);
1239	iph = &tunnel->parms.iph;
1240
1241	tunnel->dev = dev;
1242	strcpy(tunnel->parms.name, dev->name);
1243
1244	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1245	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1246
1247	if (iph->daddr) {
1248#ifdef CONFIG_NET_IPGRE_BROADCAST
1249		if (ipv4_is_multicast(iph->daddr)) {
1250			if (!iph->saddr)
1251				return -EINVAL;
1252			dev->flags = IFF_BROADCAST;
1253			dev->header_ops = &ipgre_header_ops;
1254		}
1255#endif
1256	} else
1257		dev->header_ops = &ipgre_header_ops;
1258
1259	return 0;
1260}
1261
1262static void ipgre_fb_tunnel_init(struct net_device *dev)
1263{
1264	struct ip_tunnel *tunnel = netdev_priv(dev);
1265	struct iphdr *iph = &tunnel->parms.iph;
1266	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1267
1268	tunnel->dev = dev;
1269	strcpy(tunnel->parms.name, dev->name);
1270
1271	iph->version		= 4;
1272	iph->protocol		= IPPROTO_GRE;
1273	iph->ihl		= 5;
1274	tunnel->hlen		= sizeof(struct iphdr) + 4;
1275
1276	dev_hold(dev);
1277	ign->tunnels_wc[0]	= tunnel;
1278}
1279
1280
1281static const struct net_protocol ipgre_protocol = {
1282	.handler	=	ipgre_rcv,
1283	.err_handler	=	ipgre_err,
1284	.netns_ok	=	1,
1285};
1286
1287static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1288{
1289	int prio;
1290
1291	for (prio = 0; prio < 4; prio++) {
1292		int h;
1293		for (h = 0; h < HASH_SIZE; h++) {
1294			struct ip_tunnel *t = ign->tunnels[prio][h];
1295
1296			while (t != NULL) {
1297				unregister_netdevice_queue(t->dev, head);
1298				t = t->next;
1299			}
1300		}
1301	}
1302}
1303
1304static int __net_init ipgre_init_net(struct net *net)
1305{
1306	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1307	int err;
1308
1309	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1310					   ipgre_tunnel_setup);
1311	if (!ign->fb_tunnel_dev) {
1312		err = -ENOMEM;
1313		goto err_alloc_dev;
1314	}
1315	dev_net_set(ign->fb_tunnel_dev, net);
1316
1317	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1318	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1319
1320	if ((err = register_netdev(ign->fb_tunnel_dev)))
1321		goto err_reg_dev;
1322
1323	return 0;
1324
1325err_reg_dev:
1326	free_netdev(ign->fb_tunnel_dev);
1327err_alloc_dev:
1328	return err;
1329}
1330
1331static void __net_exit ipgre_exit_net(struct net *net)
1332{
1333	struct ipgre_net *ign;
1334	LIST_HEAD(list);
1335
1336	ign = net_generic(net, ipgre_net_id);
1337	rtnl_lock();
1338	ipgre_destroy_tunnels(ign, &list);
1339	unregister_netdevice_many(&list);
1340	rtnl_unlock();
1341}
1342
1343static struct pernet_operations ipgre_net_ops = {
1344	.init = ipgre_init_net,
1345	.exit = ipgre_exit_net,
1346	.id   = &ipgre_net_id,
1347	.size = sizeof(struct ipgre_net),
1348};
1349
1350static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1351{
1352	__be16 flags;
1353
1354	if (!data)
1355		return 0;
1356
1357	flags = 0;
1358	if (data[IFLA_GRE_IFLAGS])
1359		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1360	if (data[IFLA_GRE_OFLAGS])
1361		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1362	if (flags & (GRE_VERSION|GRE_ROUTING))
1363		return -EINVAL;
1364
1365	return 0;
1366}
1367
1368static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1369{
1370	__be32 daddr;
1371
1372	if (tb[IFLA_ADDRESS]) {
1373		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1374			return -EINVAL;
1375		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1376			return -EADDRNOTAVAIL;
1377	}
1378
1379	if (!data)
1380		goto out;
1381
1382	if (data[IFLA_GRE_REMOTE]) {
1383		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1384		if (!daddr)
1385			return -EINVAL;
1386	}
1387
1388out:
1389	return ipgre_tunnel_validate(tb, data);
1390}
1391
1392static void ipgre_netlink_parms(struct nlattr *data[],
1393				struct ip_tunnel_parm *parms)
1394{
1395	memset(parms, 0, sizeof(*parms));
1396
1397	parms->iph.protocol = IPPROTO_GRE;
1398
1399	if (!data)
1400		return;
1401
1402	if (data[IFLA_GRE_LINK])
1403		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1404
1405	if (data[IFLA_GRE_IFLAGS])
1406		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1407
1408	if (data[IFLA_GRE_OFLAGS])
1409		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1410
1411	if (data[IFLA_GRE_IKEY])
1412		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1413
1414	if (data[IFLA_GRE_OKEY])
1415		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1416
1417	if (data[IFLA_GRE_LOCAL])
1418		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1419
1420	if (data[IFLA_GRE_REMOTE])
1421		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1422
1423	if (data[IFLA_GRE_TTL])
1424		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1425
1426	if (data[IFLA_GRE_TOS])
1427		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1428
1429	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1430		parms->iph.frag_off = htons(IP_DF);
1431}
1432
1433static int ipgre_tap_init(struct net_device *dev)
1434{
1435	struct ip_tunnel *tunnel;
1436
1437	tunnel = netdev_priv(dev);
1438
1439	tunnel->dev = dev;
1440	strcpy(tunnel->parms.name, dev->name);
1441
1442	ipgre_tunnel_bind_dev(dev);
1443
1444	return 0;
1445}
1446
1447static const struct net_device_ops ipgre_tap_netdev_ops = {
1448	.ndo_init		= ipgre_tap_init,
1449	.ndo_uninit		= ipgre_tunnel_uninit,
1450	.ndo_start_xmit		= ipgre_tunnel_xmit,
1451	.ndo_set_mac_address 	= eth_mac_addr,
1452	.ndo_validate_addr	= eth_validate_addr,
1453	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
1454};
1455
1456static void ipgre_tap_setup(struct net_device *dev)
1457{
1458
1459	ether_setup(dev);
1460
1461	dev->netdev_ops		= &ipgre_tap_netdev_ops;
1462	dev->destructor 	= free_netdev;
1463
1464	dev->iflink		= 0;
1465	dev->features		|= NETIF_F_NETNS_LOCAL;
1466}
1467
1468static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1469			 struct nlattr *data[])
1470{
1471	struct ip_tunnel *nt;
1472	struct net *net = dev_net(dev);
1473	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1474	int mtu;
1475	int err;
1476
1477	nt = netdev_priv(dev);
1478	ipgre_netlink_parms(data, &nt->parms);
1479
1480	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1481		return -EEXIST;
1482
1483	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1484		random_ether_addr(dev->dev_addr);
1485
1486	mtu = ipgre_tunnel_bind_dev(dev);
1487	if (!tb[IFLA_MTU])
1488		dev->mtu = mtu;
1489
1490	err = register_netdevice(dev);
1491	if (err)
1492		goto out;
1493
1494	dev_hold(dev);
1495	ipgre_tunnel_link(ign, nt);
1496
1497out:
1498	return err;
1499}
1500
1501static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1502			    struct nlattr *data[])
1503{
1504	struct ip_tunnel *t, *nt;
1505	struct net *net = dev_net(dev);
1506	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1507	struct ip_tunnel_parm p;
1508	int mtu;
1509
1510	if (dev == ign->fb_tunnel_dev)
1511		return -EINVAL;
1512
1513	nt = netdev_priv(dev);
1514	ipgre_netlink_parms(data, &p);
1515
1516	t = ipgre_tunnel_locate(net, &p, 0);
1517
1518	if (t) {
1519		if (t->dev != dev)
1520			return -EEXIST;
1521	} else {
1522		t = nt;
1523
1524		if (dev->type != ARPHRD_ETHER) {
1525			unsigned nflags = 0;
1526
1527			if (ipv4_is_multicast(p.iph.daddr))
1528				nflags = IFF_BROADCAST;
1529			else if (p.iph.daddr)
1530				nflags = IFF_POINTOPOINT;
1531
1532			if ((dev->flags ^ nflags) &
1533			    (IFF_POINTOPOINT | IFF_BROADCAST))
1534				return -EINVAL;
1535		}
1536
1537		ipgre_tunnel_unlink(ign, t);
1538		t->parms.iph.saddr = p.iph.saddr;
1539		t->parms.iph.daddr = p.iph.daddr;
1540		t->parms.i_key = p.i_key;
1541		if (dev->type != ARPHRD_ETHER) {
1542			memcpy(dev->dev_addr, &p.iph.saddr, 4);
1543			memcpy(dev->broadcast, &p.iph.daddr, 4);
1544		}
1545		ipgre_tunnel_link(ign, t);
1546		netdev_state_change(dev);
1547	}
1548
1549	t->parms.o_key = p.o_key;
1550	t->parms.iph.ttl = p.iph.ttl;
1551	t->parms.iph.tos = p.iph.tos;
1552	t->parms.iph.frag_off = p.iph.frag_off;
1553
1554	if (t->parms.link != p.link) {
1555		t->parms.link = p.link;
1556		mtu = ipgre_tunnel_bind_dev(dev);
1557		if (!tb[IFLA_MTU])
1558			dev->mtu = mtu;
1559		netdev_state_change(dev);
1560	}
1561
1562	return 0;
1563}
1564
1565static size_t ipgre_get_size(const struct net_device *dev)
1566{
1567	return
1568		/* IFLA_GRE_LINK */
1569		nla_total_size(4) +
1570		/* IFLA_GRE_IFLAGS */
1571		nla_total_size(2) +
1572		/* IFLA_GRE_OFLAGS */
1573		nla_total_size(2) +
1574		/* IFLA_GRE_IKEY */
1575		nla_total_size(4) +
1576		/* IFLA_GRE_OKEY */
1577		nla_total_size(4) +
1578		/* IFLA_GRE_LOCAL */
1579		nla_total_size(4) +
1580		/* IFLA_GRE_REMOTE */
1581		nla_total_size(4) +
1582		/* IFLA_GRE_TTL */
1583		nla_total_size(1) +
1584		/* IFLA_GRE_TOS */
1585		nla_total_size(1) +
1586		/* IFLA_GRE_PMTUDISC */
1587		nla_total_size(1) +
1588		0;
1589}
1590
1591static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1592{
1593	struct ip_tunnel *t = netdev_priv(dev);
1594	struct ip_tunnel_parm *p = &t->parms;
1595
1596	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1597	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1598	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1599	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1600	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1601	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1602	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1603	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1604	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1605	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1606
1607	return 0;
1608
1609nla_put_failure:
1610	return -EMSGSIZE;
1611}
1612
1613static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1614	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1615	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1616	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1617	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1618	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1619	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1620	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1621	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1622	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1623	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1624};
1625
1626static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1627	.kind		= "gre",
1628	.maxtype	= IFLA_GRE_MAX,
1629	.policy		= ipgre_policy,
1630	.priv_size	= sizeof(struct ip_tunnel),
1631	.setup		= ipgre_tunnel_setup,
1632	.validate	= ipgre_tunnel_validate,
1633	.newlink	= ipgre_newlink,
1634	.changelink	= ipgre_changelink,
1635	.get_size	= ipgre_get_size,
1636	.fill_info	= ipgre_fill_info,
1637};
1638
1639static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1640	.kind		= "gretap",
1641	.maxtype	= IFLA_GRE_MAX,
1642	.policy		= ipgre_policy,
1643	.priv_size	= sizeof(struct ip_tunnel),
1644	.setup		= ipgre_tap_setup,
1645	.validate	= ipgre_tap_validate,
1646	.newlink	= ipgre_newlink,
1647	.changelink	= ipgre_changelink,
1648	.get_size	= ipgre_get_size,
1649	.fill_info	= ipgre_fill_info,
1650};
1651
1652/*
1653 *	And now the modules code and kernel interface.
1654 */
1655
1656static int __init ipgre_init(void)
1657{
1658	int err;
1659
1660	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1661
1662	err = register_pernet_device(&ipgre_net_ops);
1663	if (err < 0)
1664		return err;
1665
1666	err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
1667	if (err < 0) {
1668		printk(KERN_INFO "ipgre init: can't add protocol\n");
1669		goto add_proto_failed;
1670	}
1671
1672	err = rtnl_link_register(&ipgre_link_ops);
1673	if (err < 0)
1674		goto rtnl_link_failed;
1675
1676	err = rtnl_link_register(&ipgre_tap_ops);
1677	if (err < 0)
1678		goto tap_ops_failed;
1679
1680out:
1681	return err;
1682
1683tap_ops_failed:
1684	rtnl_link_unregister(&ipgre_link_ops);
1685rtnl_link_failed:
1686	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1687add_proto_failed:
1688	unregister_pernet_device(&ipgre_net_ops);
1689	goto out;
1690}
1691
1692static void __exit ipgre_fini(void)
1693{
1694	rtnl_link_unregister(&ipgre_tap_ops);
1695	rtnl_link_unregister(&ipgre_link_ops);
1696	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1697		printk(KERN_INFO "ipgre close: can't remove protocol\n");
1698	unregister_pernet_device(&ipgre_net_ops);
1699}
1700
1701module_init(ipgre_init);
1702module_exit(ipgre_fini);
1703MODULE_LICENSE("GPL");
1704MODULE_ALIAS_RTNL_LINK("gre");
1705MODULE_ALIAS_RTNL_LINK("gretap");
1706