• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv6/
1/*
2 *	RAW sockets for IPv6
3 *	Linux INET6 implementation
4 *
5 *	Authors:
6 *	Pedro Roque		<roque@di.fc.ul.pt>
7 *
8 *	Adapted from linux/net/ipv4/raw.c
9 *
10 *	Fixes:
11 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
12 *	YOSHIFUJI,H.@USAGI	:	raw checksum (RFC2292(bis) compliance)
13 *	Kazunori MIYAZAWA @USAGI:	change process style to use ip6_append_data
14 *
15 *	This program is free software; you can redistribute it and/or
16 *      modify it under the terms of the GNU General Public License
17 *      as published by the Free Software Foundation; either version
18 *      2 of the License, or (at your option) any later version.
19 */
20
21#include <linux/errno.h>
22#include <linux/types.h>
23#include <linux/socket.h>
24#include <linux/slab.h>
25#include <linux/sockios.h>
26#include <linux/net.h>
27#include <linux/in6.h>
28#include <linux/netdevice.h>
29#include <linux/if_arp.h>
30#include <linux/icmpv6.h>
31#include <linux/netfilter.h>
32#include <linux/netfilter_ipv6.h>
33#include <linux/skbuff.h>
34#include <asm/uaccess.h>
35#include <asm/ioctls.h>
36
37#include <net/net_namespace.h>
38#include <net/ip.h>
39#include <net/sock.h>
40#include <net/snmp.h>
41
42#include <net/ipv6.h>
43#include <net/ndisc.h>
44#include <net/protocol.h>
45#include <net/ip6_route.h>
46#include <net/ip6_checksum.h>
47#include <net/addrconf.h>
48#include <net/transp_v6.h>
49#include <net/udp.h>
50#include <net/inet_common.h>
51#include <net/tcp_states.h>
52#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
53#include <net/mip6.h>
54#endif
55#include <linux/mroute6.h>
56
57#include <net/raw.h>
58#include <net/rawv6.h>
59#include <net/xfrm.h>
60
61#include <linux/proc_fs.h>
62#include <linux/seq_file.h>
63
64static struct raw_hashinfo raw_v6_hashinfo = {
65	.lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
66};
67
68static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
69		unsigned short num, struct in6_addr *loc_addr,
70		struct in6_addr *rmt_addr, int dif)
71{
72	struct hlist_node *node;
73	int is_multicast = ipv6_addr_is_multicast(loc_addr);
74
75	sk_for_each_from(sk, node)
76		if (inet_sk(sk)->inet_num == num) {
77			struct ipv6_pinfo *np = inet6_sk(sk);
78
79			if (!net_eq(sock_net(sk), net))
80				continue;
81
82			if (!ipv6_addr_any(&np->daddr) &&
83			    !ipv6_addr_equal(&np->daddr, rmt_addr))
84				continue;
85
86			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
87				continue;
88
89			if (!ipv6_addr_any(&np->rcv_saddr)) {
90				if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
91					goto found;
92				if (is_multicast &&
93				    inet6_mc_check(sk, loc_addr, rmt_addr))
94					goto found;
95				continue;
96			}
97			goto found;
98		}
99	sk = NULL;
100found:
101	return sk;
102}
103
104/*
105 *	0 - deliver
106 *	1 - block
107 */
108static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
109{
110	struct icmp6hdr *icmph;
111	struct raw6_sock *rp = raw6_sk(sk);
112
113	if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
114		__u32 *data = &rp->filter.data[0];
115		int bit_nr;
116
117		icmph = (struct icmp6hdr *) skb->data;
118		bit_nr = icmph->icmp6_type;
119
120		return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0;
121	}
122	return 0;
123}
124
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
/*
 * Optional Mobility Header filter.  Written with rcu_assign_pointer()
 * below and read with rcu_dereference() on the receive path.
 */
static int (*mh_filter)(struct sock *sock, struct sk_buff *skb);

/*
 * Install @filter as the Mobility Header filter.  Always returns 0.
 */
int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
					   struct sk_buff *skb))
{
	rcu_assign_pointer(mh_filter, filter);

	return 0;
}
EXPORT_SYMBOL(rawv6_mh_filter_register);

/*
 * Remove the Mobility Header filter and wait for an RCU grace period
 * before returning.  @filter is not examined: whatever hook is currently
 * installed is cleared.  Always returns 0.
 */
int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
					     struct sk_buff *skb))
{
	rcu_assign_pointer(mh_filter, NULL);
	synchronize_rcu();

	return 0;
}
EXPORT_SYMBOL(rawv6_mh_filter_unregister);

#endif
146
147/*
148 *	demultiplex raw sockets.
149 *	(should consider queueing the skb in the sock receive_queue
150 *	without calling rawv6.c)
151 *
152 *	Caller owns SKB so we must make clones.
153 */
154static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
155{
156	struct in6_addr *saddr;
157	struct in6_addr *daddr;
158	struct sock *sk;
159	int delivered = 0;
160	__u8 hash;
161	struct net *net;
162
163	saddr = &ipv6_hdr(skb)->saddr;
164	daddr = saddr + 1;
165
166	hash = nexthdr & (MAX_INET_PROTOS - 1);
167
168	read_lock(&raw_v6_hashinfo.lock);
169	sk = sk_head(&raw_v6_hashinfo.ht[hash]);
170
171	if (sk == NULL)
172		goto out;
173
174	net = dev_net(skb->dev);
175	sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
176
177	while (sk) {
178		int filtered;
179
180		delivered = 1;
181		switch (nexthdr) {
182		case IPPROTO_ICMPV6:
183			filtered = icmpv6_filter(sk, skb);
184			break;
185
186#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
187		case IPPROTO_MH:
188		{
189			int (*filter)(struct sock *sock, struct sk_buff *skb);
190
191			filter = rcu_dereference(mh_filter);
192			filtered = filter ? filter(sk, skb) : 0;
193			break;
194		}
195#endif
196		default:
197			filtered = 0;
198			break;
199		}
200
201		if (filtered < 0)
202			break;
203		if (filtered == 0) {
204			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
205
206			/* Not releasing hash table! */
207			if (clone) {
208				nf_reset(clone);
209				rawv6_rcv(sk, clone);
210			}
211		}
212		sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
213				     IP6CB(skb)->iif);
214	}
215out:
216	read_unlock(&raw_v6_hashinfo.lock);
217	return delivered;
218}
219
220int raw6_local_deliver(struct sk_buff *skb, int nexthdr)
221{
222	struct sock *raw_sk;
223
224	raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]);
225	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
226		raw_sk = NULL;
227
228	return raw_sk != NULL;
229}
230
231/* This cleans up af_inet6 a bit. -DaveM */
232static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
233{
234	struct inet_sock *inet = inet_sk(sk);
235	struct ipv6_pinfo *np = inet6_sk(sk);
236	struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
237	__be32 v4addr = 0;
238	int addr_type;
239	int err;
240
241	if (addr_len < SIN6_LEN_RFC2133)
242		return -EINVAL;
243	addr_type = ipv6_addr_type(&addr->sin6_addr);
244
245	/* Raw sockets are IPv6 only */
246	if (addr_type == IPV6_ADDR_MAPPED)
247		return -EADDRNOTAVAIL;
248
249	lock_sock(sk);
250
251	err = -EINVAL;
252	if (sk->sk_state != TCP_CLOSE)
253		goto out;
254
255	rcu_read_lock();
256	/* Check if the address belongs to the host. */
257	if (addr_type != IPV6_ADDR_ANY) {
258		struct net_device *dev = NULL;
259
260		if (addr_type & IPV6_ADDR_LINKLOCAL) {
261			if (addr_len >= sizeof(struct sockaddr_in6) &&
262			    addr->sin6_scope_id) {
263				/* Override any existing binding, if another
264				 * one is supplied by user.
265				 */
266				sk->sk_bound_dev_if = addr->sin6_scope_id;
267			}
268
269			/* Binding to link-local address requires an interface */
270			if (!sk->sk_bound_dev_if)
271				goto out_unlock;
272
273			err = -ENODEV;
274			dev = dev_get_by_index_rcu(sock_net(sk),
275						   sk->sk_bound_dev_if);
276			if (!dev)
277				goto out_unlock;
278		}
279
280		/* ipv4 addr of the socket is invalid.  Only the
281		 * unspecified and mapped address have a v4 equivalent.
282		 */
283		v4addr = LOOPBACK4_IPV6;
284		if (!(addr_type & IPV6_ADDR_MULTICAST))	{
285			err = -EADDRNOTAVAIL;
286			if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
287					   dev, 0)) {
288				goto out_unlock;
289			}
290		}
291	}
292
293	inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
294	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
295	if (!(addr_type & IPV6_ADDR_MULTICAST))
296		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
297	err = 0;
298out_unlock:
299	rcu_read_unlock();
300out:
301	release_sock(sk);
302	return err;
303}
304
305static void rawv6_err(struct sock *sk, struct sk_buff *skb,
306	       struct inet6_skb_parm *opt,
307	       u8 type, u8 code, int offset, __be32 info)
308{
309	struct inet_sock *inet = inet_sk(sk);
310	struct ipv6_pinfo *np = inet6_sk(sk);
311	int err;
312	int harderr;
313
314	/* Report error on raw socket, if:
315	   1. User requested recverr.
316	   2. Socket is connected (otherwise the error indication
317	      is useless without recverr and error is hard.
318	 */
319	if (!np->recverr && sk->sk_state != TCP_ESTABLISHED)
320		return;
321
322	harderr = icmpv6_err_convert(type, code, &err);
323	if (type == ICMPV6_PKT_TOOBIG)
324		harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
325
326	if (np->recverr) {
327		u8 *payload = skb->data;
328		if (!inet->hdrincl)
329			payload += offset;
330		ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
331	}
332
333	if (np->recverr || harderr) {
334		sk->sk_err = err;
335		sk->sk_error_report(sk);
336	}
337}
338
339void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
340		u8 type, u8 code, int inner_offset, __be32 info)
341{
342	struct sock *sk;
343	int hash;
344	struct in6_addr *saddr, *daddr;
345	struct net *net;
346
347	hash = nexthdr & (RAW_HTABLE_SIZE - 1);
348
349	read_lock(&raw_v6_hashinfo.lock);
350	sk = sk_head(&raw_v6_hashinfo.ht[hash]);
351	if (sk != NULL) {
352		/* Note: ipv6_hdr(skb) != skb->data */
353		struct ipv6hdr *ip6h = (struct ipv6hdr *)skb->data;
354		saddr = &ip6h->saddr;
355		daddr = &ip6h->daddr;
356		net = dev_net(skb->dev);
357
358		while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
359						IP6CB(skb)->iif))) {
360			rawv6_err(sk, skb, NULL, type, code,
361					inner_offset, info);
362			sk = sk_next(sk);
363		}
364	}
365	read_unlock(&raw_v6_hashinfo.lock);
366}
367
368static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
369{
370	if ((raw6_sk(sk)->checksum || sk->sk_filter) &&
371	    skb_checksum_complete(skb)) {
372		atomic_inc(&sk->sk_drops);
373		kfree_skb(skb);
374		return NET_RX_DROP;
375	}
376
377	/* Charge it to the socket. */
378	if (ip_queue_rcv_skb(sk, skb) < 0) {
379		kfree_skb(skb);
380		return NET_RX_DROP;
381	}
382
383	return 0;
384}
385
386/*
387 *	This is next to useless...
388 *	if we demultiplex in network layer we don't need the extra call
389 *	just to queue the skb...
390 *	maybe we could have the network decide upon a hint if it
391 *	should call raw_rcv for demultiplexing
392 */
393int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
394{
395	struct inet_sock *inet = inet_sk(sk);
396	struct raw6_sock *rp = raw6_sk(sk);
397
398	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
399		atomic_inc(&sk->sk_drops);
400		kfree_skb(skb);
401		return NET_RX_DROP;
402	}
403
404	if (!rp->checksum)
405		skb->ip_summed = CHECKSUM_UNNECESSARY;
406
407	if (skb->ip_summed == CHECKSUM_COMPLETE) {
408		skb_postpull_rcsum(skb, skb_network_header(skb),
409				   skb_network_header_len(skb));
410		if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
411				     &ipv6_hdr(skb)->daddr,
412				     skb->len, inet->inet_num, skb->csum))
413			skb->ip_summed = CHECKSUM_UNNECESSARY;
414	}
415	if (!skb_csum_unnecessary(skb))
416		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
417							 &ipv6_hdr(skb)->daddr,
418							 skb->len,
419							 inet->inet_num, 0));
420
421	if (inet->hdrincl) {
422		if (skb_checksum_complete(skb)) {
423			atomic_inc(&sk->sk_drops);
424			kfree_skb(skb);
425			return NET_RX_DROP;
426		}
427	}
428
429	rawv6_rcv_skb(sk, skb);
430	return 0;
431}
432
433
434/*
435 *	This should be easy, if there is something there
436 *	we return it, otherwise we block.
437 */
438
439static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
440		  struct msghdr *msg, size_t len,
441		  int noblock, int flags, int *addr_len)
442{
443	struct ipv6_pinfo *np = inet6_sk(sk);
444	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name;
445	struct sk_buff *skb;
446	size_t copied;
447	int err;
448
449	if (flags & MSG_OOB)
450		return -EOPNOTSUPP;
451
452	if (addr_len)
453		*addr_len=sizeof(*sin6);
454
455	if (flags & MSG_ERRQUEUE)
456		return ipv6_recv_error(sk, msg, len);
457
458	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
459		return ipv6_recv_rxpmtu(sk, msg, len);
460
461	skb = skb_recv_datagram(sk, flags, noblock, &err);
462	if (!skb)
463		goto out;
464
465	copied = skb->len;
466	if (copied > len) {
467		copied = len;
468		msg->msg_flags |= MSG_TRUNC;
469	}
470
471	if (skb_csum_unnecessary(skb)) {
472		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
473	} else if (msg->msg_flags&MSG_TRUNC) {
474		if (__skb_checksum_complete(skb))
475			goto csum_copy_err;
476		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
477	} else {
478		err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov);
479		if (err == -EINVAL)
480			goto csum_copy_err;
481	}
482	if (err)
483		goto out_free;
484
485	/* Copy the address. */
486	if (sin6) {
487		sin6->sin6_family = AF_INET6;
488		sin6->sin6_port = 0;
489		ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
490		sin6->sin6_flowinfo = 0;
491		sin6->sin6_scope_id = 0;
492		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
493			sin6->sin6_scope_id = IP6CB(skb)->iif;
494	}
495
496	sock_recv_ts_and_drops(msg, sk, skb);
497
498	if (np->rxopt.all)
499		datagram_recv_ctl(sk, msg, skb);
500
501	err = copied;
502	if (flags & MSG_TRUNC)
503		err = skb->len;
504
505out_free:
506	skb_free_datagram(sk, skb);
507out:
508	return err;
509
510csum_copy_err:
511	skb_kill_datagram(sk, skb, flags);
512
513	/* Error for blocking case is chosen to masquerade
514	   as some normal condition.
515	 */
516	err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
517	goto out;
518}
519
520static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
521				     struct raw6_sock *rp)
522{
523	struct sk_buff *skb;
524	int err = 0;
525	int offset;
526	int len;
527	int total_len;
528	__wsum tmp_csum;
529	__sum16 csum;
530
531	if (!rp->checksum)
532		goto send;
533
534	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
535		goto out;
536
537	offset = rp->offset;
538	total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) -
539						skb->data);
540	if (offset >= total_len - 1) {
541		err = -EINVAL;
542		ip6_flush_pending_frames(sk);
543		goto out;
544	}
545
546	/* should be check HW csum miyazawa */
547	if (skb_queue_len(&sk->sk_write_queue) == 1) {
548		/*
549		 * Only one fragment on the socket.
550		 */
551		tmp_csum = skb->csum;
552	} else {
553		struct sk_buff *csum_skb = NULL;
554		tmp_csum = 0;
555
556		skb_queue_walk(&sk->sk_write_queue, skb) {
557			tmp_csum = csum_add(tmp_csum, skb->csum);
558
559			if (csum_skb)
560				continue;
561
562			len = skb->len - skb_transport_offset(skb);
563			if (offset >= len) {
564				offset -= len;
565				continue;
566			}
567
568			csum_skb = skb;
569		}
570
571		skb = csum_skb;
572	}
573
574	offset += skb_transport_offset(skb);
575	if (skb_copy_bits(skb, offset, &csum, 2))
576		BUG();
577
578	/* in case cksum was not initialized */
579	if (unlikely(csum))
580		tmp_csum = csum_sub(tmp_csum, csum_unfold(csum));
581
582	csum = csum_ipv6_magic(&fl->fl6_src,
583				   &fl->fl6_dst,
584				   total_len, fl->proto, tmp_csum);
585
586	if (csum == 0 && fl->proto == IPPROTO_UDP)
587		csum = CSUM_MANGLED_0;
588
589	if (skb_store_bits(skb, offset, &csum, 2))
590		BUG();
591
592send:
593	err = ip6_push_pending_frames(sk);
594out:
595	return err;
596}
597
598static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
599			struct flowi *fl, struct dst_entry **dstp,
600			unsigned int flags)
601{
602	struct ipv6_pinfo *np = inet6_sk(sk);
603	struct ipv6hdr *iph;
604	struct sk_buff *skb;
605	int err;
606	struct rt6_info *rt = (struct rt6_info *)*dstp;
607
608	if (length > rt->dst.dev->mtu) {
609		ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu);
610		return -EMSGSIZE;
611	}
612	if (flags&MSG_PROBE)
613		goto out;
614
615	skb = sock_alloc_send_skb(sk,
616				  length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
617				  flags & MSG_DONTWAIT, &err);
618	if (skb == NULL)
619		goto error;
620	skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
621
622	skb->priority = sk->sk_priority;
623	skb->mark = sk->sk_mark;
624	skb_dst_set(skb, &rt->dst);
625	*dstp = NULL;
626
627	skb_put(skb, length);
628	skb_reset_network_header(skb);
629	iph = ipv6_hdr(skb);
630
631	skb->ip_summed = CHECKSUM_NONE;
632
633	skb->transport_header = skb->network_header;
634	err = memcpy_fromiovecend((void *)iph, from, 0, length);
635	if (err)
636		goto error_fault;
637
638	IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
639	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
640		      rt->dst.dev, dst_output);
641	if (err > 0)
642		err = net_xmit_errno(err);
643	if (err)
644		goto error;
645out:
646	return 0;
647
648error_fault:
649	err = -EFAULT;
650	kfree_skb(skb);
651error:
652	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
653	if (err == -ENOBUFS && !np->recverr)
654		err = 0;
655	return err;
656}
657
658static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
659{
660	struct iovec *iov;
661	u8 __user *type = NULL;
662	u8 __user *code = NULL;
663	u8 len = 0;
664	int probed = 0;
665	int i;
666
667	if (!msg->msg_iov)
668		return 0;
669
670	for (i = 0; i < msg->msg_iovlen; i++) {
671		iov = &msg->msg_iov[i];
672		if (!iov)
673			continue;
674
675		switch (fl->proto) {
676		case IPPROTO_ICMPV6:
677			/* check if one-byte field is readable or not. */
678			if (iov->iov_base && iov->iov_len < 1)
679				break;
680
681			if (!type) {
682				type = iov->iov_base;
683				/* check if code field is readable or not. */
684				if (iov->iov_len > 1)
685					code = type + 1;
686			} else if (!code)
687				code = iov->iov_base;
688
689			if (type && code) {
690				if (get_user(fl->fl_icmp_type, type) ||
691				    get_user(fl->fl_icmp_code, code))
692					return -EFAULT;
693				probed = 1;
694			}
695			break;
696		case IPPROTO_MH:
697			if (iov->iov_base && iov->iov_len < 1)
698				break;
699			/* check if type field is readable or not. */
700			if (iov->iov_len > 2 - len) {
701				u8 __user *p = iov->iov_base;
702				if (get_user(fl->fl_mh_type, &p[2 - len]))
703					return -EFAULT;
704				probed = 1;
705			} else
706				len += iov->iov_len;
707
708			break;
709		default:
710			probed = 1;
711			break;
712		}
713		if (probed)
714			break;
715	}
716	return 0;
717}
718
719static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
720		   struct msghdr *msg, size_t len)
721{
722	struct ipv6_txoptions opt_space;
723	struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
724	struct in6_addr *daddr, *final_p, final;
725	struct inet_sock *inet = inet_sk(sk);
726	struct ipv6_pinfo *np = inet6_sk(sk);
727	struct raw6_sock *rp = raw6_sk(sk);
728	struct ipv6_txoptions *opt = NULL;
729	struct ip6_flowlabel *flowlabel = NULL;
730	struct dst_entry *dst = NULL;
731	struct flowi fl;
732	int addr_len = msg->msg_namelen;
733	int hlimit = -1;
734	int tclass = -1;
735	int dontfrag = -1;
736	u16 proto;
737	int err;
738
739	/* Rough check on arithmetic overflow,
740	   better check is made in ip6_append_data().
741	 */
742	if (len > INT_MAX)
743		return -EMSGSIZE;
744
745	/* Mirror BSD error message compatibility */
746	if (msg->msg_flags & MSG_OOB)
747		return -EOPNOTSUPP;
748
749	/*
750	 *	Get and verify the address.
751	 */
752	memset(&fl, 0, sizeof(fl));
753
754	fl.mark = sk->sk_mark;
755
756	if (sin6) {
757		if (addr_len < SIN6_LEN_RFC2133)
758			return -EINVAL;
759
760		if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
761			return(-EAFNOSUPPORT);
762
763		/* port is the proto value [0..255] carried in nexthdr */
764		proto = ntohs(sin6->sin6_port);
765
766		if (!proto)
767			proto = inet->inet_num;
768		else if (proto != inet->inet_num)
769			return(-EINVAL);
770
771		if (proto > 255)
772			return(-EINVAL);
773
774		daddr = &sin6->sin6_addr;
775		if (np->sndflow) {
776			fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
777			if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
778				flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
779				if (flowlabel == NULL)
780					return -EINVAL;
781				daddr = &flowlabel->dst;
782			}
783		}
784
785		/*
786		 * Otherwise it will be difficult to maintain
787		 * sk->sk_dst_cache.
788		 */
789		if (sk->sk_state == TCP_ESTABLISHED &&
790		    ipv6_addr_equal(daddr, &np->daddr))
791			daddr = &np->daddr;
792
793		if (addr_len >= sizeof(struct sockaddr_in6) &&
794		    sin6->sin6_scope_id &&
795		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
796			fl.oif = sin6->sin6_scope_id;
797	} else {
798		if (sk->sk_state != TCP_ESTABLISHED)
799			return -EDESTADDRREQ;
800
801		proto = inet->inet_num;
802		daddr = &np->daddr;
803		fl.fl6_flowlabel = np->flow_label;
804	}
805
806	if (fl.oif == 0)
807		fl.oif = sk->sk_bound_dev_if;
808
809	if (msg->msg_controllen) {
810		opt = &opt_space;
811		memset(opt, 0, sizeof(struct ipv6_txoptions));
812		opt->tot_len = sizeof(struct ipv6_txoptions);
813
814		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit,
815					&tclass, &dontfrag);
816		if (err < 0) {
817			fl6_sock_release(flowlabel);
818			return err;
819		}
820		if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
821			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
822			if (flowlabel == NULL)
823				return -EINVAL;
824		}
825		if (!(opt->opt_nflen|opt->opt_flen))
826			opt = NULL;
827	}
828	if (opt == NULL)
829		opt = np->opt;
830	if (flowlabel)
831		opt = fl6_merge_options(&opt_space, flowlabel, opt);
832	opt = ipv6_fixup_options(&opt_space, opt);
833
834	fl.proto = proto;
835	err = rawv6_probe_proto_opt(&fl, msg);
836	if (err)
837		goto out;
838
839	if (!ipv6_addr_any(daddr))
840		ipv6_addr_copy(&fl.fl6_dst, daddr);
841	else
842		fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
843	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
844		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
845
846	final_p = fl6_update_dst(&fl, opt, &final);
847
848	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
849		fl.oif = np->mcast_oif;
850	security_sk_classify_flow(sk, &fl);
851
852	err = ip6_dst_lookup(sk, &dst, &fl);
853	if (err)
854		goto out;
855	if (final_p)
856		ipv6_addr_copy(&fl.fl6_dst, final_p);
857
858	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
859	if (err < 0) {
860		if (err == -EREMOTE)
861			err = ip6_dst_blackhole(sk, &dst, &fl);
862		if (err < 0)
863			goto out;
864	}
865
866	if (hlimit < 0) {
867		if (ipv6_addr_is_multicast(&fl.fl6_dst))
868			hlimit = np->mcast_hops;
869		else
870			hlimit = np->hop_limit;
871		if (hlimit < 0)
872			hlimit = ip6_dst_hoplimit(dst);
873	}
874
875	if (tclass < 0)
876		tclass = np->tclass;
877
878	if (dontfrag < 0)
879		dontfrag = np->dontfrag;
880
881	if (msg->msg_flags&MSG_CONFIRM)
882		goto do_confirm;
883
884back_from_confirm:
885	if (inet->hdrincl)
886		err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags);
887	else {
888		lock_sock(sk);
889		err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
890			len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst,
891			msg->msg_flags, dontfrag);
892
893		if (err)
894			ip6_flush_pending_frames(sk);
895		else if (!(msg->msg_flags & MSG_MORE))
896			err = rawv6_push_pending_frames(sk, &fl, rp);
897		release_sock(sk);
898	}
899done:
900	dst_release(dst);
901out:
902	fl6_sock_release(flowlabel);
903	return err<0?err:len;
904do_confirm:
905	dst_confirm(dst);
906	if (!(msg->msg_flags & MSG_PROBE) || len)
907		goto back_from_confirm;
908	err = 0;
909	goto done;
910}
911
912static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
913			       char __user *optval, int optlen)
914{
915	switch (optname) {
916	case ICMPV6_FILTER:
917		if (optlen > sizeof(struct icmp6_filter))
918			optlen = sizeof(struct icmp6_filter);
919		if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen))
920			return -EFAULT;
921		return 0;
922	default:
923		return -ENOPROTOOPT;
924	}
925
926	return 0;
927}
928
929static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
930			       char __user *optval, int __user *optlen)
931{
932	int len;
933
934	switch (optname) {
935	case ICMPV6_FILTER:
936		if (get_user(len, optlen))
937			return -EFAULT;
938		if (len < 0)
939			return -EINVAL;
940		if (len > sizeof(struct icmp6_filter))
941			len = sizeof(struct icmp6_filter);
942		if (put_user(len, optlen))
943			return -EFAULT;
944		if (copy_to_user(optval, &raw6_sk(sk)->filter, len))
945			return -EFAULT;
946		return 0;
947	default:
948		return -ENOPROTOOPT;
949	}
950
951	return 0;
952}
953
954
955static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
956			    char __user *optval, unsigned int optlen)
957{
958	struct raw6_sock *rp = raw6_sk(sk);
959	int val;
960
961	if (get_user(val, (int __user *)optval))
962		return -EFAULT;
963
964	switch (optname) {
965		case IPV6_CHECKSUM:
966			if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
967			    level == IPPROTO_IPV6) {
968				/*
969				 * RFC3542 tells that IPV6_CHECKSUM socket
970				 * option in the IPPROTO_IPV6 level is not
971				 * allowed on ICMPv6 sockets.
972				 * If you want to set it, use IPPROTO_RAW
973				 * level IPV6_CHECKSUM socket option
974				 * (Linux extension).
975				 */
976				return -EINVAL;
977			}
978
979			/* You may get strange result with a positive odd offset;
980			   RFC2292bis agrees with me. */
981			if (val > 0 && (val&1))
982				return(-EINVAL);
983			if (val < 0) {
984				rp->checksum = 0;
985			} else {
986				rp->checksum = 1;
987				rp->offset = val;
988			}
989
990			return 0;
991			break;
992
993		default:
994			return(-ENOPROTOOPT);
995	}
996}
997
998static int rawv6_setsockopt(struct sock *sk, int level, int optname,
999			  char __user *optval, unsigned int optlen)
1000{
1001	switch(level) {
1002		case SOL_RAW:
1003			break;
1004
1005		case SOL_ICMPV6:
1006			if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1007				return -EOPNOTSUPP;
1008			return rawv6_seticmpfilter(sk, level, optname, optval,
1009						   optlen);
1010		case SOL_IPV6:
1011			if (optname == IPV6_CHECKSUM)
1012				break;
1013		default:
1014			return ipv6_setsockopt(sk, level, optname, optval,
1015					       optlen);
1016	}
1017
1018	return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
1019}
1020
#ifdef CONFIG_COMPAT
/*
 * Compat variant of rawv6_setsockopt(): identical dispatch, except that
 * options not handled here go to compat_ipv6_setsockopt().
 */
static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
				   char __user *optval, unsigned int optlen)
{
	if (level == SOL_ICMPV6) {
		if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
	}

	if (level == SOL_RAW ||
	    (level == SOL_IPV6 && optname == IPV6_CHECKSUM))
		return do_rawv6_setsockopt(sk, level, optname, optval,
					   optlen);

	return compat_ipv6_setsockopt(sk, level, optname, optval, optlen);
}
#endif
1042
1043static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
1044			    char __user *optval, int __user *optlen)
1045{
1046	struct raw6_sock *rp = raw6_sk(sk);
1047	int val, len;
1048
1049	if (get_user(len,optlen))
1050		return -EFAULT;
1051
1052	switch (optname) {
1053	case IPV6_CHECKSUM:
1054		/*
1055		 * We allow getsockopt() for IPPROTO_IPV6-level
1056		 * IPV6_CHECKSUM socket option on ICMPv6 sockets
1057		 * since RFC3542 is silent about it.
1058		 */
1059		if (rp->checksum == 0)
1060			val = -1;
1061		else
1062			val = rp->offset;
1063		break;
1064
1065	default:
1066		return -ENOPROTOOPT;
1067	}
1068
1069	len = min_t(unsigned int, sizeof(int), len);
1070
1071	if (put_user(len, optlen))
1072		return -EFAULT;
1073	if (copy_to_user(optval,&val,len))
1074		return -EFAULT;
1075	return 0;
1076}
1077
1078static int rawv6_getsockopt(struct sock *sk, int level, int optname,
1079			  char __user *optval, int __user *optlen)
1080{
1081	switch(level) {
1082		case SOL_RAW:
1083			break;
1084
1085		case SOL_ICMPV6:
1086			if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1087				return -EOPNOTSUPP;
1088			return rawv6_geticmpfilter(sk, level, optname, optval,
1089						   optlen);
1090		case SOL_IPV6:
1091			if (optname == IPV6_CHECKSUM)
1092				break;
1093		default:
1094			return ipv6_getsockopt(sk, level, optname, optval,
1095					       optlen);
1096	}
1097
1098	return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
1099}
1100
#ifdef CONFIG_COMPAT
/*
 * Compat variant of rawv6_getsockopt(): identical dispatch, except that
 * options not handled here go to compat_ipv6_getsockopt().
 */
static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
				   char __user *optval, int __user *optlen)
{
	if (level == SOL_ICMPV6) {
		if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
	}

	if (level == SOL_RAW ||
	    (level == SOL_IPV6 && optname == IPV6_CHECKSUM))
		return do_rawv6_getsockopt(sk, level, optname, optval,
					   optlen);

	return compat_ipv6_getsockopt(sk, level, optname, optval, optlen);
}
#endif
1122
1123static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
1124{
1125	switch(cmd) {
1126		case SIOCOUTQ:
1127		{
1128			int amount = sk_wmem_alloc_get(sk);
1129
1130			return put_user(amount, (int __user *)arg);
1131		}
1132		case SIOCINQ:
1133		{
1134			struct sk_buff *skb;
1135			int amount = 0;
1136
1137			spin_lock_bh(&sk->sk_receive_queue.lock);
1138			skb = skb_peek(&sk->sk_receive_queue);
1139			if (skb != NULL)
1140				amount = skb->tail - skb->transport_header;
1141			spin_unlock_bh(&sk->sk_receive_queue.lock);
1142			return put_user(amount, (int __user *)arg);
1143		}
1144
1145		default:
1146#ifdef CONFIG_IPV6_MROUTE
1147			return ip6mr_ioctl(sk, cmd, (void __user *)arg);
1148#else
1149			return -ENOIOCTLCMD;
1150#endif
1151	}
1152}
1153
1154static void rawv6_close(struct sock *sk, long timeout)
1155{
1156	if (inet_sk(sk)->inet_num == IPPROTO_RAW)
1157		ip6_ra_control(sk, -1);
1158	ip6mr_sk_done(sk);
1159	sk_common_release(sk);
1160}
1161
/*
 * Destroy hook: drop any frames still pending on the write queue (under
 * the socket lock), then run the generic IPv6 socket teardown.
 */
static void raw6_destroy(struct sock *sk)
{
	lock_sock(sk);
	ip6_flush_pending_frames(sk);
	release_sock(sk);

	inet6_destroy_sock(sk);
}
1170
1171static int rawv6_init_sk(struct sock *sk)
1172{
1173	struct raw6_sock *rp = raw6_sk(sk);
1174
1175	switch (inet_sk(sk)->inet_num) {
1176	case IPPROTO_ICMPV6:
1177		rp->checksum = 1;
1178		rp->offset   = 2;
1179		break;
1180	case IPPROTO_MH:
1181		rp->checksum = 1;
1182		rp->offset   = 4;
1183		break;
1184	default:
1185		break;
1186	}
1187	return(0);
1188}
1189
1190struct proto rawv6_prot = {
1191	.name		   = "RAWv6",
1192	.owner		   = THIS_MODULE,
1193	.close		   = rawv6_close,
1194	.destroy	   = raw6_destroy,
1195	.connect	   = ip6_datagram_connect,
1196	.disconnect	   = udp_disconnect,
1197	.ioctl		   = rawv6_ioctl,
1198	.init		   = rawv6_init_sk,
1199	.setsockopt	   = rawv6_setsockopt,
1200	.getsockopt	   = rawv6_getsockopt,
1201	.sendmsg	   = rawv6_sendmsg,
1202	.recvmsg	   = rawv6_recvmsg,
1203	.bind		   = rawv6_bind,
1204	.backlog_rcv	   = rawv6_rcv_skb,
1205	.hash		   = raw_hash_sk,
1206	.unhash		   = raw_unhash_sk,
1207	.obj_size	   = sizeof(struct raw6_sock),
1208	.h.raw_hash	   = &raw_v6_hashinfo,
1209#ifdef CONFIG_COMPAT
1210	.compat_setsockopt = compat_rawv6_setsockopt,
1211	.compat_getsockopt = compat_rawv6_getsockopt,
1212#endif
1213};
1214
1215#ifdef CONFIG_PROC_FS
1216static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
1217{
1218	struct ipv6_pinfo *np = inet6_sk(sp);
1219	struct in6_addr *dest, *src;
1220	__u16 destp, srcp;
1221
1222	dest  = &np->daddr;
1223	src   = &np->rcv_saddr;
1224	destp = 0;
1225	srcp  = inet_sk(sp)->inet_num;
1226	seq_printf(seq,
1227		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1228		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
1229		   i,
1230		   src->s6_addr32[0], src->s6_addr32[1],
1231		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1232		   dest->s6_addr32[0], dest->s6_addr32[1],
1233		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1234		   sp->sk_state,
1235		   sk_wmem_alloc_get(sp),
1236		   sk_rmem_alloc_get(sp),
1237		   0, 0L, 0,
1238		   sock_i_uid(sp), 0,
1239		   sock_i_ino(sp),
1240		   atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
1241}
1242
1243static int raw6_seq_show(struct seq_file *seq, void *v)
1244{
1245	if (v == SEQ_START_TOKEN)
1246		seq_printf(seq,
1247			   "  sl  "
1248			   "local_address                         "
1249			   "remote_address                        "
1250			   "st tx_queue rx_queue tr tm->when retrnsmt"
1251			   "   uid  timeout inode ref pointer drops\n");
1252	else
1253		raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
1254	return 0;
1255}
1256
1257static const struct seq_operations raw6_seq_ops = {
1258	.start =	raw_seq_start,
1259	.next =		raw_seq_next,
1260	.stop =		raw_seq_stop,
1261	.show =		raw6_seq_show,
1262};
1263
1264static int raw6_seq_open(struct inode *inode, struct file *file)
1265{
1266	return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops);
1267}
1268
1269static const struct file_operations raw6_seq_fops = {
1270	.owner =	THIS_MODULE,
1271	.open =		raw6_seq_open,
1272	.read =		seq_read,
1273	.llseek =	seq_lseek,
1274	.release =	seq_release_net,
1275};
1276
1277static int __net_init raw6_init_net(struct net *net)
1278{
1279	if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops))
1280		return -ENOMEM;
1281
1282	return 0;
1283}
1284
1285static void __net_exit raw6_exit_net(struct net *net)
1286{
1287	proc_net_remove(net, "raw6");
1288}
1289
1290static struct pernet_operations raw6_net_ops = {
1291	.init = raw6_init_net,
1292	.exit = raw6_exit_net,
1293};
1294
1295int __init raw6_proc_init(void)
1296{
1297	return register_pernet_subsys(&raw6_net_ops);
1298}
1299
1300void raw6_proc_exit(void)
1301{
1302	unregister_pernet_subsys(&raw6_net_ops);
1303}
1304#endif	/* CONFIG_PROC_FS */
1305
1306/* Same as inet6_dgram_ops, sans udp_poll.  */
1307static const struct proto_ops inet6_sockraw_ops = {
1308	.family		   = PF_INET6,
1309	.owner		   = THIS_MODULE,
1310	.release	   = inet6_release,
1311	.bind		   = inet6_bind,
1312	.connect	   = inet_dgram_connect,	/* ok		*/
1313	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
1314	.accept		   = sock_no_accept,		/* a do nothing	*/
1315	.getname	   = inet6_getname,
1316	.poll		   = datagram_poll,		/* ok		*/
1317	.ioctl		   = inet6_ioctl,		/* must change  */
1318	.listen		   = sock_no_listen,		/* ok		*/
1319	.shutdown	   = inet_shutdown,		/* ok		*/
1320	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
1321	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
1322	.sendmsg	   = inet_sendmsg,		/* ok		*/
1323	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
1324	.mmap		   = sock_no_mmap,
1325	.sendpage	   = sock_no_sendpage,
1326#ifdef CONFIG_COMPAT
1327	.compat_setsockopt = compat_sock_common_setsockopt,
1328	.compat_getsockopt = compat_sock_common_getsockopt,
1329#endif
1330};
1331
1332static struct inet_protosw rawv6_protosw = {
1333	.type		= SOCK_RAW,
1334	.protocol	= IPPROTO_IP,	/* wild card */
1335	.prot		= &rawv6_prot,
1336	.ops		= &inet6_sockraw_ops,
1337	.no_check	= UDP_CSUM_DEFAULT,
1338	.flags		= INET_PROTOSW_REUSE,
1339};
1340
1341int __init rawv6_init(void)
1342{
1343	int ret;
1344
1345	ret = inet6_register_protosw(&rawv6_protosw);
1346	if (ret)
1347		goto out;
1348out:
1349	return ret;
1350}
1351
1352void rawv6_exit(void)
1353{
1354	inet6_unregister_protosw(&rawv6_protosw);
1355}
1356