/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *	Imran Patel	: 	frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *			:       add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}
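
/*
 * Illustrative sketch (kept out of the build): how the payload_len fixup
 * in __ip6_local_out() behaves.  The numbers and the RFC 2675 reading are
 * assumptions for illustration, not taken from this file.
 */
#if 0
static void example_payload_len(struct sk_buff *skb)
{
	int len = skb->len - sizeof(struct ipv6hdr);

	/* For an ordinary 1280-byte packet, len == 1240 and payload_len
	 * becomes htons(1240).  For a jumbo-sized skb, len exceeds
	 * IPV6_MAXPLEN (65535) and payload_len is set to 0, the on-wire
	 * convention used with the Jumbo Payload hop-by-hop option
	 * (RFC 2675). */
	ipv6_hdr(skb)->payload_len = htons(len > IPV6_MAXPLEN ? 0 : len);
}
#endif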

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx_ni(newskb);
	return 0;
}

static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);
	}

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
}

static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
	int hlimit = -1;
	int tclass = 0;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now);
		 * MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np) {
		tclass = np->tclass;
		hlimit = np->hop_limit;
	}
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
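
/*
 * Usage sketch (illustrative only, kept out of the build): a minimal
 * caller in the style of the TCP/DCCP response paths.  The flowi setup
 * and the use of np->daddr/np->opt are assumptions for illustration;
 * real callers fill in far more state and already hold a dst on the skb.
 */
#if 0
static int example_ip6_xmit(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct flowi fl;

	memset(&fl, 0, sizeof(fl));
	fl.proto = sk->sk_protocol;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.oif = sk->sk_bound_dev_if;

	/* skb must already carry a dst, e.g. set via skb_dst_set(). */
	return ip6_xmit(sk, skb, &fl, np->opt);
}
#endif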

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance critical for us)
 */
278
279int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
280	       const struct in6_addr *saddr, const struct in6_addr *daddr,
281	       int proto, int len)
282{
283	struct ipv6_pinfo *np = inet6_sk(sk);
284	struct ipv6hdr *hdr;
285	int totlen;
286
287	skb->protocol = htons(ETH_P_IPV6);
288	skb->dev = dev;
289
290	totlen = len + sizeof(struct ipv6hdr);
291
292	skb_reset_network_header(skb);
293	skb_put(skb, sizeof(struct ipv6hdr));
294	hdr = ipv6_hdr(skb);
295
296	*(__be32*)hdr = htonl(0x60000000);
297
298	hdr->payload_len = htons(len);
299	hdr->nexthdr = proto;
300	hdr->hop_limit = np->hop_limit;
301
302	ipv6_addr_copy(&hdr->saddr, saddr);
303	ipv6_addr_copy(&hdr->daddr, daddr);
304
305	return 0;
306}
307
308static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
309{
310	struct ip6_ra_chain *ra;
311	struct sock *last = NULL;
312
313	read_lock(&ip6_ra_lock);
314	for (ra = ip6_ra_chain; ra; ra = ra->next) {
315		struct sock *sk = ra->sk;
316		if (sk && ra->sel == sel &&
317		    (!sk->sk_bound_dev_if ||
318		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
319			if (last) {
320				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
321				if (skb2)
322					rawv6_rcv(last, skb2);
323			}
324			last = sk;
325		}
326	}
327
328	if (last) {
329		rawv6_rcv(last, skb);
330		read_unlock(&ip6_ra_lock);
331		return 1;
332	}
333	read_unlock(&ip6_ra_lock);
334	return 0;
335}
336
337static int ip6_forward_proxy_check(struct sk_buff *skb)
338{
339	struct ipv6hdr *hdr = ipv6_hdr(skb);
340	u8 nexthdr = hdr->nexthdr;
341	int offset;
342
343	if (ipv6_ext_hdr(nexthdr)) {
344		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
345		if (offset < 0)
346			return 0;
347	} else
348		offset = sizeof(struct ipv6hdr);
349
350	if (nexthdr == IPPROTO_ICMPV6) {
351		struct icmp6hdr *icmp6;
352
353		if (!pskb_may_pull(skb, (skb_network_header(skb) +
354					 offset + 1 - skb->data)))
355			return 0;
356
357		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
358
359		switch (icmp6->icmp6_type) {
360		case NDISC_ROUTER_SOLICITATION:
361		case NDISC_ROUTER_ADVERTISEMENT:
362		case NDISC_NEIGHBOUR_SOLICITATION:
363		case NDISC_NEIGHBOUR_ADVERTISEMENT:
364		case NDISC_REDIRECT:
			/* Unicast neighbour discovery messages destined
			 * to the proxied address are passed to the input
			 * function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT do any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any warranty that the application will be able
	 *	to interpret them. The reason is that we
	 *	cannot do anything clever here.
	 *
	 *	We are not the end-node, so if the packet contains
	 *	AH/ESP we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}
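
	/*
	 * The Router Alert option checked above is laid out as (RFC 2711):
	 * ptr[0] = option type (IPV6_TLV_ROUTERALERT), ptr[1] = data
	 * length (2), ptr[2..3] = 16-bit value in network byte order, so
	 * (ptr[2] << 8) + ptr[3] recovers the value that
	 * ip6_call_ra_chain() matches against each socket's
	 * IPV6_ROUTER_ALERT selector.
	 */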

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
#if 1	/* IPv6Ready - Test v6LC.1.1.10 Part C: Request sent from
	 * unspecified address.  RFC 2460: Internet Protocol, Version 6
	 * (IPv6) Specification.
	 */
		if (addrtype == IPV6_ADDR_ANY) {
			/* IP6_INC_STATS(ip6_dst_idev(dst),
			 *		 IPSTATS_MIB_INDISCARDS); */
			goto drop;
		}
#endif
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (skb->len > mtu && !skb_is_gso(skb)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
	defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
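
/*
 * Worked example for ip6_find_1stfragopt() (header chain assumed for
 * illustration): for a packet laid out as
 *
 *	IPv6 | Hop-by-Hop | Routing | Dest opts | TCP
 *
 * the walk passes Hop-by-Hop, sets found_rhdr at the Routing header,
 * then returns the offset of the Dest opts header.  The Fragment header
 * is inserted there, so the IPv6 header plus Hop-by-Hop and Routing
 * headers form the unfragmentable part, matching RFC 2460 section 4.5.
 */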

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb was not generated by a local socket.
	 */
	if (!skb->local_df && skb->len > mtu) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);
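
	/* Example of the arithmetic above (numbers assumed for
	 * illustration): with a 1500-byte MTU and hlen == 40 (a bare
	 * IPv6 header), mtu becomes 1500 - 40 - 8 = 1452 bytes of
	 * fragmentable payload; the slow path below further rounds
	 * non-final fragments down to a multiple of 8 (len &= ~7),
	 * i.e. 1448 bytes.
	 */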

	if (skb_has_frags(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare the header of the next frame,
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->dst);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}
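
/*
 * ip6_rt_check() returns 0 ("still usable") when either the key is a
 * /128 host route equal to the flow address, or the cached peer address
 * equals the flow address; any other combination returns 1 and, in
 * ip6_sk_dst_check() below, forces a fresh route lookup.  Example
 * (values assumed): a /128 rt6i_dst equal to fl6_dst keeps a connected
 * socket on its cached dst even when daddr_cache is NULL.
 */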

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the non-connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * or MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route,
	 *    check that the cached destination is current.
	 *    If it is a network route, we still may
	 *    check its validity using the saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now
	 *    (because the main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * If the dst entry we've looked up has a neighbour entry
	 * that is in the INCOMPLETE state, and the source address
	 * from the flow is marked as OPTIMISTIC, we release the
	 * found dst entry and replace it instead with the
	 * dst entry of the nexthop router.
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
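
/*
 * Usage sketch (illustrative only, kept out of the build): the common
 * calling pattern, in the style of the datagram connect paths.  The
 * helper name is hypothetical.
 */
#if 0
static int example_ip6_dst_lookup(struct sock *sk, struct flowi *fl,
				  struct sk_buff *skb)
{
	struct dst_entry *dst;
	int err;

	err = ip6_dst_lookup(sk, &dst, fl);
	if (err)
		return err;	/* no usable route; *dst was released */

	skb_dst_set(skb, dst);	/* the skb now owns the reference */
	return 0;
}
#endif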

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by the network
	 * device, so create one single skb packet containing the complete
	 * UDP datagram.
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path.
	 */
	kfree_skb(skb);

	return err;
}
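
/*
 * Example of the gso_size computation above (numbers assumed for
 * illustration): with mtu == 1500 and fragheaderlen == 48 (IPv6 header
 * plus one 8-byte extension header), gso_size = (1500 - 48 - 8) & ~7
 * = 1440, so the device emits fragment payloads of 1440 bytes, a
 * multiple of 8 as required for non-final fragments.
 */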

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
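
/*
 * Both dup helpers size the copy as (hdrlen + 1) * 8 because an IPv6
 * extension header's hdrlen field counts 8-octet units excluding the
 * first 8 octets (RFC 2460): hdrlen == 0 means an 8-byte header,
 * hdrlen == 1 a 16-byte header, and so on.
 */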

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa */
		}
		dst_hold(&rt->dst);
		inet->cork.dst = &rt->dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);
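
	/* Worked example (numbers assumed for illustration): with
	 * mtu == 1500 and fragheaderlen == 40 (a bare IPv6 header),
	 * maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1456 + 32 = 1488,
	 * leaving room for the 8-byte Fragment header while keeping
	 * each fragment's payload a multiple of 8.
	 */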

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) +
		    IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	inet->cork.length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}

		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If the remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}