/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) output module.
 *
 * Version:	$Id: ip_output.c,v 1.1.1.1 2007/08/03 18:53:51 Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <Alan.Cox@linux.org>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Hirokazu Takahashi, <taka@valinux.co.jp>
 *
 *	See ip_input.c for original log
 *
 *	Fixes:
 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
 *		Bradford Johnson:	Fix faulty handling of some frames when
 *					no route is found.
 *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
 *					(in case if packet not accepted by
 *					output firewall rules)
 *		Mike McLagan	:	Routing by source
 *		Alexey Kuznetsov:	use new route cache
 *		Andi Kleen:		Fix broken PMTU recovery and remove
 *					some redundant tests.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *		Andi Kleen	: 	Replace ip_reply with ip_send_reply.
 *		Andi Kleen	:	Split fast and slow ip_build_xmit path
 *					for decreased register pressure on x86
 *					and more readability.
 *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
 *					silently drop skb instead of failing with -EPERM.
 *		Detlev Wengorz	:	Copy protocol for fragments.
 *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
 *					datagrams.
 *		Hirokazu Takahashi:	sendfile() on UDP works now.
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/highmem.h>

#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
#include <linux/tcp.h>

int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;

/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
{
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}

/* dev_loopback_xmit for use with netfilter. */
static int ip_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);
	netif_rx(newskb);
	return 0;
}

static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
	int ttl = inet->uc_ttl;

	if (ttl < 0)
		ttl = dst_metric(dst, RTAX_HOPLIMIT);
	return ttl;
}

/*
 *		Add an ip header to a skbuff and send it out.
 *
 */
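/*
 * Note: the skb is expected to arrive here with a routed dst already
 * attached; the caller supplies saddr, daddr and any IP options.  In this
 * tree it is typically connection-oriented code (for example TCP answering
 * a connection request with a SYN-ACK) that uses this helper.
 */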
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
			  __be32 saddr, __be32 daddr, struct ip_options *opt)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = (struct rtable *)skb->dst;
	struct iphdr *iph;

	/* Build the IP header. */
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->version  = 4;
	iph->ihl      = 5;
	iph->tos      = inet->tos;
	if (ip_dont_fragment(sk, &rt->u.dst))
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
	iph->daddr    = rt->rt_dst;
	iph->saddr    = rt->rt_src;
	iph->protocol = sk->sk_protocol;
	iph->tot_len  = htons(skb->len);
	ip_select_ident(iph, &rt->u.dst, sk);

	if (opt && opt->optlen) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, daddr, rt, 0);
	}
	ip_send_check(iph);

	skb->priority = sk->sk_priority;

	/* Send it out. */
	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
		       dst_output);
}

EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);

static inline int ip_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct rtable *rt = (struct rtable *)dst;
	struct net_device *dev = dst->dev;
	int hh_len = LL_RESERVED_SPACE(dev);

	if (rt->rt_type == RTN_MULTICAST)
		IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
	else if (rt->rt_type == RTN_BROADCAST)
		IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);

	/* Be paranoid, rather than too clever. */
	if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
		struct sk_buff *skb2;

		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
		if (skb2 == NULL) {
			kfree_skb(skb);
			return -ENOMEM;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		kfree_skb(skb);
		skb = skb2;
	}

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	if (net_ratelimit())
		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
	kfree_skb(skb);
	return -EINVAL;
}

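/*
 * For sockets probing for a larger path MTU (IP_PMTUDISC_PROBE) the device
 * MTU is used instead of the (possibly smaller) cached path MTU, so probe
 * packets are not clamped by an earlier PMTU estimate.
 */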
static inline int ip_skb_dst_mtu(struct sk_buff *skb)
{
	struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;

	return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

/* Fxcn port-S Wins, 0714-09 */
int (*br_post_insert_hook)(struct sk_buff *skb); /* Foxconn add, Lewis Min, for OpenDNS, 03/12/2009 */
/* Fxcn port-E Wins, 0714-09 */
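/*
 * The OpenDNS hook above is installed and cleared with
 * insert_func_to_BR_POST_ROUTE()/remove_func_from_BR_POST_ROUTE(),
 * defined at the bottom of this file.
 */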
static inline int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb->dst->xfrm != NULL) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(skb);
	}
#endif

/* Fxcn port-S Wins, 0714-09 */
	/* Foxconn add start, Lewis Min, for OpenDNS, 03/12/2009 */
	if (br_post_insert_hook != NULL) {
		int ret;

		ret = br_post_insert_hook(skb);
		if (ret == NF_DROP || ret == NF_STOLEN)
			return 0;	/* do not pass the packet further */
	}
	/* Foxconn add end, Lewis Min, for OpenDNS, 03/12/2009 */
/* Fxcn port-E Wins, 0714-09 */

	if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
		return ip_fragment(skb, ip_finish_output2);
	else
		return ip_finish_output2(skb);
}

int ip_mc_output(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct rtable *rt = (struct rtable*)skb->dst;
	struct net_device *dev = rt->u.dst.dev;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */
	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	/*
	 *	Multicasts are looped back for other local users
	 */

	if (rt->rt_flags&RTCF_MULTICAST) {
		if ((!sk || inet_sk(sk)->mc_loop)
#ifdef CONFIG_IP_MROUTE
		/* Small optimization: do not loop back non-local frames
		   that were returned after forwarding; they will be dropped
		   by ip_mr_input in any case.
		   Note that local frames are looped back to be delivered
		   to local recipients.

		   This check is duplicated in ip_mr_input at the moment.
		 */
		    && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
#endif
		) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
			if (newskb)
				NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
					newskb->dev,
					ip_dev_loopback_xmit);
		}

		/* Multicasts with ttl 0 must not go beyond the host */

		if (ip_hdr(skb)->ttl == 0) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (rt->rt_flags&RTCF_BROADCAST) {
		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
		if (newskb)
			NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
				newskb->dev, ip_dev_loopback_xmit);
	}

	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
			    ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}

int ip_output(struct sk_buff *skb)
{
	struct net_device *dev = skb->dst->dev;

	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
			    ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}

int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ip_options *opt = inet->opt;
	struct rtable *rt;
	struct iphdr *iph;

	/* Skip all of this if the packet is already routed,
	 * e.g. by something like SCTP.
	 */
	rt = (struct rtable *) skb->dst;
	if (rt != NULL)
		goto packet_routed;

	/* Make sure we can route this packet. */
	rt = (struct rtable *)__sk_dst_check(sk, 0);
	if (rt == NULL) {
		__be32 daddr;

		/* Use correct destination address if we have options. */
		daddr = inet->daddr;
		if (opt && opt->srr)
			daddr = opt->faddr;

		{
			struct flowi fl = { .oif = sk->sk_bound_dev_if,
					    .nl_u = { .ip4_u =
						      { .daddr = daddr,
							.saddr = inet->saddr,
							.tos = RT_CONN_FLAGS(sk) } },
					    .proto = sk->sk_protocol,
					    .uli_u = { .ports =
						       { .sport = inet->sport,
							 .dport = inet->dport } } };

			/* If this fails, the retransmit mechanism of the
			 * transport layer will keep trying until the route
			 * appears or the connection times itself out.
			 */
			security_sk_classify_flow(sk, &fl);
			if (ip_route_output_flow(&rt, &fl, sk, 0))
				goto no_route;
		}
		sk_setup_caps(sk, &rt->u.dst);
	}
	skb->dst = dst_clone(&rt->u.dst);

packet_routed:
	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
		goto no_route;

	/* OK, we know where to send it, allocate and build IP header. */
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
	iph->tot_len = htons(skb->len);
	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
	iph->protocol = sk->sk_protocol;
	iph->saddr    = rt->rt_src;
	iph->daddr    = rt->rt_dst;
	/* Transport layer set skb->h.foo itself. */

	if (opt && opt->optlen) {
		iph->ihl += opt->optlen >> 2;
		ip_options_build(skb, opt, inet->daddr, rt, 0);
	}

	ip_select_ident_more(iph, &rt->u.dst, sk,
			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);

	/* Add an IP checksum. */
	ip_send_check(iph);

	skb->priority = sk->sk_priority;

	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
		       dst_output);

no_route:
	IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EHOSTUNREACH;
}


static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

	/* Copy the flags to each fragment. */
	IPCB(to)->flags = IPCB(from)->flags;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
	to->ipvs_property = from->ipvs_property;
#endif
	skb_copy_secmark(to, from);
}

/*
 *	This IP datagram is too large to be sent in one piece.  Break it up
 *	into smaller pieces (each consisting of an IP header plus a block of
 *	the original datagram's data) that will still fit in a single device
 *	frame, and queue such frames for sending.
 */
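/*
 * Two strategies are used below: a fast path that turns an existing
 * frag_list chain into fragments in place, and a slow path that allocates
 * fresh skbs and copies the payload into them.  In both cases the fragment
 * offset is stored in 8-byte units (frag_off = offset >> 3) and IP_MF is
 * set on every fragment except the last.
 */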

int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
{
	struct iphdr *iph;
	int raw = 0;
	int ptr;
	struct net_device *dev;
	struct sk_buff *skb2;
	unsigned int mtu, hlen, left, len, ll_rs, pad;
	int offset;
	__be16 not_last_frag;
	struct rtable *rt = (struct rtable*)skb->dst;
	int err = 0;
	int first_frag = 1;	/* Foxconn added pling 04/29/2010 */

	dev = rt->u.dst.dev;

	/*
	 *	Point into the IP datagram header.
	 */

	iph = ip_hdr(skb);

	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(ip_skb_dst_mtu(skb)));
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/*
	 *	Setup starting values.
	 */

	hlen = iph->ihl * 4;
	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;

	/* When frag_list is given, use it. First, check its validity:
	 * some transformers could create a wrong frag_list or break an
	 * existing one; that is not prohibited. In this case fall back
	 * to copying.
	 *
	 * LATER: this step can be merged into the real generation of
	 * fragments; we can switch to copying when we see the first bad
	 * fragment.
	 */
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *frag;
		int first_len = skb_pagelen(skb);

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				skb->truesize -= frag->truesize;
			}
		}

		/* Everything is OK. Generate! */

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		iph->tot_len = htons(first_len);
		iph->frag_off = htons(IP_MF);
		ip_send_check(iph);

		for (;;) {
			/* Prepare the header of the next frame,
			 * before the previous one is sent down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), iph, hlen);
				iph = ip_hdr(frag);
				iph->tot_len = htons(frag->len);
				ip_copy_metadata(frag, skb);
				if (offset == 0)
					ip_options_fragment(frag);
				offset += skb->len - hlen;
				iph->frag_off = htons(offset>>3);
				if (frag->next != NULL)
					iph->frag_off |= htons(IP_MF);
				/* Ready, complete checksum */
				ip_send_check(iph);
			}

			err = output(skb);

			if (!err)
				IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		if (err == 0) {
			IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}
		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = raw + hlen;		/* Where to start from */

	/* for bridged IP traffic encapsulated inside e.g. a vlan header,
	 * we need to make room for the encapsulating header
	 */
	pad = nf_bridge_pad(skb);
	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
	mtu -= pad;

	/*
	 *	Fragment the datagram.
	 */

	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
	not_last_frag = iph->frag_off & htons(IP_MF);

	/*
	 *	Keep copying data until we run out.
	 */

	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip_copy_metadata(skb2, skb);
		skb_reserve(skb2, ll_rs);
		skb_put(skb2, len + hlen);
		skb_reset_network_header(skb2);
		skb2->transport_header = skb2->network_header + hlen;

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */

		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
			BUG();
		left -= len;

		/*
		 *	Fill in the new header fields.
		 */
		iph = ip_hdr(skb2);
		iph->frag_off = htons((offset >> 3));

		/* Foxconn added start pling 04/29/2010 */
		/* If the packet is not from the IP stack, i.e. it came from
		 * another interface, then copy the Ethernet header and cb to
		 * the first fragment, for later use by NAT/QoS.
		 */
		if (!skb->sk && first_frag) {
			first_frag = 0;
			skb2->mac_header = (unsigned char *)(skb2->data - sizeof(struct ethhdr));
			memcpy(skb2->mac_header, skb->mac_header, sizeof(struct ethhdr));
			memcpy(skb2->cb, skb->cb, sizeof(skb->cb));
		}
		/* Foxconn added end pling 04/29/2010 */

		/* ANK: dirty, but effective trick. Upgrade options only if
		 * the segment to be fragmented was THE FIRST (otherwise,
		 * options are already fixed) and make it ONCE
		 * on the initial skb, so that all the following fragments
		 * will inherit fixed options.
		 */
		if (offset == 0)
			ip_options_fragment(skb);

		/*
		 *	Added AC : If we are fragmenting a fragment that's
		 *		   not the last fragment then keep the MF bit
		 *		   set on each fragment
		 */
		if (left > 0 || not_last_frag)
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		iph->tot_len = htons(len + hlen);

		ip_send_check(iph);

		err = output(skb2);
		if (err)
			goto fail;

		IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
	}
	kfree_skb(skb);
	IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
	return err;

fail:
	kfree_skb(skb);
	IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
	return err;
}

EXPORT_SYMBOL(ip_fragment);

int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct iovec *iov = from;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
			return -EFAULT;
	} else {
		__wsum csum = 0;
		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
			return -EFAULT;
		skb->csum = csum_block_add(skb->csum, csum, odd);
	}
	return 0;
}

static inline __wsum
csum_page(struct page *page, int offset, int copy)
{
	char *kaddr;
	__wsum csum;
	kaddr = kmap(page);
	csum = csum_partial(kaddr + offset, copy, 0);
	kunmap(page);
	return csum;
}

static inline int ip_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP fragmentation offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
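	/* The single oversized skb built here is later split into MTU-sized
	 * UDP fragments by the UFO/GSO machinery; gso_size (set below) tells
	 * it how much payload each resulting fragment may carry.
	 */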
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);

		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
			       (length - transhdrlen));
	if (!err) {
		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UFO,
	 * so follow the normal path
	 */
	kfree_skb(skb);
	return err;
}

/*
 *	ip_append_data() and ip_append_page() can make one large IP datagram
 *	from many pieces of data. Each piece will be held on the socket
 *	until ip_push_pending_frames() is called. Each piece can be a page
 *	or non-page data.
 *
 *	Not only UDP; other transport protocols - e.g. raw sockets - can
 *	potentially use this interface.
 *
 *	LATER: length must be adjusted by pad at tail, when it is required.
 */
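/*
 * Illustrative call sequence for a corked send (the real call sites live in
 * protocol code such as UDP sendmsg; the names below are only a sketch):
 *
 *	err = ip_append_data(sk, getfrag, data, len, transhdrlen,
 *			     &ipc, rt, msg->msg_flags);
 *	if (err)
 *		ip_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip_push_pending_frames(sk);
 */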
int ip_append_data(struct sock *sk,
		   int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
		   void *from, int length, int transhdrlen,
		   struct ipcm_cookie *ipc, struct rtable *rt,
		   unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;

	struct ip_options *opt = NULL;
	int hh_len;
	int exthdrlen;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	unsigned int maxfraglen, fragheaderlen;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking.
		 */
		opt = ipc->opt;
		if (opt) {
			if (inet->cork.opt == NULL) {
				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
				if (unlikely(inet->cork.opt == NULL))
					return -ENOBUFS;
			}
			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
			inet->cork.flags |= IPCORK_OPT;
			inet->cork.addr = ipc->addr;
		}
		dst_hold(&rt->u.dst);
		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
					    rt->u.dst.dev->mtu :
					    dst_mtu(rt->u.dst.path);
		inet->cork.rt = rt;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		if ((exthdrlen = rt->u.dst.header_len) != 0) {
			length += exthdrlen;
			transhdrlen += exthdrlen;
		}
	} else {
		rt = inet->cork.rt;
		if (inet->cork.flags & IPCORK_OPT)
			opt = inet->cork.opt;

		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}
	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
		return -EMSGSIZE;
	}

	/*
	 * transhdrlen > 0 means that this is the first fragment and we wish
	 * it not to be fragmented later.
	 */
	if (transhdrlen &&
	    length + fragheaderlen <= mtu &&
	    rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
			(rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
					 fragheaderlen, transhdrlen, mtu,
					 flags);
		if (err)
			goto error;
		return 0;
	}

	/* So, what's going on in the loop below?
	 *
	 * We use the calculated fragment length to generate a chained skb;
	 * each of its segments is an IP fragment ready for sending to the
	 * network once an appropriate IP header has been added.
	 */
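	/* Worked example with no IP options and a 1500 byte MTU:
	 * fragheaderlen = 20, so maxfraglen = ((1500 - 20) & ~7) + 20 = 1500
	 * and every non-final fragment carries 1480 payload bytes, keeping
	 * all fragment offsets aligned to 8-byte units.
	 */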

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = mtu - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;
		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > mtu - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;
			fraglen = datalen + fragheaderlen;

			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/* The last fragment gets additional space at tail.
			 * Note, with MSG_MORE we overallocate on fragments,
			 * because we have no idea what fragment will be
			 * the last.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len + 15,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len + 15, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;

			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			skb_reserve(skb, hh_len);

			/*
			 *	Find where to start putting bytes.
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			data += fragheaderlen;

			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			copy = datalen - transhdrlen - fraggap;
			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
					offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
				skb->truesize += PAGE_SIZE;
				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error:
	inet->cork.length -= length;
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err;
}

ssize_t	ip_append_page(struct sock *sk, struct page *page,
		       int offset, size_t size, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	struct rtable *rt;
	struct ip_options *opt = NULL;
	int hh_len;
	int mtu;
	int len;
	int err;
	unsigned int maxfraglen, fragheaderlen, fraggap;

	if (inet->hdrincl)
		return -EPERM;

	if (flags&MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue))
		return -EINVAL;

	rt = inet->cork.rt;
	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	if (!(rt->u.dst.dev->features&NETIF_F_SG))
		return -EOPNOTSUPP;

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
	mtu = inet->cork.fragsize;

	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
		return -EMSGSIZE;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		return -EINVAL;

	inet->cork.length += size;
	if ((sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	}


	while (size > 0) {
		int i;

		if (skb_is_gso(skb))
			len = size;
		else {

			/* Check if the remaining data fits into current packet. */
			len = mtu - skb->len;
			if (len < size)
				len = maxfraglen - skb->len;
		}
		if (len <= 0) {
			struct sk_buff *skb_prev;
			int alloclen;

			skb_prev = skb;
			fraggap = skb_prev->len - maxfraglen;

			alloclen = fragheaderlen + hh_len + fraggap + 15;
			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
			if (unlikely(!skb)) {
				err = -ENOBUFS;
				goto error;
			}

			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			skb_reserve(skb, hh_len);

			/*
			 *	Find where to start putting bytes.
			 */
			skb_put(skb, fragheaderlen + fraggap);
			skb_reset_network_header(skb);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(skb_prev,
								   maxfraglen,
						    skb_transport_header(skb),
								   fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		i = skb_shinfo(skb)->nr_frags;
		if (len > size)
			len = size;
		if (skb_can_coalesce(skb, i, page, offset)) {
			skb_shinfo(skb)->frags[i-1].size += len;
		} else if (i < MAX_SKB_FRAGS) {
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, len);
		} else {
			err = -EMSGSIZE;
			goto error;
		}

		if (skb->ip_summed == CHECKSUM_NONE) {
			__wsum csum;
			csum = csum_page(page, offset, len);
			skb->csum = csum_block_add(skb->csum, csum, skb->len);
		}

		skb->len += len;
		skb->data_len += len;
		offset += len;
		size -= len;
	}
	return 0;

error:
	inet->cork.length -= size;
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err;
}

/*
 *	Combine all pending IP fragments on the socket into one IP datagram
 *	and push it out.
 */
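/*
 * The queued skbs are spliced onto the frag_list of the first one, so a
 * single skb is handed to netfilter and dst_output(); if it exceeds the
 * path MTU it is split again by ip_fragment(), whose fast path can reuse
 * that same frag_list without copying.
 */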
int ip_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct inet_sock *inet = inet_sk(sk);
	struct ip_options *opt = NULL;
	struct rtable *rt = inet->cork.rt;
	struct iphdr *iph;
	__be16 df = 0;
	__u8 ttl;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Unless the user demanded real pmtu discovery (IP_PMTUDISC_DO), we
	 * allow the frame generated here to be fragmented. No matter how
	 * the transforms change the size of the packet, it will come out.
	 */
	if (inet->pmtudisc < IP_PMTUDISC_DO)
		skb->local_df = 1;

	/* The DF bit is set when we want to see DF on outgoing frames.
	 * If local_df is set too, we still allow this frame to be
	 * fragmented locally. */
	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
	    (skb->len <= dst_mtu(&rt->u.dst) &&
	     ip_dont_fragment(sk, &rt->u.dst)))
		df = htons(IP_DF);

	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	if (rt->rt_type == RTN_MULTICAST)
		ttl = inet->mc_ttl;
	else
		ttl = ip_select_ttl(inet, &rt->u.dst);

	iph = (struct iphdr *)skb->data;
	iph->version = 4;
	iph->ihl = 5;
	if (opt) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
	}
	iph->tos = inet->tos;
	iph->tot_len = htons(skb->len);
	iph->frag_off = df;
	ip_select_ident(iph, &rt->u.dst, sk);
	iph->ttl = ttl;
	iph->protocol = sk->sk_protocol;
	iph->saddr = rt->rt_src;
	iph->daddr = rt->rt_dst;
	ip_send_check(iph);

	skb->priority = sk->sk_priority;
	skb->dst = dst_clone(&rt->u.dst);

	/* Netfilter gets the whole, not yet fragmented, skb. */
	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
		      skb->dst->dev, dst_output);
	if (err) {
		if (err > 0)
			err = inet->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(inet->cork.opt);
	inet->cork.opt = NULL;
	if (inet->cork.rt) {
		ip_rt_put(inet->cork.rt);
		inet->cork.rt = NULL;
	}
	return err;

error:
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	goto out;
}

/*
 *	Throw away all pending data on the socket.
 */
void ip_flush_pending_frames(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
		kfree_skb(skb);

	inet->cork.flags &= ~IPCORK_OPT;
	kfree(inet->cork.opt);
	inet->cork.opt = NULL;
	if (inet->cork.rt) {
		ip_rt_put(inet->cork.rt);
		inet->cork.rt = NULL;
	}
}


/*
 *	Fetch data from kernel space and fill in checksum if needed.
 */
static int ip_reply_glue_bits(void *dptr, char *to, int offset,
			      int len, int odd, struct sk_buff *skb)
{
	__wsum csum;

	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	return 0;
}

/*
 *	Generic function to send a packet as reply to another packet.
 *	Used to send TCP resets so far. ICMP should use this function too.
 *
 *	Should run single threaded per socket because it uses the sock
 *	structure to pass arguments.
 *
 *	LATER: switch from ip_build_xmit to ip_append_*
 */
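/*
 * The reply is built by echoing the IP options of the incoming packet,
 * routing back towards its source address, and then reusing the corking
 * machinery (ip_append_data()/ip_push_pending_frames()) under
 * bh_lock_sock().
 */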
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
		   unsigned int len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct {
		struct ip_options	opt;
		char			data[40];
	} replyopts;
	struct ipcm_cookie ipc;
	__be32 daddr;
	struct rtable *rt = (struct rtable*)skb->dst;

	if (ip_options_echo(&replyopts.opt, skb))
		return;

	daddr = ipc.addr = rt->rt_src;
	ipc.opt = NULL;

	if (replyopts.opt.optlen) {
		ipc.opt = &replyopts.opt;

		if (ipc.opt->srr)
			daddr = replyopts.opt.faddr;
	}

	{
		struct flowi fl = { .oif = arg->bound_dev_if,
				    .nl_u = { .ip4_u =
					      { .daddr = daddr,
						.saddr = rt->rt_spec_dst,
						.tos = RT_TOS(ip_hdr(skb)->tos) } },
				    /* Not quite clean, but right. */
				    .uli_u = { .ports =
					       { .sport = tcp_hdr(skb)->dest,
						 .dport = tcp_hdr(skb)->source } },
				    .proto = sk->sk_protocol };
		security_skb_classify_flow(skb, &fl);
		if (ip_route_output_key(&rt, &fl))
			return;
	}

	/* And let IP do all the hard work.
	 *
	 * This chunk is not reentrant, hence the spinlock. Note that it
	 * relies on the fact that this function is called with BHs
	 * disabled locally and that sk cannot already be spinlocked.
	 */
	bh_lock_sock(sk);
	inet->tos = ip_hdr(skb)->tos;
	sk->sk_priority = skb->priority;
	sk->sk_protocol = ip_hdr(skb)->protocol;
	sk->sk_bound_dev_if = arg->bound_dev_if;
	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
		       &ipc, rt, MSG_DONTWAIT);
	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
		if (arg->csumoffset >= 0)
			*((__sum16 *)skb_transport_header(skb) +
			  arg->csumoffset) = csum_fold(csum_add(skb->csum,
								arg->csum));
		skb->ip_summed = CHECKSUM_NONE;
		ip_push_pending_frames(sk);
	}

	bh_unlock_sock(sk);

	ip_rt_put(rt);
}

void __init ip_init(void)
{
	ip_rt_init();
	inet_initpeers();

#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
	igmp_mc_proc_init();
#endif
}

EXPORT_SYMBOL(ip_generic_getfrag);
EXPORT_SYMBOL(ip_queue_xmit);
EXPORT_SYMBOL(ip_send_check);

/* Fxcn port-S Wins, 0714-09 */
/* Foxconn add start, Lewis Min, for OpenDNS, 12/12/2008 */
void insert_func_to_BR_POST_ROUTE(void *FUNC)
{
	br_post_insert_hook = FUNC;
}

void remove_func_from_BR_POST_ROUTE(void)
{
	br_post_insert_hook = NULL;
}
/* Foxconn add end, Lewis Min, for OpenDNS, 12/12/2008 */
/* Fxcn port-E Wins, 0714-09 */
