/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) output module.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <Alan.Cox@linux.org>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Hirokazu Takahashi, <taka@valinux.co.jp>
 *
 *	See ip_input.c for original log
 *
 *	Fixes:
 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
 *		Bradford Johnson:	Fix faulty handling of some frames when
 *					no route is found.
 *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
 *					(in case if packet not accepted by
 *					output firewall rules)
 *		Mike McLagan	:	Routing by source
 *		Alexey Kuznetsov:	use new route cache
 *		Andi Kleen:		Fix broken PMTU recovery and remove
 *					some redundant tests.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *		Andi Kleen	: 	Replace ip_reply with ip_send_reply.
 *		Andi Kleen	:	Split fast and slow ip_build_xmit path
 *					for decreased register pressure on x86
 *					and more readability.
 *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
 *					silently drop skb instead of failing with -EPERM.
 *		Detlev Wengorz	:	Copy protocol for fragments.
 *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
 *					datagrams.
 *		Hirokazu Takahashi:	sendfile() on UDP works now.
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/slab.h>

#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
#include <linux/tcp.h>

#include <typedefs.h>
#include <bcmdefs.h>

int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;

/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
{
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
EXPORT_SYMBOL(ip_send_check);

int __ip_local_out(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);

	iph->tot_len = htons(skb->len);
	ip_send_check(iph);

	/* Mark skb to identify SMB data packets (TCP source port 0x01bd == 445) */
	if ((ip_hdr(skb)->protocol == IPPROTO_TCP) && tcp_hdr(skb))
		skb->tcpf_smb = (tcp_hdr(skb)->source == htons(0x01bd));

	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

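/*
 * __ip_local_out() runs the NF_INET_LOCAL_OUT hook; nf_hook() returns 1
 * when all hook functions accept the packet.  In that case ip_local_out()
 * continues by handing the skb to dst_output().
 */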
int ip_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip_local_out);

/* dev_loopback_xmit for use with netfilter. */
static int ip_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));
	netif_rx_ni(newskb);
	return 0;
}

static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
	int ttl = inet->uc_ttl;

	if (ttl < 0)
		ttl = dst_metric(dst, RTAX_HOPLIMIT);
	return ttl;
}
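/*
 * A negative uc_ttl means no per-socket TTL was set via IP_TTL, so the
 * route's hop-limit metric (typically the system default TTL) is used.
 */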

/*
 *		Add an ip header to a skbuff and send it out.
 *
 */
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
			  __be32 saddr, __be32 daddr, struct ip_options *opt)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = skb_rtable(skb);
	struct iphdr *iph;

	/* Build the IP header. */
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->version  = 4;
	iph->ihl      = 5;
	iph->tos      = inet->tos;
	if (ip_dont_fragment(sk, &rt->dst))
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl      = ip_select_ttl(inet, &rt->dst);
	iph->daddr    = rt->rt_dst;
	iph->saddr    = rt->rt_src;
	iph->protocol = sk->sk_protocol;
	ip_select_ident(iph, &rt->dst, sk);

	if (opt && opt->optlen) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, daddr, rt, 0);
	}

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	/* Send it out. */
	return ip_local_out(skb);
}
EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);

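/*
 * ip_finish_output2() is the last IPv4-level step on the output path: it
 * updates multicast/broadcast counters, makes sure there is enough headroom
 * for the link-layer header, and hands the skb to the neighbour subsystem
 * (the cached hardware header if available, otherwise the neighbour output
 * function, which may still need to resolve the link-layer address).
 */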
static inline int ip_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct rtable *rt = (struct rtable *)dst;
	struct net_device *dev = dst->dev;
	unsigned int hh_len = LL_RESERVED_SPACE(dev);

	if (rt->rt_type == RTN_MULTICAST) {
		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
	} else if (rt->rt_type == RTN_BROADCAST)
		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len);

	/* Be paranoid, rather than too clever. */
	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
		struct sk_buff *skb2;

		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
		if (skb2 == NULL) {
			kfree_skb(skb);
			return -ENOMEM;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		kfree_skb(skb);
		skb = skb2;
	}

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	if (net_ratelimit())
		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
	kfree_skb(skb);
	return -EINVAL;
}

static inline int ip_skb_dst_mtu(struct sk_buff *skb)
{
	struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;

	return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
}
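/*
 * With IP_PMTUDISC_PROBE the socket asks to ignore the discovered path MTU
 * and use the device MTU instead; otherwise the (possibly smaller) route
 * MTU is honoured.
 */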

/* Fxcn port-S Wins, 0714-09 */
int (*br_post_insert_hook)(struct sk_buff *skb);	/* Foxconn add, Lewis Min, for OpenDNS, 03/12/2009 */
/* Fxcn port-E Wins, 0714-09 */
static int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm != NULL) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(skb);
	}
#endif

/* Fxcn port-S Wins, 0714-09 */
	/* Foxconn add start, Lewis Min, for OpenDNS, 03/12/2009 */
	if (br_post_insert_hook != NULL) {
		int ret;

		ret = br_post_insert_hook(skb);
		if (ret == NF_DROP || ret == NF_STOLEN) {
			/* The hook consumed the skb; stop processing it. */
			return 0;
		}
	}
	/* Foxconn add end, Lewis Min, for OpenDNS, 03/12/2009 */
/* Fxcn port-E Wins, 0714-09 */

	if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
		return ip_fragment(skb, ip_finish_output2);
	else
		return ip_finish_output2(skb);
}

int ip_mc_output(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct rtable *rt = skb_rtable(skb);
	struct net_device *dev = rt->dst.dev;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */
	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	/*
	 *	Multicasts are looped back for other local users
	 */

	if (rt->rt_flags&RTCF_MULTICAST) {
		if (sk_mc_loop(sk)
#ifdef CONFIG_IP_MROUTE
		/* Small optimization: do not loop back non-local frames
		   which came back after forwarding; they will be dropped
		   by ip_mr_input in any case.
		   Note that local frames are looped back to be delivered
		   to local recipients.

		   This check is duplicated in ip_mr_input at the moment.
		 */
		    &&
		    ((rt->rt_flags & RTCF_LOCAL) ||
		     !(IPCB(skb)->flags & IPSKB_FORWARDED))
#endif
		   ) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
			if (newskb)
				NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip_dev_loopback_xmit);
		}

		/* Multicasts with ttl 0 must not go beyond the host */

		if (ip_hdr(skb)->ttl == 0) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (rt->rt_flags&RTCF_BROADCAST) {
		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
		if (newskb)
			NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
				NULL, newskb->dev, ip_dev_loopback_xmit);
	}

	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
			    skb->dev, ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}

int ip_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;

	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}

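/*
 * ip_queue_xmit() is the main transmit path for connected sockets (TCP):
 * it reuses the route already attached to the skb or cached on the socket,
 * builds the IP header and passes the packet to ip_local_out().  The
 * BCMFASTPATH_HOST annotation comes from bcmdefs.h and appears to place
 * the function in Broadcom's fast-path section.
 */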
int BCMFASTPATH_HOST ip_queue_xmit(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ip_options *opt = inet->opt;
	struct rtable *rt;
	struct iphdr *iph;
	int res;

	/* Skip all of this if the packet is already routed,
	 * f.e. by something like SCTP.
	 */
	rcu_read_lock();
	rt = skb_rtable(skb);
	if (rt != NULL)
		goto packet_routed;

	/* Make sure we can route this packet. */
	rt = (struct rtable *)__sk_dst_check(sk, 0);
	if (rt == NULL) {
		__be32 daddr;

		/* Use correct destination address if we have options. */
		daddr = inet->inet_daddr;
		if (opt && opt->srr)
			daddr = opt->faddr;

		{
			struct flowi fl = { .oif = sk->sk_bound_dev_if,
					    .mark = sk->sk_mark,
					    .nl_u = { .ip4_u =
						      { .daddr = daddr,
							.saddr = inet->inet_saddr,
							.tos = RT_CONN_FLAGS(sk) } },
					    .proto = sk->sk_protocol,
					    .flags = inet_sk_flowi_flags(sk),
					    .uli_u = { .ports =
						       { .sport = inet->inet_sport,
							 .dport = inet->inet_dport } } };

			/* If this fails, retransmit mechanism of transport layer will
			 * keep trying until route appears or the connection times
			 * itself out.
			 */
			security_sk_classify_flow(sk, &fl);
			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
				goto no_route;
		}
		sk_setup_caps(sk, &rt->dst);
	}
	skb_dst_set_noref(skb, &rt->dst);

packet_routed:
	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
		goto no_route;

	/* OK, we know where to send it, allocate and build IP header. */
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
	if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl      = ip_select_ttl(inet, &rt->dst);
	iph->protocol = sk->sk_protocol;
	iph->saddr    = rt->rt_src;
	iph->daddr    = rt->rt_dst;
	/* Transport layer set skb->h.foo itself. */

	if (opt && opt->optlen) {
		iph->ihl += opt->optlen >> 2;
		ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
	}

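	/* A GSO packet is later split into gso_segs on-wire frames, each of
	 * which needs its own IP ID, so reserve (gso_segs - 1) extra IDs
	 * beyond the one written into this header.
	 */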
	ip_select_ident_more(iph, &rt->dst, sk,
			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	res = ip_local_out(skb);
	rcu_read_unlock();
	return res;

no_route:
	rcu_read_unlock();
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EHOSTUNREACH;
}
EXPORT_SYMBOL(ip_queue_xmit);


static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_copy(to, from);
	to->dev = from->dev;
	to->mark = from->mark;

	/* Copy the flags to each fragment. */
	IPCB(to)->flags = IPCB(from)->flags;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
	defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
	to->ipvs_property = from->ipvs_property;
#endif
	skb_copy_secmark(to, from);
}

/*
 *	This IP datagram is too large to be sent in one piece.  Break it up into
 *	smaller pieces (each of size equal to IP header plus
 *	a block of the data of the original IP data part) that will yet fit in a
 *	single device frame, and queue such a frame for sending.
 */

int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct iphdr *iph;
	int ptr;
	struct net_device *dev;
	struct sk_buff *skb2;
	unsigned int mtu, hlen, left, len, ll_rs;
	int offset;
	__be16 not_last_frag;
	struct rtable *rt = skb_rtable(skb);
	int err = 0;
	int first_frag = 1;	/* Foxconn added pling 04/29/2010 */

	dev = rt->dst.dev;

	/*
	 *	Point into the IP datagram header.
	 */

	iph = ip_hdr(skb);

	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(ip_skb_dst_mtu(skb)));
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/*
	 *	Setup starting values.
	 */

	hlen = iph->ihl * 4;
	mtu = dst_mtu(&rt->dst) - hlen;	/* Size of data space */
#ifdef CONFIG_BRIDGE_NETFILTER
	if (skb->nf_bridge)
		mtu -= nf_bridge_mtu_reduction(skb);
#endif
	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
	/* When a frag_list is given, use it.  First, check its validity:
	 * some transformers could create a wrong frag_list or break an
	 * existing one; that is not prohibited.  In such a case fall back
	 * to copying.
	 *
	 * LATER: this step can be merged into the real generation of
	 * fragments; we can switch to copying when we see the first bad
	 * fragment.
	 */
	if (skb_has_frags(skb)) {
		struct sk_buff *frag, *frag2;
		int first_len = skb_pagelen(skb);

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		/* Everything is OK. Generate! */

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		iph->tot_len = htons(first_len);
		iph->frag_off = htons(IP_MF);
		ip_send_check(iph);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), iph, hlen);
				iph = ip_hdr(frag);
				iph->tot_len = htons(frag->len);
				ip_copy_metadata(frag, skb);
				if (offset == 0)
					ip_options_fragment(frag);
				offset += skb->len - hlen;
				iph->frag_off = htons(offset>>3);
				if (frag->next != NULL)
					iph->frag_off |= htons(IP_MF);
				/* Ready, complete checksum */
				ip_send_check(iph);
			}

			err = output(skb);

			if (!err)
				IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		if (err == 0) {
			IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}
		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;		/* Where to start from */

	/* for bridged IP traffic encapsulated inside f.e. a vlan header,
	 * we need to make room for the encapsulating header
	 */
	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));

	/*
	 *	Fragment the datagram.
	 */

	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
	not_last_frag = iph->frag_off & htons(IP_MF);
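	/*
	 * Example (no IP options): a device MTU of 1500 leaves 1480 bytes of
	 * data space per fragment.  A datagram carrying 4000 bytes of payload
	 * is cut into fragments of 1480, 1480 and 1040 bytes with frag_off
	 * values 0, 185 and 370 (the offset field counts 8-byte units).
	 */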

	/*
	 *	Keep copying data until we run out.
	 */

	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip_copy_metadata(skb2, skb);
		skb_reserve(skb2, ll_rs);
		skb_put(skb2, len + hlen);
		skb_reset_network_header(skb2);
		skb2->transport_header = skb2->network_header + hlen;

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */

		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
			BUG();
		left -= len;

		/*
		 *	Fill in the new header fields.
		 */
		iph = ip_hdr(skb2);
		iph->frag_off = htons((offset >> 3));

		/* Foxconn added start pling 04/29/2010 */
		/* If the packet is not from the IP stack, i.e. from another i/f,
		 * then copy the Ethernet header and cb to the
		 * first fragment, for later use by NAT/QoS.
		 */
		if (!skb->sk && first_frag) {
			first_frag = 0;
			skb2->mac_header = (unsigned char *)(skb2->data - sizeof(struct ethhdr));
			memcpy(skb2->mac_header, skb->mac_header, sizeof(struct ethhdr));
			memcpy(skb2->cb, skb->cb, sizeof(skb->cb));
		}
		/* Foxconn added end pling 04/29/2010 */

		/* ANK: dirty, but effective trick. Upgrade options only if
		 * the segment to be fragmented was THE FIRST (otherwise,
		 * options are already fixed) and make it ONCE
		 * on the initial skb, so that all the following fragments
		 * will inherit fixed options.
		 */
		if (offset == 0)
			ip_options_fragment(skb);

		/*
		 *	Added AC : If we are fragmenting a fragment that's not the
		 *		   last fragment then keep the MF bit set on every
		 *		   piece we produce.
		 */
		if (left > 0 || not_last_frag)
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		iph->tot_len = htons(len + hlen);

		ip_send_check(iph);

		err = output(skb2);
		if (err)
			goto fail;

		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
	}
	kfree_skb(skb);
	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
	return err;

fail:
	kfree_skb(skb);
	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
	return err;
}
EXPORT_SYMBOL(ip_fragment);

int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct iovec *iov = from;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
			return -EFAULT;
	} else {
		__wsum csum = 0;
		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
			return -EFAULT;
		skb->csum = csum_block_add(skb->csum, csum, odd);
	}
	return 0;
}
EXPORT_SYMBOL(ip_generic_getfrag);

static inline __wsum
csum_page(struct page *page, int offset, int copy)
{
	char *kaddr;
	__wsum csum;
	kaddr = kmap(page);
	csum = csum_partial(kaddr + offset, copy, 0);
	kunmap(page);
	return csum;
}

static inline int ip_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP fragmentation offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);

		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		__skb_queue_tail(&sk->sk_write_queue, skb);
	}

	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}

/*
 *	ip_append_data() and ip_append_page() can make one large IP datagram
 *	from many pieces of data.  Each piece is held on the socket
 *	until ip_push_pending_frames() is called.  Each piece can be a page
 *	or non-page data.
 *
 *	Besides UDP, other transport protocols - e.g. raw sockets - can
 *	potentially use this interface.
 *
 *	LATER: length must be adjusted by the pad at the tail, when it is
 *	required.
 */
int ip_append_data(struct sock *sk,
		   int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
		   void *from, int length, int transhdrlen,
		   struct ipcm_cookie *ipc, struct rtable **rtp,
		   unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;

	struct ip_options *opt = NULL;
	int hh_len;
	int exthdrlen;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	unsigned int maxfraglen, fragheaderlen;
	int csummode = CHECKSUM_NONE;
	struct rtable *rt;

	if (flags&MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking.
		 */
		opt = ipc->opt;
		if (opt) {
			if (inet->cork.opt == NULL) {
				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
				if (unlikely(inet->cork.opt == NULL))
					return -ENOBUFS;
			}
			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
			inet->cork.flags |= IPCORK_OPT;
			inet->cork.addr = ipc->addr;
		}
		rt = *rtp;
		if (unlikely(!rt))
			return -EFAULT;
		/*
		 * We steal reference to this route, caller should not release it
		 */
		*rtp = NULL;
		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
					    rt->dst.dev->mtu :
					    dst_mtu(rt->dst.path);
		inet->cork.dst = &rt->dst;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		if ((exthdrlen = rt->dst.header_len) != 0) {
			length += exthdrlen;
			transhdrlen += exthdrlen;
		}
	} else {
		rt = (struct rtable *)inet->cork.dst;
		if (inet->cork.flags & IPCORK_OPT)
			opt = inet->cork.opt;

		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}
	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
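	/*
	 * fragheaderlen is the per-fragment IP header size; maxfraglen is the
	 * largest fragment length whose data part is still a multiple of
	 * 8 bytes, e.g. an MTU of 1500 with no options gives fragheaderlen 20
	 * and maxfraglen 1500.
	 */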

	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
			       mtu-exthdrlen);
		return -EMSGSIZE;
	}

	/*
	 * transhdrlen > 0 means that this is the first fragment and we want
	 * it not to be fragmented later.
	 */
	if (transhdrlen &&
	    length + fragheaderlen <= mtu &&
	    rt->dst.dev->features & NETIF_F_V4_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;

	skb = skb_peek_tail(&sk->sk_write_queue);

	inet->cork.length += length;
	if (((length > mtu) || (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO)) {
		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
					 fragheaderlen, transhdrlen, mtu,
					 flags);
		if (err)
			goto error;
		return 0;
	}

	/* So, what's going on in the loop below?
	 *
	 * We use the calculated fragment length to generate a chain of skbs;
	 * each of them is an IP fragment ready to be sent to the network once
	 * the appropriate IP header has been added.
	 */

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = mtu - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;
		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > mtu - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;
			fraglen = datalen + fragheaderlen;

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/* The last fragment gets additional space at tail.
			 * Note, with MSG_MORE we overallocate on fragments,
			 * because we have no idea what fragment will be
			 * the last.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len + 15,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len + 15, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else
					/* only the initial fragment is
					   time stamped */
					ipc->shtx.flags = 0;
			}
			if (skb == NULL)
				goto error;

			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			skb_reserve(skb, hh_len);
			*skb_tx(skb) = ipc->shtx;

			/*
			 *	Find where to start putting bytes.
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			data += fragheaderlen;

			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			copy = datalen - transhdrlen - fraggap;
			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
					offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error:
	inet->cork.length -= length;
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
	return err;
}

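/*
 * ip_append_page() is the zero-copy counterpart of ip_append_data(): it is
 * used by sendpage()/sendfile() on UDP sockets and attaches page fragments
 * directly to the pending skbs, so it requires a scatter-gather capable
 * device (NETIF_F_SG).
 */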
ssize_t	ip_append_page(struct sock *sk, struct page *page,
		       int offset, size_t size, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	struct rtable *rt;
	struct ip_options *opt = NULL;
	int hh_len;
	int mtu;
	int len;
	int err;
	unsigned int maxfraglen, fragheaderlen, fraggap;

	if (inet->hdrincl)
		return -EPERM;

	if (flags&MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue))
		return -EINVAL;

	rt = (struct rtable *)inet->cork.dst;
	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	if (!(rt->dst.dev->features&NETIF_F_SG))
		return -EOPNOTSUPP;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
	mtu = inet->cork.fragsize;

	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu);
		return -EMSGSIZE;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		return -EINVAL;

	inet->cork.length += size;
	if ((size + skb->len > mtu) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO)) {
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	}


	while (size > 0) {
		int i;

		if (skb_is_gso(skb))
			len = size;
		else {

			/* Check if the remaining data fits into current packet. */
			len = mtu - skb->len;
			if (len < size)
				len = maxfraglen - skb->len;
		}
		if (len <= 0) {
			struct sk_buff *skb_prev;
			int alloclen;

			skb_prev = skb;
			fraggap = skb_prev->len - maxfraglen;

			alloclen = fragheaderlen + hh_len + fraggap + 15;
			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
			if (unlikely(!skb)) {
				err = -ENOBUFS;
				goto error;
			}

			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			skb_reserve(skb, hh_len);

			/*
			 *	Find where to start putting bytes.
			 */
			skb_put(skb, fragheaderlen + fraggap);
			skb_reset_network_header(skb);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(skb_prev,
								   maxfraglen,
						    skb_transport_header(skb),
								   fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		i = skb_shinfo(skb)->nr_frags;
		if (len > size)
			len = size;
		if (skb_can_coalesce(skb, i, page, offset)) {
			skb_shinfo(skb)->frags[i-1].size += len;
		} else if (i < MAX_SKB_FRAGS) {
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, len);
		} else {
			err = -EMSGSIZE;
			goto error;
		}

		if (skb->ip_summed == CHECKSUM_NONE) {
			__wsum csum;
			csum = csum_page(page, offset, len);
			skb->csum = csum_block_add(skb->csum, csum, skb->len);
		}

		skb->len += len;
		skb->data_len += len;
		skb->truesize += len;
		atomic_add(len, &sk->sk_wmem_alloc);
		offset += len;
		size -= len;
	}
	return 0;

error:
	inet->cork.length -= size;
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
	return err;
}

static void ip_cork_release(struct inet_sock *inet)
{
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(inet->cork.opt);
	inet->cork.opt = NULL;
	dst_release(inet->cork.dst);
	inet->cork.dst = NULL;
}

/*
 *	Combine all pending IP fragments on the socket into one IP datagram
 *	and push them out.
 */
int ip_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	struct ip_options *opt = NULL;
	struct rtable *rt = (struct rtable *)inet->cork.dst;
	struct iphdr *iph;
	__be16 df = 0;
	__u8 ttl;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Unless the user demanded real pmtu discovery (IP_PMTUDISC_DO), we
	 * allow fragmenting the frame generated here.  No matter how the
	 * transforms change the size of the packet, it will come out.
	 */
	if (inet->pmtudisc < IP_PMTUDISC_DO)
		skb->local_df = 1;

	/* The DF bit is set when we want to see DF on outgoing frames.
	 * If local_df is set too, we still allow this frame to be
	 * fragmented locally. */
	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
	    (skb->len <= dst_mtu(&rt->dst) &&
	     ip_dont_fragment(sk, &rt->dst)))
		df = htons(IP_DF);

	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	if (rt->rt_type == RTN_MULTICAST)
		ttl = inet->mc_ttl;
	else
		ttl = ip_select_ttl(inet, &rt->dst);

	iph = (struct iphdr *)skb->data;
	iph->version = 4;
	iph->ihl = 5;
	if (opt) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
	}
	iph->tos = inet->tos;
	iph->frag_off = df;
	ip_select_ident(iph, &rt->dst, sk);
	iph->ttl = ttl;
	iph->protocol = sk->sk_protocol;
	iph->saddr = rt->rt_src;
	iph->daddr = rt->rt_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	/*
	 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
	 * on dst refcount
	 */
	inet->cork.dst = NULL;
	skb_dst_set(skb, &rt->dst);

	if (iph->protocol == IPPROTO_ICMP)
		icmp_out_count(net, ((struct icmphdr *)
			skb_transport_header(skb))->type);

	/* Netfilter gets the whole, not yet fragmented skb. */
	err = ip_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip_cork_release(inet);
	return err;

error:
	IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}

/*
 *	Throw away all pending data on the socket.
 */
void ip_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
		kfree_skb(skb);

	ip_cork_release(inet_sk(sk));
}


/*
 *	Fetch data from kernel space and fill in checksum if needed.
 */
static int ip_reply_glue_bits(void *dptr, char *to, int offset,
			      int len, int odd, struct sk_buff *skb)
{
	__wsum csum;

	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	return 0;
}

/*
 *	Generic function to send a packet as reply to another packet.
 *	Used to send TCP resets so far. ICMP should use this function too.
 *
 *	Should run single threaded per socket because it uses the sock
 *	structure to pass arguments.
 */
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
		   unsigned int len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct {
		struct ip_options	opt;
		char			data[40];
	} replyopts;
	struct ipcm_cookie ipc;
	__be32 daddr;
	struct rtable *rt = skb_rtable(skb);

	if (ip_options_echo(&replyopts.opt, skb))
		return;

	daddr = ipc.addr = rt->rt_src;
	ipc.opt = NULL;
	ipc.shtx.flags = 0;

	if (replyopts.opt.optlen) {
		ipc.opt = &replyopts.opt;

		if (ipc.opt->srr)
			daddr = replyopts.opt.faddr;
	}

	{
		struct flowi fl = { .oif = arg->bound_dev_if,
				    .nl_u = { .ip4_u =
					      { .daddr = daddr,
						.saddr = rt->rt_spec_dst,
						.tos = RT_TOS(ip_hdr(skb)->tos) } },
				    /* Not quite clean, but right. */
				    .uli_u = { .ports =
					       { .sport = tcp_hdr(skb)->dest,
						 .dport = tcp_hdr(skb)->source } },
				    .proto = sk->sk_protocol,
				    .flags = ip_reply_arg_flowi_flags(arg) };
		security_skb_classify_flow(skb, &fl);
		if (ip_route_output_key(sock_net(sk), &rt, &fl))
			return;
	}

	/* And let IP do all the hard work.

	   This chunk is not reentrant, hence the spinlock.
	   Note that it relies on the fact that this function is called
	   with BHs locally disabled and that sk cannot already be locked.
	 */
	bh_lock_sock(sk);
	inet->tos = ip_hdr(skb)->tos;
	sk->sk_priority = skb->priority;
	sk->sk_protocol = ip_hdr(skb)->protocol;
	sk->sk_bound_dev_if = arg->bound_dev_if;
	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
		       &ipc, &rt, MSG_DONTWAIT);
	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
		if (arg->csumoffset >= 0)
			*((__sum16 *)skb_transport_header(skb) +
			  arg->csumoffset) = csum_fold(csum_add(skb->csum,
								arg->csum));
		skb->ip_summed = CHECKSUM_NONE;
		ip_push_pending_frames(sk);
	}

	bh_unlock_sock(sk);

	ip_rt_put(rt);
}

void __init ip_init(void)
{
	ip_rt_init();
	inet_initpeers();

#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
	igmp_mc_proc_init();
#endif
}
/* Fxcn port-S Wins, 0714-09 */
/* Foxconn add start, Lewis Min, for OpenDNS, 12/12/2008 */
void insert_func_to_BR_POST_ROUTE(void *FUNC)
{
	br_post_insert_hook = FUNC;
}


void remove_func_from_BR_POST_ROUTE(void)
{
	br_post_insert_hook = NULL;
}
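/*
 * A minimal usage sketch (hypothetical caller): a bridge module would
 * register its post-routing callback at init time and unregister it on
 * exit.  The callback is expected to return a netfilter verdict; NF_DROP
 * and NF_STOLEN tell ip_finish_output() that the skb was consumed.
 *
 *	insert_func_to_BR_POST_ROUTE(my_post_route_fn);
 *	...
 *	remove_func_from_BR_POST_ROUTE();
 */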
/* Foxconn add end, Lewis Min, for OpenDNS, 12/12/2008 */
/* Fxcn port-E Wins, 0714-09 */