1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * The Internet Protocol (IP) output module. 7 * 8 * Authors: Ross Biro 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 10 * Donald Becker, <becker@super.org> 11 * Alan Cox, <Alan.Cox@linux.org> 12 * Richard Underwood 13 * Stefan Becker, <stefanb@yello.ping.de> 14 * Jorge Cwik, <jorge@laser.satlink.net> 15 * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 16 * Hirokazu Takahashi, <taka@valinux.co.jp> 17 * 18 * See ip_input.c for original log 19 * 20 * Fixes: 21 * Alan Cox : Missing nonblock feature in ip_build_xmit. 22 * Mike Kilburn : htons() missing in ip_build_xmit. 23 * Bradford Johnson: Fix faulty handling of some frames when 24 * no route is found. 25 * Alexander Demenshin: Missing sk/skb free in ip_queue_xmit 26 * (in case if packet not accepted by 27 * output firewall rules) 28 * Mike McLagan : Routing by source 29 * Alexey Kuznetsov: use new route cache 30 * Andi Kleen: Fix broken PMTU recovery and remove 31 * some redundant tests. 32 * Vitaly E. Lavrov : Transparent proxy revived after year coma. 33 * Andi Kleen : Replace ip_reply with ip_send_reply. 34 * Andi Kleen : Split fast and slow ip_build_xmit path 35 * for decreased register pressure on x86 36 * and more readibility. 37 * Marc Boucher : When call_out_firewall returns FW_QUEUE, 38 * silently drop skb instead of failing with -EPERM. 39 * Detlev Wengorz : Copy protocol for fragments. 40 * Hirokazu Takahashi: HW checksumming for outgoing UDP 41 * datagrams. 42 * Hirokazu Takahashi: sendfile() on UDP works now. 43 */ 44 45#include <asm/uaccess.h> 46#include <asm/system.h> 47#include <linux/module.h> 48#include <linux/types.h> 49#include <linux/kernel.h> 50#include <linux/mm.h> 51#include <linux/string.h> 52#include <linux/errno.h> 53#include <linux/highmem.h> 54#include <linux/slab.h> 55 56#include <linux/socket.h> 57#include <linux/sockios.h> 58#include <linux/in.h> 59#include <linux/inet.h> 60#include <linux/netdevice.h> 61#include <linux/etherdevice.h> 62#include <linux/proc_fs.h> 63#include <linux/stat.h> 64#include <linux/init.h> 65 66#include <net/snmp.h> 67#include <net/ip.h> 68#include <net/protocol.h> 69#include <net/route.h> 70#include <net/xfrm.h> 71#include <linux/skbuff.h> 72#include <net/sock.h> 73#include <net/arp.h> 74#include <net/icmp.h> 75#include <net/checksum.h> 76#include <net/inetpeer.h> 77#include <linux/igmp.h> 78#include <linux/netfilter_ipv4.h> 79#include <linux/netfilter_bridge.h> 80#include <linux/mroute.h> 81#include <linux/netlink.h> 82#include <linux/tcp.h> 83 84#include <typedefs.h> 85#include <bcmdefs.h> 86 87int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; 88 89/* Generate a checksum for an outgoing IP datagram. */ 90__inline__ void ip_send_check(struct iphdr *iph) 91{ 92 iph->check = 0; 93 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 94} 95EXPORT_SYMBOL(ip_send_check); 96 97int __ip_local_out(struct sk_buff *skb) 98{ 99 struct iphdr *iph = ip_hdr(skb); 100 101 iph->tot_len = htons(skb->len); 102 ip_send_check(iph); 103 104 /* Mark skb to identify SMB data packet */ 105 if ((ip_hdr(skb)->protocol == IPPROTO_TCP) && tcp_hdr(skb)) 106 skb->tcpf_smb = (tcp_hdr(skb)->source == htons(0x01bd)); 107 108 return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, 109 skb_dst(skb)->dev, dst_output); 110} 111 112int ip_local_out(struct sk_buff *skb) 113{ 114 int err; 115 116 err = __ip_local_out(skb); 117 if (likely(err == 1)) 118 err = dst_output(skb); 119 120 return err; 121} 122EXPORT_SYMBOL_GPL(ip_local_out); 123 124/* dev_loopback_xmit for use with netfilter. */ 125static int ip_dev_loopback_xmit(struct sk_buff *newskb) 126{ 127 skb_reset_mac_header(newskb); 128 __skb_pull(newskb, skb_network_offset(newskb)); 129 newskb->pkt_type = PACKET_LOOPBACK; 130 newskb->ip_summed = CHECKSUM_UNNECESSARY; 131 WARN_ON(!skb_dst(newskb)); 132 netif_rx_ni(newskb); 133 return 0; 134} 135 136static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) 137{ 138 int ttl = inet->uc_ttl; 139 140 if (ttl < 0) 141 ttl = dst_metric(dst, RTAX_HOPLIMIT); 142 return ttl; 143} 144 145/* 146 * Add an ip header to a skbuff and send it out. 147 * 148 */ 149int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, 150 __be32 saddr, __be32 daddr, struct ip_options *opt) 151{ 152 struct inet_sock *inet = inet_sk(sk); 153 struct rtable *rt = skb_rtable(skb); 154 struct iphdr *iph; 155 156 /* Build the IP header. */ 157 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 158 skb_reset_network_header(skb); 159 iph = ip_hdr(skb); 160 iph->version = 4; 161 iph->ihl = 5; 162 iph->tos = inet->tos; 163 if (ip_dont_fragment(sk, &rt->dst)) 164 iph->frag_off = htons(IP_DF); 165 else 166 iph->frag_off = 0; 167 iph->ttl = ip_select_ttl(inet, &rt->dst); 168 iph->daddr = rt->rt_dst; 169 iph->saddr = rt->rt_src; 170 iph->protocol = sk->sk_protocol; 171 ip_select_ident(iph, &rt->dst, sk); 172 173 if (opt && opt->optlen) { 174 iph->ihl += opt->optlen>>2; 175 ip_options_build(skb, opt, daddr, rt, 0); 176 } 177 178 skb->priority = sk->sk_priority; 179 skb->mark = sk->sk_mark; 180 181 /* Send it out. */ 182 return ip_local_out(skb); 183} 184EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); 185 186static inline int ip_finish_output2(struct sk_buff *skb) 187{ 188 struct dst_entry *dst = skb_dst(skb); 189 struct rtable *rt = (struct rtable *)dst; 190 struct net_device *dev = dst->dev; 191 unsigned int hh_len = LL_RESERVED_SPACE(dev); 192 193 if (rt->rt_type == RTN_MULTICAST) { 194 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); 195 } else if (rt->rt_type == RTN_BROADCAST) 196 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len); 197 198 /* Be paranoid, rather than too clever. */ 199 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { 200 struct sk_buff *skb2; 201 202 skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); 203 if (skb2 == NULL) { 204 kfree_skb(skb); 205 return -ENOMEM; 206 } 207 if (skb->sk) 208 skb_set_owner_w(skb2, skb->sk); 209 kfree_skb(skb); 210 skb = skb2; 211 } 212 213 if (dst->hh) 214 return neigh_hh_output(dst->hh, skb); 215 else if (dst->neighbour) 216 return dst->neighbour->output(skb); 217 218 if (net_ratelimit()) 219 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); 220 kfree_skb(skb); 221 return -EINVAL; 222} 223 224static inline int ip_skb_dst_mtu(struct sk_buff *skb) 225{ 226 struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; 227 228 return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? 229 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); 230} 231 232/* Fxcn port-S Wins, 0714-09 */ 233int (*br_post_insert_hook)(struct sk_buff *skb);//Foxconn add , Lewis Min, for OpenDNS, 03/12/2009 234/* Fxcn port-E Wins, 0714-09 */ 235static int ip_finish_output(struct sk_buff *skb) 236{ 237#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 238 /* Policy lookup after SNAT yielded a new policy */ 239 if (skb_dst(skb)->xfrm != NULL) { 240 IPCB(skb)->flags |= IPSKB_REROUTED; 241 return dst_output(skb); 242 } 243#endif 244 245/* Fxcn port-S Wins, 0714-09 */ 246 //Foxconn add start, Lewis Min, for OpenDNS, 03/12/2009 247 if(NULL!=br_post_insert_hook) 248 { 249 int ret; 250 251 ret = br_post_insert_hook(skb); 252 if((ret==NF_DROP)||(ret==NF_STOLEN)) 253 { 254// read_unlock(&br->lock); 255// spin_unlock_bh(&br->lock); 256 return; 257 } 258 } 259 //Foxconn add end, Lewis Min, for OpenDNS, 03/12/2009 260/* Fxcn port-E Wins, 0714-09 */ 261 262 if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) 263 return ip_fragment(skb, ip_finish_output2); 264 else 265 return ip_finish_output2(skb); 266} 267 268int ip_mc_output(struct sk_buff *skb) 269{ 270 struct sock *sk = skb->sk; 271 struct rtable *rt = skb_rtable(skb); 272 struct net_device *dev = rt->dst.dev; 273 274 /* 275 * If the indicated interface is up and running, send the packet. 276 */ 277 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); 278 279 skb->dev = dev; 280 skb->protocol = htons(ETH_P_IP); 281 282 /* 283 * Multicasts are looped back for other local users 284 */ 285 286 if (rt->rt_flags&RTCF_MULTICAST) { 287 if (sk_mc_loop(sk) 288#ifdef CONFIG_IP_MROUTE 289 /* Small optimization: do not loopback not local frames, 290 which returned after forwarding; they will be dropped 291 by ip_mr_input in any case. 292 Note, that local frames are looped back to be delivered 293 to local recipients. 294 295 This check is duplicated in ip_mr_input at the moment. 296 */ 297 && 298 ((rt->rt_flags & RTCF_LOCAL) || 299 !(IPCB(skb)->flags & IPSKB_FORWARDED)) 300#endif 301 ) { 302 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 303 if (newskb) 304 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, 305 newskb, NULL, newskb->dev, 306 ip_dev_loopback_xmit); 307 } 308 309 /* Multicasts with ttl 0 must not go beyond the host */ 310 311 if (ip_hdr(skb)->ttl == 0) { 312 kfree_skb(skb); 313 return 0; 314 } 315 } 316 317 if (rt->rt_flags&RTCF_BROADCAST) { 318 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 319 if (newskb) 320 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, 321 NULL, newskb->dev, ip_dev_loopback_xmit); 322 } 323 324 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, 325 skb->dev, ip_finish_output, 326 !(IPCB(skb)->flags & IPSKB_REROUTED)); 327} 328 329int ip_output(struct sk_buff *skb) 330{ 331 struct net_device *dev = skb_dst(skb)->dev; 332 333 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); 334 335 skb->dev = dev; 336 skb->protocol = htons(ETH_P_IP); 337 338 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev, 339 ip_finish_output, 340 !(IPCB(skb)->flags & IPSKB_REROUTED)); 341} 342 343int BCMFASTPATH_HOST ip_queue_xmit(struct sk_buff *skb) 344{ 345 struct sock *sk = skb->sk; 346 struct inet_sock *inet = inet_sk(sk); 347 struct ip_options *opt = inet->opt; 348 struct rtable *rt; 349 struct iphdr *iph; 350 int res; 351 352 /* Skip all of this if the packet is already routed, 353 * f.e. by something like SCTP. 354 */ 355 rcu_read_lock(); 356 rt = skb_rtable(skb); 357 if (rt != NULL) 358 goto packet_routed; 359 360 /* Make sure we can route this packet. */ 361 rt = (struct rtable *)__sk_dst_check(sk, 0); 362 if (rt == NULL) { 363 __be32 daddr; 364 365 /* Use correct destination address if we have options. */ 366 daddr = inet->inet_daddr; 367 if(opt && opt->srr) 368 daddr = opt->faddr; 369 370 { 371 struct flowi fl = { .oif = sk->sk_bound_dev_if, 372 .mark = sk->sk_mark, 373 .nl_u = { .ip4_u = 374 { .daddr = daddr, 375 .saddr = inet->inet_saddr, 376 .tos = RT_CONN_FLAGS(sk) } }, 377 .proto = sk->sk_protocol, 378 .flags = inet_sk_flowi_flags(sk), 379 .uli_u = { .ports = 380 { .sport = inet->inet_sport, 381 .dport = inet->inet_dport } } }; 382 383 /* If this fails, retransmit mechanism of transport layer will 384 * keep trying until route appears or the connection times 385 * itself out. 386 */ 387 security_sk_classify_flow(sk, &fl); 388 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) 389 goto no_route; 390 } 391 sk_setup_caps(sk, &rt->dst); 392 } 393 skb_dst_set_noref(skb, &rt->dst); 394 395packet_routed: 396 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 397 goto no_route; 398 399 /* OK, we know where to send it, allocate and build IP header. */ 400 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 401 skb_reset_network_header(skb); 402 iph = ip_hdr(skb); 403 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 404 if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df) 405 iph->frag_off = htons(IP_DF); 406 else 407 iph->frag_off = 0; 408 iph->ttl = ip_select_ttl(inet, &rt->dst); 409 iph->protocol = sk->sk_protocol; 410 iph->saddr = rt->rt_src; 411 iph->daddr = rt->rt_dst; 412 /* Transport layer set skb->h.foo itself. */ 413 414 if (opt && opt->optlen) { 415 iph->ihl += opt->optlen >> 2; 416 ip_options_build(skb, opt, inet->inet_daddr, rt, 0); 417 } 418 419 ip_select_ident_more(iph, &rt->dst, sk, 420 (skb_shinfo(skb)->gso_segs ?: 1) - 1); 421 422 skb->priority = sk->sk_priority; 423 skb->mark = sk->sk_mark; 424 425 res = ip_local_out(skb); 426 rcu_read_unlock(); 427 return res; 428 429no_route: 430 rcu_read_unlock(); 431 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 432 kfree_skb(skb); 433 return -EHOSTUNREACH; 434} 435EXPORT_SYMBOL(ip_queue_xmit); 436 437 438static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) 439{ 440 to->pkt_type = from->pkt_type; 441 to->priority = from->priority; 442 to->protocol = from->protocol; 443 skb_dst_drop(to); 444 skb_dst_copy(to, from); 445 to->dev = from->dev; 446 to->mark = from->mark; 447 448 /* Copy the flags to each fragment. */ 449 IPCB(to)->flags = IPCB(from)->flags; 450 451#ifdef CONFIG_NET_SCHED 452 to->tc_index = from->tc_index; 453#endif 454 nf_copy(to, from); 455#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 456 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 457 to->nf_trace = from->nf_trace; 458#endif 459#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 460 to->ipvs_property = from->ipvs_property; 461#endif 462 skb_copy_secmark(to, from); 463} 464 465/* 466 * This IP datagram is too large to be sent in one piece. Break it up into 467 * smaller pieces (each of size equal to IP header plus 468 * a block of the data of the original IP data part) that will yet fit in a 469 * single device frame, and queue such a frame for sending. 470 */ 471 472int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 473{ 474 struct iphdr *iph; 475 int ptr; 476 struct net_device *dev; 477 struct sk_buff *skb2; 478 unsigned int mtu, hlen, left, len, ll_rs; 479 int offset; 480 __be16 not_last_frag; 481 struct rtable *rt = skb_rtable(skb); 482 int err = 0; 483 int first_frag = 1; // Foxconn added pling 04/29/2010 484 485 dev = rt->dst.dev; 486 487 /* 488 * Point into the IP datagram header. 489 */ 490 491 iph = ip_hdr(skb); 492 493 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { 494 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 495 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 496 htonl(ip_skb_dst_mtu(skb))); 497 kfree_skb(skb); 498 return -EMSGSIZE; 499 } 500 501 /* 502 * Setup starting values. 503 */ 504 505 hlen = iph->ihl * 4; 506 mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */ 507#ifdef CONFIG_BRIDGE_NETFILTER 508 if (skb->nf_bridge) 509 mtu -= nf_bridge_mtu_reduction(skb); 510#endif 511 IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; 512 513 /* When frag_list is given, use it. First, check its validity: 514 * some transformers could create wrong frag_list or break existing 515 * one, it is not prohibited. In this case fall back to copying. 516 * 517 * LATER: this step can be merged to real generation of fragments, 518 * we can switch to copy when see the first bad fragment. 519 */ 520 if (skb_has_frags(skb)) { 521 struct sk_buff *frag, *frag2; 522 int first_len = skb_pagelen(skb); 523 524 if (first_len - hlen > mtu || 525 ((first_len - hlen) & 7) || 526 (iph->frag_off & htons(IP_MF|IP_OFFSET)) || 527 skb_cloned(skb)) 528 goto slow_path; 529 530 skb_walk_frags(skb, frag) { 531 /* Correct geometry. */ 532 if (frag->len > mtu || 533 ((frag->len & 7) && frag->next) || 534 skb_headroom(frag) < hlen) 535 goto slow_path_clean; 536 537 /* Partially cloned skb? */ 538 if (skb_shared(frag)) 539 goto slow_path_clean; 540 541 BUG_ON(frag->sk); 542 if (skb->sk) { 543 frag->sk = skb->sk; 544 frag->destructor = sock_wfree; 545 } 546 skb->truesize -= frag->truesize; 547 } 548 549 /* Everything is OK. Generate! */ 550 551 err = 0; 552 offset = 0; 553 frag = skb_shinfo(skb)->frag_list; 554 skb_frag_list_init(skb); 555 skb->data_len = first_len - skb_headlen(skb); 556 skb->len = first_len; 557 iph->tot_len = htons(first_len); 558 iph->frag_off = htons(IP_MF); 559 ip_send_check(iph); 560 561 for (;;) { 562 /* Prepare header of the next frame, 563 * before previous one went down. */ 564 if (frag) { 565 frag->ip_summed = CHECKSUM_NONE; 566 skb_reset_transport_header(frag); 567 __skb_push(frag, hlen); 568 skb_reset_network_header(frag); 569 memcpy(skb_network_header(frag), iph, hlen); 570 iph = ip_hdr(frag); 571 iph->tot_len = htons(frag->len); 572 ip_copy_metadata(frag, skb); 573 if (offset == 0) 574 ip_options_fragment(frag); 575 offset += skb->len - hlen; 576 iph->frag_off = htons(offset>>3); 577 if (frag->next != NULL) 578 iph->frag_off |= htons(IP_MF); 579 /* Ready, complete checksum */ 580 ip_send_check(iph); 581 } 582 583 err = output(skb); 584 585 if (!err) 586 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); 587 if (err || !frag) 588 break; 589 590 skb = frag; 591 frag = skb->next; 592 skb->next = NULL; 593 } 594 595 if (err == 0) { 596 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); 597 return 0; 598 } 599 600 while (frag) { 601 skb = frag->next; 602 kfree_skb(frag); 603 frag = skb; 604 } 605 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 606 return err; 607 608slow_path_clean: 609 skb_walk_frags(skb, frag2) { 610 if (frag2 == frag) 611 break; 612 frag2->sk = NULL; 613 frag2->destructor = NULL; 614 skb->truesize += frag2->truesize; 615 } 616 } 617 618slow_path: 619 left = skb->len - hlen; /* Space per frame */ 620 ptr = hlen; /* Where to start from */ 621 622 /* for bridged IP traffic encapsulated inside f.e. a vlan header, 623 * we need to make room for the encapsulating header 624 */ 625 ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); 626 627 /* 628 * Fragment the datagram. 629 */ 630 631 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3; 632 not_last_frag = iph->frag_off & htons(IP_MF); 633 634 /* 635 * Keep copying data until we run out. 636 */ 637 638 while (left > 0) { 639 len = left; 640 /* IF: it doesn't fit, use 'mtu' - the data space left */ 641 if (len > mtu) 642 len = mtu; 643 /* IF: we are not sending upto and including the packet end 644 then align the next start on an eight byte boundary */ 645 if (len < left) { 646 len &= ~7; 647 } 648 /* 649 * Allocate buffer. 650 */ 651 652 if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) { 653 NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n"); 654 err = -ENOMEM; 655 goto fail; 656 } 657 658 /* 659 * Set up data on packet 660 */ 661 662 ip_copy_metadata(skb2, skb); 663 skb_reserve(skb2, ll_rs); 664 skb_put(skb2, len + hlen); 665 skb_reset_network_header(skb2); 666 skb2->transport_header = skb2->network_header + hlen; 667 668 /* 669 * Charge the memory for the fragment to any owner 670 * it might possess 671 */ 672 673 if (skb->sk) 674 skb_set_owner_w(skb2, skb->sk); 675 676 /* 677 * Copy the packet header into the new buffer. 678 */ 679 680 skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen); 681 682 /* 683 * Copy a block of the IP datagram. 684 */ 685 if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len)) 686 BUG(); 687 left -= len; 688 689 /* 690 * Fill in the new header fields. 691 */ 692 iph = ip_hdr(skb2); 693 iph->frag_off = htons((offset >> 3)); 694 695 /* Foxconn added start pling 04/29/2010 */ 696 /* If the packet is not from IP stack, i.e. from other i/f, 697 * then copy the Ethernet header and cb to the 698 * first fragment, for later use by NAT/QoS. 699 */ 700 if (!skb->sk && first_frag) 701 { 702 first_frag = 0; 703 skb2->mac_header = (unsigned char *)(skb2->data - sizeof(struct ethhdr)); 704 memcpy(skb2->mac_header, skb->mac_header, sizeof(struct ethhdr)); 705 memcpy(skb2->cb, skb->cb, sizeof(skb->cb)); 706 } 707 /* Foxconn added end pling 04/29/2010 */ 708 709 /* ANK: dirty, but effective trick. Upgrade options only if 710 * the segment to be fragmented was THE FIRST (otherwise, 711 * options are already fixed) and make it ONCE 712 * on the initial skb, so that all the following fragments 713 * will inherit fixed options. 714 */ 715 if (offset == 0) 716 ip_options_fragment(skb); 717 718 /* 719 * Added AC : If we are fragmenting a fragment that's not the 720 * last fragment then keep MF on each bit 721 */ 722 if (left > 0 || not_last_frag) 723 iph->frag_off |= htons(IP_MF); 724 ptr += len; 725 offset += len; 726 727 /* 728 * Put this fragment into the sending queue. 729 */ 730 iph->tot_len = htons(len + hlen); 731 732 ip_send_check(iph); 733 734 err = output(skb2); 735 if (err) 736 goto fail; 737 738 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); 739 } 740 kfree_skb(skb); 741 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); 742 return err; 743 744fail: 745 kfree_skb(skb); 746 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 747 return err; 748} 749EXPORT_SYMBOL(ip_fragment); 750 751int 752ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) 753{ 754 struct iovec *iov = from; 755 756 if (skb->ip_summed == CHECKSUM_PARTIAL) { 757 if (memcpy_fromiovecend(to, iov, offset, len) < 0) 758 return -EFAULT; 759 } else { 760 __wsum csum = 0; 761 if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0) 762 return -EFAULT; 763 skb->csum = csum_block_add(skb->csum, csum, odd); 764 } 765 return 0; 766} 767EXPORT_SYMBOL(ip_generic_getfrag); 768 769static inline __wsum 770csum_page(struct page *page, int offset, int copy) 771{ 772 char *kaddr; 773 __wsum csum; 774 kaddr = kmap(page); 775 csum = csum_partial(kaddr + offset, copy, 0); 776 kunmap(page); 777 return csum; 778} 779 780static inline int ip_ufo_append_data(struct sock *sk, 781 int getfrag(void *from, char *to, int offset, int len, 782 int odd, struct sk_buff *skb), 783 void *from, int length, int hh_len, int fragheaderlen, 784 int transhdrlen, int mtu, unsigned int flags) 785{ 786 struct sk_buff *skb; 787 int err; 788 789 /* There is support for UDP fragmentation offload by network 790 * device, so create one single skb packet containing complete 791 * udp datagram 792 */ 793 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { 794 skb = sock_alloc_send_skb(sk, 795 hh_len + fragheaderlen + transhdrlen + 20, 796 (flags & MSG_DONTWAIT), &err); 797 798 if (skb == NULL) 799 return err; 800 801 /* reserve space for Hardware header */ 802 skb_reserve(skb, hh_len); 803 804 /* create space for UDP/IP header */ 805 skb_put(skb, fragheaderlen + transhdrlen); 806 807 /* initialize network header pointer */ 808 skb_reset_network_header(skb); 809 810 /* initialize protocol header pointer */ 811 skb->transport_header = skb->network_header + fragheaderlen; 812 813 skb->ip_summed = CHECKSUM_PARTIAL; 814 skb->csum = 0; 815 sk->sk_sndmsg_off = 0; 816 817 /* specify the length of each IP datagram fragment */ 818 skb_shinfo(skb)->gso_size = mtu - fragheaderlen; 819 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 820 __skb_queue_tail(&sk->sk_write_queue, skb); 821 } 822 823 return skb_append_datato_frags(sk, skb, getfrag, from, 824 (length - transhdrlen)); 825} 826 827/* 828 * ip_append_data() and ip_append_page() can make one large IP datagram 829 * from many pieces of data. Each pieces will be holded on the socket 830 * until ip_push_pending_frames() is called. Each piece can be a page 831 * or non-page data. 832 * 833 * Not only UDP, other transport protocols - e.g. raw sockets - can use 834 * this interface potentially. 835 * 836 * LATER: length must be adjusted by pad at tail, when it is required. 837 */ 838int ip_append_data(struct sock *sk, 839 int getfrag(void *from, char *to, int offset, int len, 840 int odd, struct sk_buff *skb), 841 void *from, int length, int transhdrlen, 842 struct ipcm_cookie *ipc, struct rtable **rtp, 843 unsigned int flags) 844{ 845 struct inet_sock *inet = inet_sk(sk); 846 struct sk_buff *skb; 847 848 struct ip_options *opt = NULL; 849 int hh_len; 850 int exthdrlen; 851 int mtu; 852 int copy; 853 int err; 854 int offset = 0; 855 unsigned int maxfraglen, fragheaderlen; 856 int csummode = CHECKSUM_NONE; 857 struct rtable *rt; 858 859 if (flags&MSG_PROBE) 860 return 0; 861 862 if (skb_queue_empty(&sk->sk_write_queue)) { 863 /* 864 * setup for corking. 865 */ 866 opt = ipc->opt; 867 if (opt) { 868 if (inet->cork.opt == NULL) { 869 inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); 870 if (unlikely(inet->cork.opt == NULL)) 871 return -ENOBUFS; 872 } 873 memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen); 874 inet->cork.flags |= IPCORK_OPT; 875 inet->cork.addr = ipc->addr; 876 } 877 rt = *rtp; 878 if (unlikely(!rt)) 879 return -EFAULT; 880 /* 881 * We steal reference to this route, caller should not release it 882 */ 883 *rtp = NULL; 884 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? 885 rt->dst.dev->mtu : 886 dst_mtu(rt->dst.path); 887 inet->cork.dst = &rt->dst; 888 inet->cork.length = 0; 889 sk->sk_sndmsg_page = NULL; 890 sk->sk_sndmsg_off = 0; 891 if ((exthdrlen = rt->dst.header_len) != 0) { 892 length += exthdrlen; 893 transhdrlen += exthdrlen; 894 } 895 } else { 896 rt = (struct rtable *)inet->cork.dst; 897 if (inet->cork.flags & IPCORK_OPT) 898 opt = inet->cork.opt; 899 900 transhdrlen = 0; 901 exthdrlen = 0; 902 mtu = inet->cork.fragsize; 903 } 904 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 905 906 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 907 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 908 909 if (inet->cork.length + length > 0xFFFF - fragheaderlen) { 910 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, 911 mtu-exthdrlen); 912 return -EMSGSIZE; 913 } 914 915 /* 916 * transhdrlen > 0 means that this is the first fragment and we wish 917 * it won't be fragmented in the future. 918 */ 919 if (transhdrlen && 920 length + fragheaderlen <= mtu && 921 rt->dst.dev->features & NETIF_F_V4_CSUM && 922 !exthdrlen) 923 csummode = CHECKSUM_PARTIAL; 924 925 skb = skb_peek_tail(&sk->sk_write_queue); 926 927 inet->cork.length += length; 928 if (((length > mtu) || (skb && skb_is_gso(skb))) && 929 (sk->sk_protocol == IPPROTO_UDP) && 930 (rt->dst.dev->features & NETIF_F_UFO)) { 931 err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, 932 fragheaderlen, transhdrlen, mtu, 933 flags); 934 if (err) 935 goto error; 936 return 0; 937 } 938 939 /* So, what's going on in the loop below? 940 * 941 * We use calculated fragment length to generate chained skb, 942 * each of segments is IP fragment ready for sending to network after 943 * adding appropriate IP header. 944 */ 945 946 if (!skb) 947 goto alloc_new_skb; 948 949 while (length > 0) { 950 /* Check if the remaining data fits into current packet. */ 951 copy = mtu - skb->len; 952 if (copy < length) 953 copy = maxfraglen - skb->len; 954 if (copy <= 0) { 955 char *data; 956 unsigned int datalen; 957 unsigned int fraglen; 958 unsigned int fraggap; 959 unsigned int alloclen; 960 struct sk_buff *skb_prev; 961alloc_new_skb: 962 skb_prev = skb; 963 if (skb_prev) 964 fraggap = skb_prev->len - maxfraglen; 965 else 966 fraggap = 0; 967 968 /* 969 * If remaining data exceeds the mtu, 970 * we know we need more fragment(s). 971 */ 972 datalen = length + fraggap; 973 if (datalen > mtu - fragheaderlen) 974 datalen = maxfraglen - fragheaderlen; 975 fraglen = datalen + fragheaderlen; 976 977 if ((flags & MSG_MORE) && 978 !(rt->dst.dev->features&NETIF_F_SG)) 979 alloclen = mtu; 980 else 981 alloclen = datalen + fragheaderlen; 982 983 /* The last fragment gets additional space at tail. 984 * Note, with MSG_MORE we overallocate on fragments, 985 * because we have no idea what fragment will be 986 * the last. 987 */ 988 if (datalen == length + fraggap) 989 alloclen += rt->dst.trailer_len; 990 991 if (transhdrlen) { 992 skb = sock_alloc_send_skb(sk, 993 alloclen + hh_len + 15, 994 (flags & MSG_DONTWAIT), &err); 995 } else { 996 skb = NULL; 997 if (atomic_read(&sk->sk_wmem_alloc) <= 998 2 * sk->sk_sndbuf) 999 skb = sock_wmalloc(sk, 1000 alloclen + hh_len + 15, 1, 1001 sk->sk_allocation); 1002 if (unlikely(skb == NULL)) 1003 err = -ENOBUFS; 1004 else 1005 /* only the initial fragment is 1006 time stamped */ 1007 ipc->shtx.flags = 0; 1008 } 1009 if (skb == NULL) 1010 goto error; 1011 1012 /* 1013 * Fill in the control structures 1014 */ 1015 skb->ip_summed = csummode; 1016 skb->csum = 0; 1017 skb_reserve(skb, hh_len); 1018 *skb_tx(skb) = ipc->shtx; 1019 1020 /* 1021 * Find where to start putting bytes. 1022 */ 1023 data = skb_put(skb, fraglen); 1024 skb_set_network_header(skb, exthdrlen); 1025 skb->transport_header = (skb->network_header + 1026 fragheaderlen); 1027 data += fragheaderlen; 1028 1029 if (fraggap) { 1030 skb->csum = skb_copy_and_csum_bits( 1031 skb_prev, maxfraglen, 1032 data + transhdrlen, fraggap, 0); 1033 skb_prev->csum = csum_sub(skb_prev->csum, 1034 skb->csum); 1035 data += fraggap; 1036 pskb_trim_unique(skb_prev, maxfraglen); 1037 } 1038 1039 copy = datalen - transhdrlen - fraggap; 1040 if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 1041 err = -EFAULT; 1042 kfree_skb(skb); 1043 goto error; 1044 } 1045 1046 offset += copy; 1047 length -= datalen - fraggap; 1048 transhdrlen = 0; 1049 exthdrlen = 0; 1050 csummode = CHECKSUM_NONE; 1051 1052 /* 1053 * Put the packet on the pending queue. 1054 */ 1055 __skb_queue_tail(&sk->sk_write_queue, skb); 1056 continue; 1057 } 1058 1059 if (copy > length) 1060 copy = length; 1061 1062 if (!(rt->dst.dev->features&NETIF_F_SG)) { 1063 unsigned int off; 1064 1065 off = skb->len; 1066 if (getfrag(from, skb_put(skb, copy), 1067 offset, copy, off, skb) < 0) { 1068 __skb_trim(skb, off); 1069 err = -EFAULT; 1070 goto error; 1071 } 1072 } else { 1073 int i = skb_shinfo(skb)->nr_frags; 1074 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 1075 struct page *page = sk->sk_sndmsg_page; 1076 int off = sk->sk_sndmsg_off; 1077 unsigned int left; 1078 1079 if (page && (left = PAGE_SIZE - off) > 0) { 1080 if (copy >= left) 1081 copy = left; 1082 if (page != frag->page) { 1083 if (i == MAX_SKB_FRAGS) { 1084 err = -EMSGSIZE; 1085 goto error; 1086 } 1087 get_page(page); 1088 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); 1089 frag = &skb_shinfo(skb)->frags[i]; 1090 } 1091 } else if (i < MAX_SKB_FRAGS) { 1092 if (copy > PAGE_SIZE) 1093 copy = PAGE_SIZE; 1094 page = alloc_pages(sk->sk_allocation, 0); 1095 if (page == NULL) { 1096 err = -ENOMEM; 1097 goto error; 1098 } 1099 sk->sk_sndmsg_page = page; 1100 sk->sk_sndmsg_off = 0; 1101 1102 skb_fill_page_desc(skb, i, page, 0, 0); 1103 frag = &skb_shinfo(skb)->frags[i]; 1104 } else { 1105 err = -EMSGSIZE; 1106 goto error; 1107 } 1108 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { 1109 err = -EFAULT; 1110 goto error; 1111 } 1112 sk->sk_sndmsg_off += copy; 1113 frag->size += copy; 1114 skb->len += copy; 1115 skb->data_len += copy; 1116 skb->truesize += copy; 1117 atomic_add(copy, &sk->sk_wmem_alloc); 1118 } 1119 offset += copy; 1120 length -= copy; 1121 } 1122 1123 return 0; 1124 1125error: 1126 inet->cork.length -= length; 1127 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); 1128 return err; 1129} 1130 1131ssize_t ip_append_page(struct sock *sk, struct page *page, 1132 int offset, size_t size, int flags) 1133{ 1134 struct inet_sock *inet = inet_sk(sk); 1135 struct sk_buff *skb; 1136 struct rtable *rt; 1137 struct ip_options *opt = NULL; 1138 int hh_len; 1139 int mtu; 1140 int len; 1141 int err; 1142 unsigned int maxfraglen, fragheaderlen, fraggap; 1143 1144 if (inet->hdrincl) 1145 return -EPERM; 1146 1147 if (flags&MSG_PROBE) 1148 return 0; 1149 1150 if (skb_queue_empty(&sk->sk_write_queue)) 1151 return -EINVAL; 1152 1153 rt = (struct rtable *)inet->cork.dst; 1154 if (inet->cork.flags & IPCORK_OPT) 1155 opt = inet->cork.opt; 1156 1157 if (!(rt->dst.dev->features&NETIF_F_SG)) 1158 return -EOPNOTSUPP; 1159 1160 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1161 mtu = inet->cork.fragsize; 1162 1163 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 1164 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 1165 1166 if (inet->cork.length + size > 0xFFFF - fragheaderlen) { 1167 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu); 1168 return -EMSGSIZE; 1169 } 1170 1171 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1172 return -EINVAL; 1173 1174 inet->cork.length += size; 1175 if ((size + skb->len > mtu) && 1176 (sk->sk_protocol == IPPROTO_UDP) && 1177 (rt->dst.dev->features & NETIF_F_UFO)) { 1178 skb_shinfo(skb)->gso_size = mtu - fragheaderlen; 1179 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1180 } 1181 1182 1183 while (size > 0) { 1184 int i; 1185 1186 if (skb_is_gso(skb)) 1187 len = size; 1188 else { 1189 1190 /* Check if the remaining data fits into current packet. */ 1191 len = mtu - skb->len; 1192 if (len < size) 1193 len = maxfraglen - skb->len; 1194 } 1195 if (len <= 0) { 1196 struct sk_buff *skb_prev; 1197 int alloclen; 1198 1199 skb_prev = skb; 1200 fraggap = skb_prev->len - maxfraglen; 1201 1202 alloclen = fragheaderlen + hh_len + fraggap + 15; 1203 skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation); 1204 if (unlikely(!skb)) { 1205 err = -ENOBUFS; 1206 goto error; 1207 } 1208 1209 /* 1210 * Fill in the control structures 1211 */ 1212 skb->ip_summed = CHECKSUM_NONE; 1213 skb->csum = 0; 1214 skb_reserve(skb, hh_len); 1215 1216 /* 1217 * Find where to start putting bytes. 1218 */ 1219 skb_put(skb, fragheaderlen + fraggap); 1220 skb_reset_network_header(skb); 1221 skb->transport_header = (skb->network_header + 1222 fragheaderlen); 1223 if (fraggap) { 1224 skb->csum = skb_copy_and_csum_bits(skb_prev, 1225 maxfraglen, 1226 skb_transport_header(skb), 1227 fraggap, 0); 1228 skb_prev->csum = csum_sub(skb_prev->csum, 1229 skb->csum); 1230 pskb_trim_unique(skb_prev, maxfraglen); 1231 } 1232 1233 /* 1234 * Put the packet on the pending queue. 1235 */ 1236 __skb_queue_tail(&sk->sk_write_queue, skb); 1237 continue; 1238 } 1239 1240 i = skb_shinfo(skb)->nr_frags; 1241 if (len > size) 1242 len = size; 1243 if (skb_can_coalesce(skb, i, page, offset)) { 1244 skb_shinfo(skb)->frags[i-1].size += len; 1245 } else if (i < MAX_SKB_FRAGS) { 1246 get_page(page); 1247 skb_fill_page_desc(skb, i, page, offset, len); 1248 } else { 1249 err = -EMSGSIZE; 1250 goto error; 1251 } 1252 1253 if (skb->ip_summed == CHECKSUM_NONE) { 1254 __wsum csum; 1255 csum = csum_page(page, offset, len); 1256 skb->csum = csum_block_add(skb->csum, csum, skb->len); 1257 } 1258 1259 skb->len += len; 1260 skb->data_len += len; 1261 skb->truesize += len; 1262 atomic_add(len, &sk->sk_wmem_alloc); 1263 offset += len; 1264 size -= len; 1265 } 1266 return 0; 1267 1268error: 1269 inet->cork.length -= size; 1270 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); 1271 return err; 1272} 1273 1274static void ip_cork_release(struct inet_sock *inet) 1275{ 1276 inet->cork.flags &= ~IPCORK_OPT; 1277 kfree(inet->cork.opt); 1278 inet->cork.opt = NULL; 1279 dst_release(inet->cork.dst); 1280 inet->cork.dst = NULL; 1281} 1282 1283/* 1284 * Combined all pending IP fragments on the socket as one IP datagram 1285 * and push them out. 1286 */ 1287int ip_push_pending_frames(struct sock *sk) 1288{ 1289 struct sk_buff *skb, *tmp_skb; 1290 struct sk_buff **tail_skb; 1291 struct inet_sock *inet = inet_sk(sk); 1292 struct net *net = sock_net(sk); 1293 struct ip_options *opt = NULL; 1294 struct rtable *rt = (struct rtable *)inet->cork.dst; 1295 struct iphdr *iph; 1296 __be16 df = 0; 1297 __u8 ttl; 1298 int err = 0; 1299 1300 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 1301 goto out; 1302 tail_skb = &(skb_shinfo(skb)->frag_list); 1303 1304 /* move skb->data to ip header from ext header */ 1305 if (skb->data < skb_network_header(skb)) 1306 __skb_pull(skb, skb_network_offset(skb)); 1307 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1308 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1309 *tail_skb = tmp_skb; 1310 tail_skb = &(tmp_skb->next); 1311 skb->len += tmp_skb->len; 1312 skb->data_len += tmp_skb->len; 1313 skb->truesize += tmp_skb->truesize; 1314 tmp_skb->destructor = NULL; 1315 tmp_skb->sk = NULL; 1316 } 1317 1318 /* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow 1319 * to fragment the frame generated here. No matter, what transforms 1320 * how transforms change size of the packet, it will come out. 1321 */ 1322 if (inet->pmtudisc < IP_PMTUDISC_DO) 1323 skb->local_df = 1; 1324 1325 /* DF bit is set when we want to see DF on outgoing frames. 1326 * If local_df is set too, we still allow to fragment this frame 1327 * locally. */ 1328 if (inet->pmtudisc >= IP_PMTUDISC_DO || 1329 (skb->len <= dst_mtu(&rt->dst) && 1330 ip_dont_fragment(sk, &rt->dst))) 1331 df = htons(IP_DF); 1332 1333 if (inet->cork.flags & IPCORK_OPT) 1334 opt = inet->cork.opt; 1335 1336 if (rt->rt_type == RTN_MULTICAST) 1337 ttl = inet->mc_ttl; 1338 else 1339 ttl = ip_select_ttl(inet, &rt->dst); 1340 1341 iph = (struct iphdr *)skb->data; 1342 iph->version = 4; 1343 iph->ihl = 5; 1344 if (opt) { 1345 iph->ihl += opt->optlen>>2; 1346 ip_options_build(skb, opt, inet->cork.addr, rt, 0); 1347 } 1348 iph->tos = inet->tos; 1349 iph->frag_off = df; 1350 ip_select_ident(iph, &rt->dst, sk); 1351 iph->ttl = ttl; 1352 iph->protocol = sk->sk_protocol; 1353 iph->saddr = rt->rt_src; 1354 iph->daddr = rt->rt_dst; 1355 1356 skb->priority = sk->sk_priority; 1357 skb->mark = sk->sk_mark; 1358 /* 1359 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec 1360 * on dst refcount 1361 */ 1362 inet->cork.dst = NULL; 1363 skb_dst_set(skb, &rt->dst); 1364 1365 if (iph->protocol == IPPROTO_ICMP) 1366 icmp_out_count(net, ((struct icmphdr *) 1367 skb_transport_header(skb))->type); 1368 1369 /* Netfilter gets whole the not fragmented skb. */ 1370 err = ip_local_out(skb); 1371 if (err) { 1372 if (err > 0) 1373 err = net_xmit_errno(err); 1374 if (err) 1375 goto error; 1376 } 1377 1378out: 1379 ip_cork_release(inet); 1380 return err; 1381 1382error: 1383 IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); 1384 goto out; 1385} 1386 1387/* 1388 * Throw away all pending data on the socket. 1389 */ 1390void ip_flush_pending_frames(struct sock *sk) 1391{ 1392 struct sk_buff *skb; 1393 1394 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) 1395 kfree_skb(skb); 1396 1397 ip_cork_release(inet_sk(sk)); 1398} 1399 1400 1401/* 1402 * Fetch data from kernel space and fill in checksum if needed. 1403 */ 1404static int ip_reply_glue_bits(void *dptr, char *to, int offset, 1405 int len, int odd, struct sk_buff *skb) 1406{ 1407 __wsum csum; 1408 1409 csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0); 1410 skb->csum = csum_block_add(skb->csum, csum, odd); 1411 return 0; 1412} 1413 1414/* 1415 * Generic function to send a packet as reply to another packet. 1416 * Used to send TCP resets so far. ICMP should use this function too. 1417 * 1418 * Should run single threaded per socket because it uses the sock 1419 * structure to pass arguments. 1420 */ 1421void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, 1422 unsigned int len) 1423{ 1424 struct inet_sock *inet = inet_sk(sk); 1425 struct { 1426 struct ip_options opt; 1427 char data[40]; 1428 } replyopts; 1429 struct ipcm_cookie ipc; 1430 __be32 daddr; 1431 struct rtable *rt = skb_rtable(skb); 1432 1433 if (ip_options_echo(&replyopts.opt, skb)) 1434 return; 1435 1436 daddr = ipc.addr = rt->rt_src; 1437 ipc.opt = NULL; 1438 ipc.shtx.flags = 0; 1439 1440 if (replyopts.opt.optlen) { 1441 ipc.opt = &replyopts.opt; 1442 1443 if (ipc.opt->srr) 1444 daddr = replyopts.opt.faddr; 1445 } 1446 1447 { 1448 struct flowi fl = { .oif = arg->bound_dev_if, 1449 .nl_u = { .ip4_u = 1450 { .daddr = daddr, 1451 .saddr = rt->rt_spec_dst, 1452 .tos = RT_TOS(ip_hdr(skb)->tos) } }, 1453 /* Not quite clean, but right. */ 1454 .uli_u = { .ports = 1455 { .sport = tcp_hdr(skb)->dest, 1456 .dport = tcp_hdr(skb)->source } }, 1457 .proto = sk->sk_protocol, 1458 .flags = ip_reply_arg_flowi_flags(arg) }; 1459 security_skb_classify_flow(skb, &fl); 1460 if (ip_route_output_key(sock_net(sk), &rt, &fl)) 1461 return; 1462 } 1463 1464 /* And let IP do all the hard work. 1465 1466 This chunk is not reenterable, hence spinlock. 1467 Note that it uses the fact, that this function is called 1468 with locally disabled BH and that sk cannot be already spinlocked. 1469 */ 1470 bh_lock_sock(sk); 1471 inet->tos = ip_hdr(skb)->tos; 1472 sk->sk_priority = skb->priority; 1473 sk->sk_protocol = ip_hdr(skb)->protocol; 1474 sk->sk_bound_dev_if = arg->bound_dev_if; 1475 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, 1476 &ipc, &rt, MSG_DONTWAIT); 1477 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { 1478 if (arg->csumoffset >= 0) 1479 *((__sum16 *)skb_transport_header(skb) + 1480 arg->csumoffset) = csum_fold(csum_add(skb->csum, 1481 arg->csum)); 1482 skb->ip_summed = CHECKSUM_NONE; 1483 ip_push_pending_frames(sk); 1484 } 1485 1486 bh_unlock_sock(sk); 1487 1488 ip_rt_put(rt); 1489} 1490 1491void __init ip_init(void) 1492{ 1493 ip_rt_init(); 1494 inet_initpeers(); 1495 1496#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS) 1497 igmp_mc_proc_init(); 1498#endif 1499} 1500/* Fxcn port-S Wins, 0714-09 */ 1501//Foxconn add start, Lewis Min, for OpenDNS, 12/12/2008 1502void insert_func_to_BR_POST_ROUTE(void *FUNC) 1503{ 1504 br_post_insert_hook= FUNC; 1505} 1506 1507 1508void remove_func_from_BR_POST_ROUTE(void) 1509{ 1510 br_post_insert_hook= NULL; 1511} 1512//Foxconn add end, Lewis Min, for OpenDNS, 12/12/2008 1513/* Fxcn port-E Wins, 0714-09 */ 1514