/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) output module.
 *
 * Version:	$Id: ip_output.c,v 1.1.1.1 2007/08/03 18:53:51 Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <Alan.Cox@linux.org>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Hirokazu Takahashi, <taka@valinux.co.jp>
 *
 *	See ip_input.c for original log
 *
 *	Fixes:
 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
 *		Bradford Johnson:	Fix faulty handling of some frames when
 *					no route is found.
 *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
 *					(in case if packet not accepted by
 *					output firewall rules)
 *		Mike McLagan	:	Routing by source
 *		Alexey Kuznetsov:	use new route cache
 *		Andi Kleen:		Fix broken PMTU recovery and remove
 *					some redundant tests.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *		Andi Kleen	:	Replace ip_reply with ip_send_reply.
 *		Andi Kleen	:	Split fast and slow ip_build_xmit path
 *					for decreased register pressure on x86
 *					and more readibility.
 *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
 *					silently drop skb instead of failing with -EPERM.
 *		Detlev Wengorz	:	Copy protocol for fragments.
 *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
 *					datagrams.
 *		Hirokazu Takahashi:	sendfile() on UDP works now.
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/highmem.h>

#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
/* NOTE(review): <net/checksum.h> is included a second time below; harmless
 * because of include guards, but the duplicate could be removed.
 */
#include <net/checksum.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
#include <linux/tcp.h>

/* Default TTL placed in outgoing IP headers when the socket does not
 * override it (see ip_select_ttl() below).
 */
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;

/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
{
	/* The check field must be zero while the sum is computed. */
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}

/* dev_loopback_xmit for use with netfilter.
 * Re-injects a cloned, looped-back packet into the local receive path
 * via netif_rx() so local listeners see their own multicast/broadcast.
 */
static int ip_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	/* Strip back to the network header before handing to the RX path. */
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);
	netif_rx(newskb);
	return 0;
}

/* Unicast TTL: the per-socket uc_ttl when set (>= 0), otherwise the
 * route's RTAX_HOPLIMIT metric.
 */
static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
	int ttl = inet->uc_ttl;

	if (ttl < 0)
		ttl = dst_metric(dst, RTAX_HOPLIMIT);
	return ttl;
}

/*
 *		Add an ip header to a skbuff and send it out.
 *
 */
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
			  __be32 saddr, __be32 daddr, struct ip_options *opt)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = (struct rtable *)skb->dst;
	struct iphdr *iph;

	/* Build the IP header. */
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->version  = 4;
	iph->ihl      = 5;
	iph->tos      = inet->tos;
	if (ip_dont_fragment(sk, &rt->u.dst))
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
	/* Header addresses come from the route, not the @saddr/@daddr
	 * arguments; @daddr is only used for building options below and
	 * @saddr is unused here.
	 */
	iph->daddr    = rt->rt_dst;
	iph->saddr    = rt->rt_src;
	iph->protocol = sk->sk_protocol;
	iph->tot_len  = htons(skb->len);
	ip_select_ident(iph, &rt->u.dst, sk);

	if (opt && opt->optlen) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, daddr, rt, 0);
	}
	ip_send_check(iph);

	skb->priority = sk->sk_priority;

	/* Send it out. */
	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
		       dst_output);
}

EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);

/* Final output step: hand the packet to layer 2, using the destination's
 * cached hardware header if available, else the neighbour output path.
 */
static inline int ip_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct rtable *rt = (struct rtable *)dst;
	struct net_device *dev = dst->dev;
	int hh_len = LL_RESERVED_SPACE(dev);

	if (rt->rt_type == RTN_MULTICAST)
		IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
	else if (rt->rt_type == RTN_BROADCAST)
		IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);

	/* Be paranoid, rather than too clever.
*/ 173 if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) { 174 struct sk_buff *skb2; 175 176 skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); 177 if (skb2 == NULL) { 178 kfree_skb(skb); 179 return -ENOMEM; 180 } 181 if (skb->sk) 182 skb_set_owner_w(skb2, skb->sk); 183 kfree_skb(skb); 184 skb = skb2; 185 } 186 187 if (dst->hh) 188 return neigh_hh_output(dst->hh, skb); 189 else if (dst->neighbour) 190 return dst->neighbour->output(skb); 191 192 if (net_ratelimit()) 193 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); 194 kfree_skb(skb); 195 return -EINVAL; 196} 197 198static inline int ip_skb_dst_mtu(struct sk_buff *skb) 199{ 200 struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; 201 202 return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? 203 skb->dst->dev->mtu : dst_mtu(skb->dst); 204} 205 206/* Fxcn port-S Wins, 0714-09 */ 207int (*br_post_insert_hook)(struct sk_buff *skb);//Foxconn add , Lewis Min, for OpenDNS, 03/12/2009 208/* Fxcn port-E Wins, 0714-09 */ 209static inline int ip_finish_output(struct sk_buff *skb) 210{ 211#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 212 /* Policy lookup after SNAT yielded a new policy */ 213 if (skb->dst->xfrm != NULL) { 214 IPCB(skb)->flags |= IPSKB_REROUTED; 215 return dst_output(skb); 216 } 217#endif 218 219/* Fxcn port-S Wins, 0714-09 */ 220 //Foxconn add start, Lewis Min, for OpenDNS, 03/12/2009 221 if(NULL!=br_post_insert_hook) 222 { 223 int ret; 224 225 ret = br_post_insert_hook(skb); 226 if((ret==NF_DROP)||(ret==NF_STOLEN)) 227 { 228// read_unlock(&br->lock); 229// spin_unlock_bh(&br->lock); 230 return; 231 } 232 } 233 //Foxconn add end, Lewis Min, for OpenDNS, 03/12/2009 234/* Fxcn port-E Wins, 0714-09 */ 235 236 if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) 237 return ip_fragment(skb, ip_finish_output2); 238 else 239 return ip_finish_output2(skb); 240} 241 242int ip_mc_output(struct sk_buff *skb) 243{ 244 struct sock *sk = skb->sk; 245 struct 
rtable *rt = (struct rtable*)skb->dst;
	struct net_device *dev = rt->u.dst.dev;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */
	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	/*
	 *	Multicasts are looped back for other local users
	 */

	if (rt->rt_flags&RTCF_MULTICAST) {
		if ((!sk || inet_sk(sk)->mc_loop)
#ifdef CONFIG_IP_MROUTE
		/* Small optimization: do not loopback not local frames,
		   which returned after forwarding; they will be dropped
		   by ip_mr_input in any case.
		   Note, that local frames are looped back to be delivered
		   to local recipients.

		   This check is duplicated in ip_mr_input at the moment.
		 */
		    && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
#endif
		) {
			/* Clone for loopback; the original continues on the wire. */
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
			if (newskb)
				NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
					newskb->dev,
					ip_dev_loopback_xmit);
		}

		/* Multicasts with ttl 0 must not go beyond the host */

		if (ip_hdr(skb)->ttl == 0) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (rt->rt_flags&RTCF_BROADCAST) {
		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
		if (newskb)
			NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
				newskb->dev, ip_dev_loopback_xmit);
	}

	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
			    ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}

/* Generic IPv4 output entry point for already-routed packets: stamp the
 * device and protocol, then run POST_ROUTING (skipped for rerouted skbs).
 */
int ip_output(struct sk_buff *skb)
{
	struct net_device *dev = skb->dst->dev;

	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
			    ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}

/* Queue a transport-layer packet for output: route it (unless already
 * routed, e.g. by SCTP), build the IP header and send via LOCAL_OUT.
 * @ipfragok suppresses the DF bit for callers that allow fragmentation.
 */
int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct
inet_sock *inet = inet_sk(sk);
	struct ip_options *opt = inet->opt;
	struct rtable *rt;
	struct iphdr *iph;

	/* Skip all of this if the packet is already routed,
	 * f.e. by something like SCTP.
	 */
	rt = (struct rtable *) skb->dst;
	if (rt != NULL)
		goto packet_routed;

	/* Make sure we can route this packet. */
	rt = (struct rtable *)__sk_dst_check(sk, 0);
	if (rt == NULL) {
		__be32 daddr;

		/* Use correct destination address if we have options. */
		daddr = inet->daddr;
		if(opt && opt->srr)
			daddr = opt->faddr;

		{
			struct flowi fl = { .oif = sk->sk_bound_dev_if,
					    .nl_u = { .ip4_u =
						      { .daddr = daddr,
							.saddr = inet->saddr,
							.tos = RT_CONN_FLAGS(sk) } },
					    .proto = sk->sk_protocol,
					    .uli_u = { .ports =
						       { .sport = inet->sport,
							 .dport = inet->dport } } };

			/* If this fails, retransmit mechanism of transport layer will
			 * keep trying until route appears or the connection times
			 * itself out.
			 */
			security_sk_classify_flow(sk, &fl);
			if (ip_route_output_flow(&rt, &fl, sk, 0))
				goto no_route;
		}
		sk_setup_caps(sk, &rt->u.dst);
	}
	skb->dst = dst_clone(&rt->u.dst);

packet_routed:
	/* Strict source routing requires the next hop to be the listed hop. */
	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
		goto no_route;

	/* OK, we know where to send it, allocate and build IP header. */
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	/* Single 16-bit store writes version (4), ihl (5) and TOS at once. */
	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
	iph->tot_len = htons(skb->len);
	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
	iph->protocol = sk->sk_protocol;
	iph->saddr    = rt->rt_src;
	iph->daddr    = rt->rt_dst;
	/* Transport layer set skb->h.foo itself.
 */

	if (opt && opt->optlen) {
		iph->ihl += opt->optlen >> 2;
		ip_options_build(skb, opt, inet->daddr, rt, 0);
	}

	/* For GSO packets, advance the IP ID by the number of segments. */
	ip_select_ident_more(iph, &rt->u.dst, sk,
			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);

	/* Add an IP checksum. */
	ip_send_check(iph);

	skb->priority = sk->sk_priority;

	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
		       dst_output);

no_route:
	IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EHOSTUNREACH;
}


/* Propagate per-packet metadata (type, priority, route, marks, netfilter
 * and IPVS state) from the original skb to a fragment.
 */
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

	/* Copy the flags to each fragment. */
	IPCB(to)->flags = IPCB(from)->flags;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
	to->ipvs_property = from->ipvs_property;
#endif
	skb_copy_secmark(to, from);
}

/*
 *	This IP datagram is too large to be sent in one piece.  Break it up into
 *	smaller pieces (each of size equal to IP header plus
 *	a block of the data of the original IP data part) that will yet fit in a
 *	single device frame, and queue such a frame for sending.
 */

int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
{
	struct iphdr *iph;
	int raw = 0;
	int ptr;
	struct net_device *dev;
	struct sk_buff *skb2;
	unsigned int mtu, hlen, left, len, ll_rs, pad;
	int offset;
	__be16 not_last_frag;
	struct rtable *rt = (struct rtable*)skb->dst;
	int err = 0;
	int first_frag = 1; // Foxconn added pling 04/29/2010

	dev = rt->u.dst.dev;

	/*
	 *	Point into the IP datagram header.
 */

	iph = ip_hdr(skb);

	/* DF set and local fragmentation not allowed: report FRAG_NEEDED. */
	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(ip_skb_dst_mtu(skb)));
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/*
	 *	Setup starting values.
	 */

	hlen = iph->ihl * 4;
	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;

	/* When frag_list is given, use it. First, check its validity:
	 * some transformers could create wrong frag_list or break existing
	 * one, it is not prohibited. In this case fall back to copying.
	 *
	 * LATER: this step can be merged to real generation of fragments,
	 * we can switch to copy when see the first bad fragment.
	 */
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *frag;
		int first_len = skb_pagelen(skb);

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			/* Transfer socket accounting from head to fragments. */
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				skb->truesize -= frag->truesize;
			}
		}

		/* Everything is OK. Generate! */

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		iph->tot_len = htons(first_len);
		iph->frag_off = htons(IP_MF);
		ip_send_check(iph);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
 */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				/* Prepend a copy of the IP header to this fragment. */
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), iph, hlen);
				iph = ip_hdr(frag);
				iph->tot_len = htons(frag->len);
				ip_copy_metadata(frag, skb);
				if (offset == 0)
					ip_options_fragment(frag);
				offset += skb->len - hlen;
				iph->frag_off = htons(offset>>3);
				if (frag->next != NULL)
					iph->frag_off |= htons(IP_MF);
				/* Ready, complete checksum */
				ip_send_check(iph);
			}

			err = output(skb);

			if (!err)
				IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		if (err == 0) {
			IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Output failed: free any fragments not yet transmitted. */
		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}
		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = raw + hlen;		/* Where to start from */

	/* for bridged IP traffic encapsulated inside f.e. a vlan header,
	 * we need to make room for the encapsulating header
	 */
	pad = nf_bridge_pad(skb);
	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
	mtu -= pad;

	/*
	 *	Fragment the datagram.
	 */

	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
	not_last_frag = iph->frag_off & htons(IP_MF);

	/*
	 *	Keep copying data until we run out.
	 */

	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
 */

		if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip_copy_metadata(skb2, skb);
		skb_reserve(skb2, ll_rs);
		skb_put(skb2, len + hlen);
		skb_reset_network_header(skb2);
		skb2->transport_header = skb2->network_header + hlen;

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */

		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
			BUG();
		left -= len;

		/*
		 *	Fill in the new header fields.
		 */
		iph = ip_hdr(skb2);
		iph->frag_off = htons((offset >> 3));

		/* Foxconn added start pling 04/29/2010 */
		/* If the packet is not from IP stack, i.e. from other i/f,
		 * then copy the Ethernet header and cb to the
		 * first fragment, for later use by NAT/QoS.
		 */
		/* NOTE(review): this assumes skb2 has an Ethernet header's worth
		 * of headroom below data and that skb->mac_header is valid for a
		 * forwarded frame — confirm against the vendor patch series.
		 */
		if (!skb->sk && first_frag)
		{
			first_frag = 0;
			skb2->mac_header = (unsigned char *)(skb2->data - sizeof(struct ethhdr));
			memcpy(skb2->mac_header, skb->mac_header, sizeof(struct ethhdr));
			memcpy(skb2->cb, skb->cb, sizeof(skb->cb));
		}
		/* Foxconn added end pling 04/29/2010 */

		/* ANK: dirty, but effective trick. Upgrade options only if
		 * the segment to be fragmented was THE FIRST (otherwise,
		 * options are already fixed) and make it ONCE
		 * on the initial skb, so that all the following fragments
		 * will inherit fixed options.
 */
		if (offset == 0)
			ip_options_fragment(skb);

		/*
		 *	Added AC : If we are fragmenting a fragment that's not the
		 *		   last fragment then keep MF on each bit
		 */
		if (left > 0 || not_last_frag)
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		iph->tot_len = htons(len + hlen);

		ip_send_check(iph);

		err = output(skb2);
		if (err)
			goto fail;

		IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
	}
	kfree_skb(skb);
	IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
	return err;

fail:
	kfree_skb(skb);
	IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
	return err;
}

EXPORT_SYMBOL(ip_fragment);

/* Standard getfrag callback for ip_append_data(): copy user iovec data
 * into @to and, unless the hardware checksums (CHECKSUM_PARTIAL), fold a
 * software checksum of the copied block into skb->csum.
 */
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct iovec *iov = from;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
			return -EFAULT;
	} else {
		__wsum csum = 0;
		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
			return -EFAULT;
		skb->csum = csum_block_add(skb->csum, csum, odd);
	}
	return 0;
}

/* Checksum @copy bytes of a (possibly highmem) page via a temporary
 * kmap mapping.
 */
static inline __wsum
csum_page(struct page *page, int offset, int copy)
{
	char *kaddr;
	__wsum csum;
	kaddr = kmap(page);
	csum = csum_partial(kaddr + offset, copy, 0);
	kunmap(page);
	return csum;
}

/* Build a single large skb for UDP fragmentation offload (UFO) capable
 * devices instead of software-fragmenting the datagram.
 */
static inline int ip_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP fragmentation offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb =
sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);

		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		/* Device computes the checksum for UFO packets. */
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk,skb, getfrag, from,
			       (length - transhdrlen));
	if (!err) {
		/* specify the length of each IP datagram fragment*/
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support do UFO ,
	 * so follow normal path
	 */
	kfree_skb(skb);
	return err;
}

/*
 *	ip_append_data() and ip_append_page() can make one large IP datagram
 *	from many pieces of data. Each pieces will be holded on the socket
 *	until ip_push_pending_frames() is called. Each piece can be a page
 *	or non-page data.
 *
 *	Not only UDP, other transport protocols - e.g. raw sockets - can use
 *	this interface potentially.
 *
 *	LATER: length must be adjusted by pad at tail, when it is required.
 */
int ip_append_data(struct sock *sk,
		   int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
		   void *from, int length, int transhdrlen,
		   struct ipcm_cookie *ipc, struct rtable *rt,
		   unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;

	struct ip_options *opt = NULL;
	int hh_len;
	int exthdrlen;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	unsigned int maxfraglen, fragheaderlen;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;

	/* Empty write queue means this is the first chunk: record the
	 * cork state (options, route, fragment size) on the socket.
	 */
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking.
		 */
		opt = ipc->opt;
		if (opt) {
			if (inet->cork.opt == NULL) {
				/* 40 == maximum size of IP options. */
				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
				if (unlikely(inet->cork.opt == NULL))
					return -ENOBUFS;
			}
			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
			inet->cork.flags |= IPCORK_OPT;
			inet->cork.addr = ipc->addr;
		}
		dst_hold(&rt->u.dst);
		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
					    rt->u.dst.dev->mtu :
					    dst_mtu(rt->u.dst.path);
		inet->cork.rt = rt;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		if ((exthdrlen = rt->u.dst.header_len) != 0) {
			length += exthdrlen;
			transhdrlen += exthdrlen;
		}
	} else {
		/* Subsequent chunks reuse the corked route and options. */
		rt = inet->cork.rt;
		if (inet->cork.flags & IPCORK_OPT)
			opt = inet->cork.opt;

		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}
	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct iphdr) + (opt ?
opt->optlen : 0);
	/* Largest 8-byte-aligned fragment payload plus the IP header. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
		return -EMSGSIZE;
	}

	/*
	 * transhdrlen > 0 means that this is the first fragment and we wish
	 * it won't be fragmented in the future.
	 */
	if (transhdrlen &&
	    length + fragheaderlen <= mtu &&
	    rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;

	inet->cork.length += length;
	/* Oversized UDP on a UFO-capable device: hand off to the offload path. */
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
			(rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
					 fragheaderlen, transhdrlen, mtu,
					 flags);
		if (err)
			goto error;
		return 0;
	}

	/* So, what's going on in the loop below?
	 *
	 * We use calculated fragment length to generate chained skb,
	 * each of segments is IP fragment ready for sending to network after
	 * adding appropriate IP header.
	 */

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = mtu - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;
		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;
			/* fraggap: bytes past the aligned boundary in the previous
			 * skb that must migrate into the new fragment.
			 */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
 */
			datalen = length + fraggap;
			if (datalen > mtu - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;
			fraglen = datalen + fragheaderlen;

			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/* The last fragment gets additional space at tail.
			 * Note, with MSG_MORE we overallocate on fragments,
			 * because we have no idea what fragment will be
			 * the last.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			if (transhdrlen) {
				/* First fragment: may block per MSG_DONTWAIT. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len + 15,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later fragments: bounded, non-blocking allocation. */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len + 15, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;

			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			skb_reserve(skb, hh_len);

			/*
			 *	Find where to start putting bytes.
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			data += fragheaderlen;

			/* Move the tail overhang of the previous skb into this
			 * one and adjust both checksums accordingly.
			 */
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			copy = datalen - transhdrlen - fraggap;
			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue.
 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
					offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append to page fragments, reusing the
			 * socket's current send page when it still has room.
			 */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL)  {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
				skb->truesize += PAGE_SIZE;
				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error:
	inet->cork.length -= length;
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err;
}

/* Append page data (e.g. sendfile) to the socket's pending IP datagram;
 * requires an existing corked queue and a scatter-gather capable device.
 */
ssize_t	ip_append_page(struct sock *sk, struct page *page,
		       int offset, size_t size, int flags)
{
	/* Append page-based data (zero-copy, e.g. UDP sendpage) to the last
	 * skb on the socket's pending (corked) write queue.  Requires
	 * scatter/gather support on the output device and a datagram already
	 * started by ip_append_data.  Returns 0 or a negative errno. */
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	struct rtable *rt;
	struct ip_options *opt = NULL;
	int hh_len;
	int mtu;
	int len;
	int err;
	unsigned int maxfraglen, fragheaderlen, fraggap;

	if (inet->hdrincl)
		return -EPERM;

	if (flags&MSG_PROBE)
		return 0;

	/* ip_append_data must have queued the first skb already. */
	if (skb_queue_empty(&sk->sk_write_queue))
		return -EINVAL;

	rt = inet->cork.rt;
	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	if (!(rt->u.dst.dev->features&NETIF_F_SG))
		return -EOPNOTSUPP;

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
	mtu = inet->cork.fragsize;

	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	/* Fragment payload must be a multiple of 8 bytes. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	/* Keep the whole datagram within the 16-bit IP total length. */
	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
		return -EMSGSIZE;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		return -EINVAL;

	inet->cork.length += size;
	/* Hand oversized UDP datagrams to the device as a single GSO skb
	 * when the hardware supports UDP fragmentation offload. */
	if ((sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	}


	while (size > 0) {
		int i;

		if (skb_is_gso(skb))
			len = size;
		else {

			/* Check if the remaining data fits into current
			 * packet. */
			len = mtu - skb->len;
			if (len < size)
				len = maxfraglen - skb->len;
		}
		if (len <= 0) {
			/* Current skb is full: open a new fragment skb and
			 * move the 8-byte-alignment overhang into it. */
			struct sk_buff *skb_prev;
			int alloclen;

			skb_prev = skb;
			fraggap = skb_prev->len - maxfraglen;

			alloclen = fragheaderlen + hh_len + fraggap + 15;
			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
			if (unlikely(!skb)) {
				err = -ENOBUFS;
				goto error;
			}

			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			skb_reserve(skb, hh_len);

			/*
			 *	Find where to start putting bytes.
			 */
			skb_put(skb, fragheaderlen + fraggap);
			skb_reset_network_header(skb);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Keep both fragments' partial checksums
				 * consistent while moving the overhang. */
				skb->csum = skb_copy_and_csum_bits(skb_prev,
								   maxfraglen,
						    skb_transport_header(skb),
								   fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		i = skb_shinfo(skb)->nr_frags;
		if (len > size)
			len = size;
		/* Extend the last page fragment when contiguous, else add a
		 * new fragment slot (taking a reference on the page). */
		if (skb_can_coalesce(skb, i, page, offset)) {
			skb_shinfo(skb)->frags[i-1].size += len;
		} else if (i < MAX_SKB_FRAGS) {
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, len);
		} else {
			err = -EMSGSIZE;
			goto error;
		}

		if (skb->ip_summed == CHECKSUM_NONE) {
			__wsum csum;
			csum = csum_page(page, offset, len);
			skb->csum = csum_block_add(skb->csum, csum, skb->len);
		}

		skb->len += len;
		skb->data_len += len;
		offset += len;
		size -= len;
	}
	return 0;

error:
	/* Roll the uncommitted bytes back out of the cork accounting. */
	inet->cork.length -= size;
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err;
}

/*
 *	Combined all pending IP fragments on the socket as one IP datagram
 *	and push them out.
 */
int ip_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct inet_sock *inet = inet_sk(sk);
	struct ip_options *opt = NULL;
	struct rtable *rt = inet->cork.rt;
	struct iphdr *iph;
	__be16 df = 0;
	__u8 ttl;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain every queued fragment onto the first skb's frag_list and
	 * transfer their byte/truesize accounting and socket references. */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Unless user demanded real pmtu discovery
	   (IP_PMTUDISC_DO), we allow
	 * the frame generated here to be fragmented.  No matter how
	 * transforms change the size of the packet, it will come out.
	 */
	if (inet->pmtudisc < IP_PMTUDISC_DO)
		skb->local_df = 1;

	/* DF bit is set when we want to see DF on outgoing frames.
	 * If local_df is set too, we still allow to fragment this frame
	 * locally. */
	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
	    (skb->len <= dst_mtu(&rt->u.dst) &&
	     ip_dont_fragment(sk, &rt->u.dst)))
		df = htons(IP_DF);

	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	if (rt->rt_type == RTN_MULTICAST)
		ttl = inet->mc_ttl;
	else
		ttl = ip_select_ttl(inet, &rt->u.dst);

	/* Build the IPv4 header in front of the assembled payload. */
	iph = (struct iphdr *)skb->data;
	iph->version = 4;
	iph->ihl = 5;
	if (opt) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
	}
	iph->tos = inet->tos;
	iph->tot_len = htons(skb->len);
	iph->frag_off = df;
	ip_select_ident(iph, &rt->u.dst, sk);
	iph->ttl = ttl;
	iph->protocol = sk->sk_protocol;
	iph->saddr = rt->rt_src;
	iph->daddr = rt->rt_dst;
	ip_send_check(iph);

	skb->priority = sk->sk_priority;
	skb->dst = dst_clone(&rt->u.dst);

	/* Netfilter gets whole the not fragmented skb. */
	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
		      skb->dst->dev, dst_output);
	if (err) {
		/* Positive values are congestion codes from the qdisc; only
		 * surface them when the user asked for error reporting. */
		if (err > 0)
			err = inet->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	/* Tear down the cork state in every case (success or error). */
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(inet->cork.opt);
	inet->cork.opt = NULL;
	if (inet->cork.rt) {
		ip_rt_put(inet->cork.rt);
		inet->cork.rt = NULL;
	}
	return err;

error:
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	goto out;
}

/*
 *	Throw away all pending data on the socket.
 */
void ip_flush_pending_frames(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;

	/* Free every queued fragment, then drop the cork option/route
	 * state exactly as ip_push_pending_frames does on completion. */
	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
		kfree_skb(skb);

	inet->cork.flags &= ~IPCORK_OPT;
	kfree(inet->cork.opt);
	inet->cork.opt = NULL;
	if (inet->cork.rt) {
		ip_rt_put(inet->cork.rt);
		inet->cork.rt = NULL;
	}
}


/*
 *	Fetch data from kernel space and fill in checksum if needed.
 *	getfrag() callback for ip_append_data when the source buffer is
 *	already in kernel memory (see ip_send_reply below).
 */
static int ip_reply_glue_bits(void *dptr, char *to, int offset,
			      int len, int odd, struct sk_buff *skb)
{
	__wsum csum;

	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	return 0;
}

/*
 *	Generic function to send a packet as reply to another packet.
 *	Used to send TCP resets so far. ICMP should use this function too.
 *
 *	Should run single threaded per socket because it uses the sock
 *	structure to pass arguments.
 *
 *	LATER: switch from ip_build_xmit to ip_append_*
 */
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
		   unsigned int len)
{
	struct inet_sock *inet = inet_sk(sk);
	/* Scratch space for echoing the incoming packet's IP options;
	 * 40 bytes is the maximum IPv4 option length. */
	struct {
		struct ip_options opt;
		char data[40];
	} replyopts;
	struct ipcm_cookie ipc;
	__be32 daddr;
	struct rtable *rt = (struct rtable*)skb->dst;

	if (ip_options_echo(&replyopts.opt, skb))
		return;

	/* Reply goes back to the source the original packet came from. */
	daddr = ipc.addr = rt->rt_src;
	ipc.opt = NULL;

	if (replyopts.opt.optlen) {
		ipc.opt = &replyopts.opt;

		/* With source routing, send to the first-hop address. */
		if (ipc.opt->srr)
			daddr = replyopts.opt.faddr;
	}

	{
		struct flowi fl = { .oif = arg->bound_dev_if,
				    .nl_u = { .ip4_u =
					      { .daddr = daddr,
						.saddr = rt->rt_spec_dst,
						.tos = RT_TOS(ip_hdr(skb)->tos) } },
				    /* Not quite clean, but right.
				     */
				    .uli_u = { .ports =
					       { .sport = tcp_hdr(skb)->dest,
						 .dport = tcp_hdr(skb)->source } },
				    .proto = sk->sk_protocol };
		security_skb_classify_flow(skb, &fl);
		if (ip_route_output_key(&rt, &fl))
			return;
	}

	/* And let IP do all the hard work.

	   This chunk is not reenterable, hence spinlock.
	   Note that it uses the fact, that this function is called
	   with locally disabled BH and that sk cannot be already spinlocked.
	 */
	bh_lock_sock(sk);
	inet->tos = ip_hdr(skb)->tos;
	sk->sk_priority = skb->priority;
	sk->sk_protocol = ip_hdr(skb)->protocol;
	sk->sk_bound_dev_if = arg->bound_dev_if;
	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
		       &ipc, rt, MSG_DONTWAIT);
	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
		/* Patch the transport checksum field in place (e.g. the TCP
		 * checksum for a RST) before the frame is pushed out. */
		if (arg->csumoffset >= 0)
			*((__sum16 *)skb_transport_header(skb) +
			  arg->csumoffset) = csum_fold(csum_add(skb->csum,
								arg->csum));
		skb->ip_summed = CHECKSUM_NONE;
		ip_push_pending_frames(sk);
	}

	bh_unlock_sock(sk);

	ip_rt_put(rt);
}

void __init ip_init(void)
{
	ip_rt_init();
	inet_initpeers();

#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
	igmp_mc_proc_init();
#endif
}

EXPORT_SYMBOL(ip_generic_getfrag);
EXPORT_SYMBOL(ip_queue_xmit);
EXPORT_SYMBOL(ip_send_check);

/* Fxcn port-S Wins, 0714-09 */
//Foxconn add start, Lewis Min, for OpenDNS, 12/12/2008
/* Register a bridge post-route hook function.
 * NOTE(review): br_post_insert_hook is declared elsewhere; the store is a
 * plain pointer assignment with no locking or memory barrier — presumably
 * callers serialize registration; verify against the bridge code. */
void insert_func_to_BR_POST_ROUTE(void *FUNC)
{
	br_post_insert_hook= FUNC;
}


/* Unregister the bridge post-route hook (same locking caveat as above). */
void remove_func_from_BR_POST_ROUTE(void)
{
	br_post_insert_hook= NULL;
}
//Foxconn add end, Lewis Min, for OpenDNS, 12/12/2008
/* Fxcn port-E Wins, 0714-09 */