1/* 2 * IPv6 output functions 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * Based on linux/net/ipv4/ip_output.c 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 * 15 * Changes: 16 * A.N.Kuznetsov : airthmetics in fragmentation. 17 * extension headers are implemented. 18 * route changes now work. 19 * ip6_forward does not confuse sniffers. 20 * etc. 21 * 22 * H. von Brand : Added missing #include <linux/string.h> 23 * Imran Patel : frag id should be in NBO 24 * Kazunori MIYAZAWA @USAGI 25 * : add ip6_append_data and related functions 26 * for datagram xmit 27 */ 28 29#include <linux/errno.h> 30#include <linux/kernel.h> 31#include <linux/string.h> 32#include <linux/socket.h> 33#include <linux/net.h> 34#include <linux/netdevice.h> 35#include <linux/if_arp.h> 36#include <linux/in6.h> 37#include <linux/tcp.h> 38#include <linux/route.h> 39#include <linux/module.h> 40#include <linux/slab.h> 41 42#include <linux/netfilter.h> 43#include <linux/netfilter_ipv6.h> 44 45#include <net/sock.h> 46#include <net/snmp.h> 47 48#include <net/ipv6.h> 49#include <net/ndisc.h> 50#include <net/protocol.h> 51#include <net/ip6_route.h> 52#include <net/addrconf.h> 53#include <net/rawv6.h> 54#include <net/icmp.h> 55#include <net/xfrm.h> 56#include <net/checksum.h> 57#include <linux/mroute6.h> 58 59static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); 60 61int __ip6_local_out(struct sk_buff *skb) 62{ 63 int len; 64 65 len = skb->len - sizeof(struct ipv6hdr); 66 if (len > IPV6_MAXPLEN) 67 len = 0; 68 ipv6_hdr(skb)->payload_len = htons(len); 69 70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 71 skb_dst(skb)->dev, dst_output); 72} 73 74int ip6_local_out(struct sk_buff *skb) 75{ 76 int err; 77 78 err = __ip6_local_out(skb); 79 if (likely(err == 1)) 80 err = dst_output(skb); 81 82 return err; 83} 84EXPORT_SYMBOL_GPL(ip6_local_out); 85 86/* dev_loopback_xmit for use with netfilter. */ 87static int ip6_dev_loopback_xmit(struct sk_buff *newskb) 88{ 89 skb_reset_mac_header(newskb); 90 __skb_pull(newskb, skb_network_offset(newskb)); 91 newskb->pkt_type = PACKET_LOOPBACK; 92 newskb->ip_summed = CHECKSUM_UNNECESSARY; 93 WARN_ON(!skb_dst(newskb)); 94 95 netif_rx_ni(newskb); 96 return 0; 97} 98 99static int ip6_finish_output2(struct sk_buff *skb) 100{ 101 struct dst_entry *dst = skb_dst(skb); 102 struct net_device *dev = dst->dev; 103 104 skb->protocol = htons(ETH_P_IPV6); 105 skb->dev = dev; 106 107 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { 108 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 109 110 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && 111 ((mroute6_socket(dev_net(dev), skb) && 112 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || 113 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, 114 &ipv6_hdr(skb)->saddr))) { 115 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 116 117 /* Do not check for IFF_ALLMULTI; multicast routing 118 is not supported in any case. 119 */ 120 if (newskb) 121 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 122 newskb, NULL, newskb->dev, 123 ip6_dev_loopback_xmit); 124 125 if (ipv6_hdr(skb)->hop_limit == 0) { 126 IP6_INC_STATS(dev_net(dev), idev, 127 IPSTATS_MIB_OUTDISCARDS); 128 kfree_skb(skb); 129 return 0; 130 } 131 } 132 133 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, 134 skb->len); 135 } 136 137 if (dst->hh) 138 return neigh_hh_output(dst->hh, skb); 139 else if (dst->neighbour) 140 return dst->neighbour->output(skb); 141 142 IP6_INC_STATS_BH(dev_net(dst->dev), 143 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 144 kfree_skb(skb); 145 return -EINVAL; 146} 147 148static inline int ip6_skb_dst_mtu(struct sk_buff *skb) 149{ 150 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; 151 152 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? 153 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); 154} 155 156static int ip6_finish_output(struct sk_buff *skb) 157{ 158 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 159 dst_allfrag(skb_dst(skb))) 160 return ip6_fragment(skb, ip6_finish_output2); 161 else 162 return ip6_finish_output2(skb); 163} 164 165int ip6_output(struct sk_buff *skb) 166{ 167 struct net_device *dev = skb_dst(skb)->dev; 168 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 169 if (unlikely(idev->cnf.disable_ipv6)) { 170 IP6_INC_STATS(dev_net(dev), idev, 171 IPSTATS_MIB_OUTDISCARDS); 172 kfree_skb(skb); 173 return 0; 174 } 175 176 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, 177 ip6_finish_output, 178 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 179} 180 181/* 182 * xmit an sk_buff (used by TCP, SCTP and DCCP) 183 */ 184 185int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, 186 struct ipv6_txoptions *opt) 187{ 188 struct net *net = sock_net(sk); 189 struct ipv6_pinfo *np = inet6_sk(sk); 190 struct in6_addr *first_hop = &fl->fl6_dst; 191 struct dst_entry *dst = skb_dst(skb); 192 struct ipv6hdr *hdr; 193 u8 proto = fl->proto; 194 int seg_len = skb->len; 195 int hlimit = -1; 196 int tclass = 0; 197 u32 mtu; 198 199 if (opt) { 200 unsigned int head_room; 201 202 /* First: exthdrs may take lots of space (~8K for now) 203 MAX_HEADER is not enough. 204 */ 205 head_room = opt->opt_nflen + opt->opt_flen; 206 seg_len += head_room; 207 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 208 209 if (skb_headroom(skb) < head_room) { 210 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 211 if (skb2 == NULL) { 212 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 213 IPSTATS_MIB_OUTDISCARDS); 214 kfree_skb(skb); 215 return -ENOBUFS; 216 } 217 kfree_skb(skb); 218 skb = skb2; 219 skb_set_owner_w(skb, sk); 220 } 221 if (opt->opt_flen) 222 ipv6_push_frag_opts(skb, opt, &proto); 223 if (opt->opt_nflen) 224 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 225 } 226 227 skb_push(skb, sizeof(struct ipv6hdr)); 228 skb_reset_network_header(skb); 229 hdr = ipv6_hdr(skb); 230 231 /* 232 * Fill in the IPv6 header 233 */ 234 if (np) { 235 tclass = np->tclass; 236 hlimit = np->hop_limit; 237 } 238 if (hlimit < 0) 239 hlimit = ip6_dst_hoplimit(dst); 240 241 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; 242 243 hdr->payload_len = htons(seg_len); 244 hdr->nexthdr = proto; 245 hdr->hop_limit = hlimit; 246 247 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 248 ipv6_addr_copy(&hdr->daddr, first_hop); 249 250 skb->priority = sk->sk_priority; 251 skb->mark = sk->sk_mark; 252 253 mtu = dst_mtu(dst); 254 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { 255 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), 256 IPSTATS_MIB_OUT, skb->len); 257 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 258 dst->dev, dst_output); 259 } 260 261 if (net_ratelimit()) 262 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); 263 skb->dev = dst->dev; 264 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 265 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); 266 kfree_skb(skb); 267 return -EMSGSIZE; 268} 269 270EXPORT_SYMBOL(ip6_xmit); 271 272/* 273 * To avoid extra problems ND packets are send through this 274 * routine. It's code duplication but I really want to avoid 275 * extra checks since ipv6_build_header is used by TCP (which 276 * is for us performance critical) 277 */ 278 279int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, 280 const struct in6_addr *saddr, const struct in6_addr *daddr, 281 int proto, int len) 282{ 283 struct ipv6_pinfo *np = inet6_sk(sk); 284 struct ipv6hdr *hdr; 285 int totlen; 286 287 skb->protocol = htons(ETH_P_IPV6); 288 skb->dev = dev; 289 290 totlen = len + sizeof(struct ipv6hdr); 291 292 skb_reset_network_header(skb); 293 skb_put(skb, sizeof(struct ipv6hdr)); 294 hdr = ipv6_hdr(skb); 295 296 *(__be32*)hdr = htonl(0x60000000); 297 298 hdr->payload_len = htons(len); 299 hdr->nexthdr = proto; 300 hdr->hop_limit = np->hop_limit; 301 302 ipv6_addr_copy(&hdr->saddr, saddr); 303 ipv6_addr_copy(&hdr->daddr, daddr); 304 305 return 0; 306} 307 308static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 309{ 310 struct ip6_ra_chain *ra; 311 struct sock *last = NULL; 312 313 read_lock(&ip6_ra_lock); 314 for (ra = ip6_ra_chain; ra; ra = ra->next) { 315 struct sock *sk = ra->sk; 316 if (sk && ra->sel == sel && 317 (!sk->sk_bound_dev_if || 318 sk->sk_bound_dev_if == skb->dev->ifindex)) { 319 if (last) { 320 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 321 if (skb2) 322 rawv6_rcv(last, skb2); 323 } 324 last = sk; 325 } 326 } 327 328 if (last) { 329 rawv6_rcv(last, skb); 330 read_unlock(&ip6_ra_lock); 331 return 1; 332 } 333 read_unlock(&ip6_ra_lock); 334 return 0; 335} 336 337static int ip6_forward_proxy_check(struct sk_buff *skb) 338{ 339 struct ipv6hdr *hdr = ipv6_hdr(skb); 340 u8 nexthdr = hdr->nexthdr; 341 int offset; 342 343 if (ipv6_ext_hdr(nexthdr)) { 344 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); 345 if (offset < 0) 346 return 0; 347 } else 348 offset = sizeof(struct ipv6hdr); 349 350 if (nexthdr == IPPROTO_ICMPV6) { 351 struct icmp6hdr *icmp6; 352 353 if (!pskb_may_pull(skb, (skb_network_header(skb) + 354 offset + 1 - skb->data))) 355 return 0; 356 357 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 358 359 switch (icmp6->icmp6_type) { 360 case NDISC_ROUTER_SOLICITATION: 361 case NDISC_ROUTER_ADVERTISEMENT: 362 case NDISC_NEIGHBOUR_SOLICITATION: 363 case NDISC_NEIGHBOUR_ADVERTISEMENT: 364 case NDISC_REDIRECT: 365 /* For reaction involving unicast neighbor discovery 366 * message destined to the proxied address, pass it to 367 * input function. 368 */ 369 return 1; 370 default: 371 break; 372 } 373 } 374 375 /* 376 * The proxying router can't forward traffic sent to a link-local 377 * address, so signal the sender and discard the packet. This 378 * behavior is clarified by the MIPv6 specification. 379 */ 380 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 381 dst_link_failure(skb); 382 return -1; 383 } 384 385 return 0; 386} 387 388static inline int ip6_forward_finish(struct sk_buff *skb) 389{ 390 return dst_output(skb); 391} 392 393int ip6_forward(struct sk_buff *skb) 394{ 395 struct dst_entry *dst = skb_dst(skb); 396 struct ipv6hdr *hdr = ipv6_hdr(skb); 397 struct inet6_skb_parm *opt = IP6CB(skb); 398 struct net *net = dev_net(dst->dev); 399 u32 mtu; 400 401 if (net->ipv6.devconf_all->forwarding == 0) 402 goto error; 403 404 if (skb_warn_if_lro(skb)) 405 goto drop; 406 407 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 408 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 409 goto drop; 410 } 411 412 skb_forward_csum(skb); 413 414 /* 415 * We DO NOT make any processing on 416 * RA packets, pushing them to user level AS IS 417 * without ane WARRANTY that application will be able 418 * to interpret them. The reason is that we 419 * cannot make anything clever here. 420 * 421 * We are not end-node, so that if packet contains 422 * AH/ESP, we cannot make anything. 423 * Defragmentation also would be mistake, RA packets 424 * cannot be fragmented, because there is no warranty 425 * that different fragments will go along one path. --ANK 426 */ 427 if (opt->ra) { 428 u8 *ptr = skb_network_header(skb) + opt->ra; 429 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) 430 return 0; 431 } 432 433 /* 434 * check and decrement ttl 435 */ 436 if (hdr->hop_limit <= 1) { 437 /* Force OUTPUT device used as source address */ 438 skb->dev = dst->dev; 439 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); 440 IP6_INC_STATS_BH(net, 441 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); 442 443 kfree_skb(skb); 444 return -ETIMEDOUT; 445 } 446 447 if (net->ipv6.devconf_all->proxy_ndp && 448 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { 449 int proxied = ip6_forward_proxy_check(skb); 450 if (proxied > 0) 451 return ip6_input(skb); 452 else if (proxied < 0) { 453 IP6_INC_STATS(net, ip6_dst_idev(dst), 454 IPSTATS_MIB_INDISCARDS); 455 goto drop; 456 } 457 } 458 459 if (!xfrm6_route_forward(skb)) { 460 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 461 goto drop; 462 } 463 dst = skb_dst(skb); 464 465 /* IPv6 specs say nothing about it, but it is clear that we cannot 466 send redirects to source routed frames. 467 We don't send redirects to frames decapsulated from IPsec. 468 */ 469 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && 470 !skb_sec_path(skb)) { 471 struct in6_addr *target = NULL; 472 struct rt6_info *rt; 473 struct neighbour *n = dst->neighbour; 474 475 /* 476 * incoming and outgoing devices are the same 477 * send a redirect. 478 */ 479 480 rt = (struct rt6_info *) dst; 481 if ((rt->rt6i_flags & RTF_GATEWAY)) 482 target = (struct in6_addr*)&n->primary_key; 483 else 484 target = &hdr->daddr; 485 486 /* Limit redirects both by destination (here) 487 and by source (inside ndisc_send_redirect) 488 */ 489 if (xrlim_allow(dst, 1*HZ)) 490 ndisc_send_redirect(skb, n, target); 491 } else { 492 int addrtype = ipv6_addr_type(&hdr->saddr); 493 494 /* This check is security critical. */ 495 if (addrtype == IPV6_ADDR_ANY || 496 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 497 goto error; 498 if (addrtype & IPV6_ADDR_LINKLOCAL) { 499 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 500 ICMPV6_NOT_NEIGHBOUR, 0); 501 goto error; 502 } 503#if 1 /* IPv6Ready- Test v6LC.1.1.10 Part C: Request sent from unspecified address 504 * RFC 2460: Internet Protocol, Version 6 (IPv6) Specification 505 */ 506 if (addrtype == IPV6_ADDR_ANY) { 507 //IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 508 goto drop; 509 } 510#endif 511 } 512 513 mtu = dst_mtu(dst); 514 if (mtu < IPV6_MIN_MTU) 515 mtu = IPV6_MIN_MTU; 516 517 if (skb->len > mtu && !skb_is_gso(skb)) { 518 /* Again, force OUTPUT device used as source address */ 519 skb->dev = dst->dev; 520 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 521 IP6_INC_STATS_BH(net, 522 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); 523 IP6_INC_STATS_BH(net, 524 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); 525 kfree_skb(skb); 526 return -EMSGSIZE; 527 } 528 529 if (skb_cow(skb, dst->dev->hard_header_len)) { 530 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); 531 goto drop; 532 } 533 534 hdr = ipv6_hdr(skb); 535 536 /* Mangling hops number delayed to point after skb COW */ 537 538 hdr->hop_limit--; 539 540 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 541 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, 542 ip6_forward_finish); 543 544error: 545 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 546drop: 547 kfree_skb(skb); 548 return -EINVAL; 549} 550 551static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 552{ 553 to->pkt_type = from->pkt_type; 554 to->priority = from->priority; 555 to->protocol = from->protocol; 556 skb_dst_drop(to); 557 skb_dst_set(to, dst_clone(skb_dst(from))); 558 to->dev = from->dev; 559 to->mark = from->mark; 560 561#ifdef CONFIG_NET_SCHED 562 to->tc_index = from->tc_index; 563#endif 564 nf_copy(to, from); 565#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 566 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 567 to->nf_trace = from->nf_trace; 568#endif 569 skb_copy_secmark(to, from); 570} 571 572int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) 573{ 574 u16 offset = sizeof(struct ipv6hdr); 575 struct ipv6_opt_hdr *exthdr = 576 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); 577 unsigned int packet_len = skb->tail - skb->network_header; 578 int found_rhdr = 0; 579 *nexthdr = &ipv6_hdr(skb)->nexthdr; 580 581 while (offset + 1 <= packet_len) { 582 583 switch (**nexthdr) { 584 585 case NEXTHDR_HOP: 586 break; 587 case NEXTHDR_ROUTING: 588 found_rhdr = 1; 589 break; 590 case NEXTHDR_DEST: 591#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 592 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) 593 break; 594#endif 595 if (found_rhdr) 596 return offset; 597 break; 598 default : 599 return offset; 600 } 601 602 offset += ipv6_optlen(exthdr); 603 *nexthdr = &exthdr->nexthdr; 604 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + 605 offset); 606 } 607 608 return offset; 609} 610 611static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 612{ 613 struct sk_buff *frag; 614 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); 615 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; 616 struct ipv6hdr *tmp_hdr; 617 struct frag_hdr *fh; 618 unsigned int mtu, hlen, left, len; 619 __be32 frag_id = 0; 620 int ptr, offset = 0, err=0; 621 u8 *prevhdr, nexthdr = 0; 622 struct net *net = dev_net(skb_dst(skb)->dev); 623 624 hlen = ip6_find_1stfragopt(skb, &prevhdr); 625 nexthdr = *prevhdr; 626 627 mtu = ip6_skb_dst_mtu(skb); 628 629 /* We must not fragment if the socket is set to force MTU discovery 630 * or if the skb it not generated by a local socket. 631 */ 632 if (!skb->local_df && skb->len > mtu) { 633 skb->dev = skb_dst(skb)->dev; 634 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 635 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 636 IPSTATS_MIB_FRAGFAILS); 637 kfree_skb(skb); 638 return -EMSGSIZE; 639 } 640 641 if (np && np->frag_size < mtu) { 642 if (np->frag_size) 643 mtu = np->frag_size; 644 } 645 mtu -= hlen + sizeof(struct frag_hdr); 646 647 if (skb_has_frags(skb)) { 648 int first_len = skb_pagelen(skb); 649 struct sk_buff *frag2; 650 651 if (first_len - hlen > mtu || 652 ((first_len - hlen) & 7) || 653 skb_cloned(skb)) 654 goto slow_path; 655 656 skb_walk_frags(skb, frag) { 657 /* Correct geometry. */ 658 if (frag->len > mtu || 659 ((frag->len & 7) && frag->next) || 660 skb_headroom(frag) < hlen) 661 goto slow_path_clean; 662 663 /* Partially cloned skb? */ 664 if (skb_shared(frag)) 665 goto slow_path_clean; 666 667 BUG_ON(frag->sk); 668 if (skb->sk) { 669 frag->sk = skb->sk; 670 frag->destructor = sock_wfree; 671 } 672 skb->truesize -= frag->truesize; 673 } 674 675 err = 0; 676 offset = 0; 677 frag = skb_shinfo(skb)->frag_list; 678 skb_frag_list_init(skb); 679 /* BUILD HEADER */ 680 681 *prevhdr = NEXTHDR_FRAGMENT; 682 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 683 if (!tmp_hdr) { 684 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 685 IPSTATS_MIB_FRAGFAILS); 686 return -ENOMEM; 687 } 688 689 __skb_pull(skb, hlen); 690 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); 691 __skb_push(skb, hlen); 692 skb_reset_network_header(skb); 693 memcpy(skb_network_header(skb), tmp_hdr, hlen); 694 695 ipv6_select_ident(fh); 696 fh->nexthdr = nexthdr; 697 fh->reserved = 0; 698 fh->frag_off = htons(IP6_MF); 699 frag_id = fh->identification; 700 701 first_len = skb_pagelen(skb); 702 skb->data_len = first_len - skb_headlen(skb); 703 skb->len = first_len; 704 ipv6_hdr(skb)->payload_len = htons(first_len - 705 sizeof(struct ipv6hdr)); 706 707 dst_hold(&rt->dst); 708 709 for (;;) { 710 /* Prepare header of the next frame, 711 * before previous one went down. */ 712 if (frag) { 713 frag->ip_summed = CHECKSUM_NONE; 714 skb_reset_transport_header(frag); 715 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); 716 __skb_push(frag, hlen); 717 skb_reset_network_header(frag); 718 memcpy(skb_network_header(frag), tmp_hdr, 719 hlen); 720 offset += skb->len - hlen - sizeof(struct frag_hdr); 721 fh->nexthdr = nexthdr; 722 fh->reserved = 0; 723 fh->frag_off = htons(offset); 724 if (frag->next != NULL) 725 fh->frag_off |= htons(IP6_MF); 726 fh->identification = frag_id; 727 ipv6_hdr(frag)->payload_len = 728 htons(frag->len - 729 sizeof(struct ipv6hdr)); 730 ip6_copy_metadata(frag, skb); 731 } 732 733 err = output(skb); 734 if(!err) 735 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 736 IPSTATS_MIB_FRAGCREATES); 737 738 if (err || !frag) 739 break; 740 741 skb = frag; 742 frag = skb->next; 743 skb->next = NULL; 744 } 745 746 kfree(tmp_hdr); 747 748 if (err == 0) { 749 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 750 IPSTATS_MIB_FRAGOKS); 751 dst_release(&rt->dst); 752 return 0; 753 } 754 755 while (frag) { 756 skb = frag->next; 757 kfree_skb(frag); 758 frag = skb; 759 } 760 761 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 762 IPSTATS_MIB_FRAGFAILS); 763 dst_release(&rt->dst); 764 return err; 765 766slow_path_clean: 767 skb_walk_frags(skb, frag2) { 768 if (frag2 == frag) 769 break; 770 frag2->sk = NULL; 771 frag2->destructor = NULL; 772 skb->truesize += frag2->truesize; 773 } 774 } 775 776slow_path: 777 left = skb->len - hlen; /* Space per frame */ 778 ptr = hlen; /* Where to start from */ 779 780 /* 781 * Fragment the datagram. 782 */ 783 784 *prevhdr = NEXTHDR_FRAGMENT; 785 786 /* 787 * Keep copying data until we run out. 788 */ 789 while(left > 0) { 790 len = left; 791 /* IF: it doesn't fit, use 'mtu' - the data space left */ 792 if (len > mtu) 793 len = mtu; 794 /* IF: we are not sending upto and including the packet end 795 then align the next start on an eight byte boundary */ 796 if (len < left) { 797 len &= ~7; 798 } 799 /* 800 * Allocate buffer. 801 */ 802 803 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) { 804 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); 805 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 806 IPSTATS_MIB_FRAGFAILS); 807 err = -ENOMEM; 808 goto fail; 809 } 810 811 /* 812 * Set up data on packet 813 */ 814 815 ip6_copy_metadata(frag, skb); 816 skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev)); 817 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 818 skb_reset_network_header(frag); 819 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 820 frag->transport_header = (frag->network_header + hlen + 821 sizeof(struct frag_hdr)); 822 823 /* 824 * Charge the memory for the fragment to any owner 825 * it might possess 826 */ 827 if (skb->sk) 828 skb_set_owner_w(frag, skb->sk); 829 830 /* 831 * Copy the packet header into the new buffer. 832 */ 833 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); 834 835 /* 836 * Build fragment header. 837 */ 838 fh->nexthdr = nexthdr; 839 fh->reserved = 0; 840 if (!frag_id) { 841 ipv6_select_ident(fh); 842 frag_id = fh->identification; 843 } else 844 fh->identification = frag_id; 845 846 /* 847 * Copy a block of the IP datagram. 848 */ 849 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) 850 BUG(); 851 left -= len; 852 853 fh->frag_off = htons(offset); 854 if (left > 0) 855 fh->frag_off |= htons(IP6_MF); 856 ipv6_hdr(frag)->payload_len = htons(frag->len - 857 sizeof(struct ipv6hdr)); 858 859 ptr += len; 860 offset += len; 861 862 /* 863 * Put this fragment into the sending queue. 864 */ 865 err = output(frag); 866 if (err) 867 goto fail; 868 869 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 870 IPSTATS_MIB_FRAGCREATES); 871 } 872 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 873 IPSTATS_MIB_FRAGOKS); 874 kfree_skb(skb); 875 return err; 876 877fail: 878 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 879 IPSTATS_MIB_FRAGFAILS); 880 kfree_skb(skb); 881 return err; 882} 883 884static inline int ip6_rt_check(struct rt6key *rt_key, 885 struct in6_addr *fl_addr, 886 struct in6_addr *addr_cache) 887{ 888 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 889 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); 890} 891 892static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 893 struct dst_entry *dst, 894 struct flowi *fl) 895{ 896 struct ipv6_pinfo *np = inet6_sk(sk); 897 struct rt6_info *rt = (struct rt6_info *)dst; 898 899 if (!dst) 900 goto out; 901 902 /* Yes, checking route validity in not connected 903 * case is not very simple. Take into account, 904 * that we do not support routing by source, TOS, 905 * and MSG_DONTROUTE --ANK (980726) 906 * 907 * 1. ip6_rt_check(): If route was host route, 908 * check that cached destination is current. 909 * If it is network route, we still may 910 * check its validity using saved pointer 911 * to the last used address: daddr_cache. 912 * We do not want to save whole address now, 913 * (because main consumer of this service 914 * is tcp, which has not this problem), 915 * so that the last trick works only on connected 916 * sockets. 917 * 2. oif also should be the same. 918 */ 919 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || 920#ifdef CONFIG_IPV6_SUBTREES 921 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || 922#endif 923 (fl->oif && fl->oif != dst->dev->ifindex)) { 924 dst_release(dst); 925 dst = NULL; 926 } 927 928out: 929 return dst; 930} 931 932static int ip6_dst_lookup_tail(struct sock *sk, 933 struct dst_entry **dst, struct flowi *fl) 934{ 935 int err; 936 struct net *net = sock_net(sk); 937 938 if (*dst == NULL) 939 *dst = ip6_route_output(net, sk, fl); 940 941 if ((err = (*dst)->error)) 942 goto out_err_release; 943 944 if (ipv6_addr_any(&fl->fl6_src)) { 945 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, 946 &fl->fl6_dst, 947 sk ? inet6_sk(sk)->srcprefs : 0, 948 &fl->fl6_src); 949 if (err) 950 goto out_err_release; 951 } 952 953#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 954 /* 955 * Here if the dst entry we've looked up 956 * has a neighbour entry that is in the INCOMPLETE 957 * state and the src address from the flow is 958 * marked as OPTIMISTIC, we release the found 959 * dst entry and replace it instead with the 960 * dst entry of the nexthop router 961 */ 962 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { 963 struct inet6_ifaddr *ifp; 964 struct flowi fl_gw; 965 int redirect; 966 967 ifp = ipv6_get_ifaddr(net, &fl->fl6_src, 968 (*dst)->dev, 1); 969 970 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 971 if (ifp) 972 in6_ifa_put(ifp); 973 974 if (redirect) { 975 /* 976 * We need to get the dst entry for the 977 * default router instead 978 */ 979 dst_release(*dst); 980 memcpy(&fl_gw, fl, sizeof(struct flowi)); 981 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); 982 *dst = ip6_route_output(net, sk, &fl_gw); 983 if ((err = (*dst)->error)) 984 goto out_err_release; 985 } 986 } 987#endif 988 989 return 0; 990 991out_err_release: 992 if (err == -ENETUNREACH) 993 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); 994 dst_release(*dst); 995 *dst = NULL; 996 return err; 997} 998 999/** 1000 * ip6_dst_lookup - perform route lookup on flow 1001 * @sk: socket which provides route info 1002 * @dst: pointer to dst_entry * for result 1003 * @fl: flow to lookup 1004 * 1005 * This function performs a route lookup on the given flow. 1006 * 1007 * It returns zero on success, or a standard errno code on error. 1008 */ 1009int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 1010{ 1011 *dst = NULL; 1012 return ip6_dst_lookup_tail(sk, dst, fl); 1013} 1014EXPORT_SYMBOL_GPL(ip6_dst_lookup); 1015 1016/** 1017 * ip6_sk_dst_lookup - perform socket cached route lookup on flow 1018 * @sk: socket which provides the dst cache and route info 1019 * @dst: pointer to dst_entry * for result 1020 * @fl: flow to lookup 1021 * 1022 * This function performs a route lookup on the given flow with the 1023 * possibility of using the cached route in the socket if it is valid. 1024 * It will take the socket dst lock when operating on the dst cache. 1025 * As a result, this function can only be used in process context. 1026 * 1027 * It returns zero on success, or a standard errno code on error. 1028 */ 1029int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 1030{ 1031 *dst = NULL; 1032 if (sk) { 1033 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1034 *dst = ip6_sk_dst_check(sk, *dst, fl); 1035 } 1036 1037 return ip6_dst_lookup_tail(sk, dst, fl); 1038} 1039EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); 1040 1041static inline int ip6_ufo_append_data(struct sock *sk, 1042 int getfrag(void *from, char *to, int offset, int len, 1043 int odd, struct sk_buff *skb), 1044 void *from, int length, int hh_len, int fragheaderlen, 1045 int transhdrlen, int mtu,unsigned int flags) 1046 1047{ 1048 struct sk_buff *skb; 1049 int err; 1050 1051 /* There is support for UDP large send offload by network 1052 * device, so create one single skb packet containing complete 1053 * udp datagram 1054 */ 1055 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { 1056 skb = sock_alloc_send_skb(sk, 1057 hh_len + fragheaderlen + transhdrlen + 20, 1058 (flags & MSG_DONTWAIT), &err); 1059 if (skb == NULL) 1060 return -ENOMEM; 1061 1062 /* reserve space for Hardware header */ 1063 skb_reserve(skb, hh_len); 1064 1065 /* create space for UDP/IP header */ 1066 skb_put(skb,fragheaderlen + transhdrlen); 1067 1068 /* initialize network header pointer */ 1069 skb_reset_network_header(skb); 1070 1071 /* initialize protocol header pointer */ 1072 skb->transport_header = skb->network_header + fragheaderlen; 1073 1074 skb->ip_summed = CHECKSUM_PARTIAL; 1075 skb->csum = 0; 1076 sk->sk_sndmsg_off = 0; 1077 } 1078 1079 err = skb_append_datato_frags(sk,skb, getfrag, from, 1080 (length - transhdrlen)); 1081 if (!err) { 1082 struct frag_hdr fhdr; 1083 1084 /* Specify the length of each IPv6 datagram fragment. 1085 * It has to be a multiple of 8. 1086 */ 1087 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - 1088 sizeof(struct frag_hdr)) & ~7; 1089 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1090 ipv6_select_ident(&fhdr); 1091 skb_shinfo(skb)->ip6_frag_id = fhdr.identification; 1092 __skb_queue_tail(&sk->sk_write_queue, skb); 1093 1094 return 0; 1095 } 1096 /* There is not enough support do UPD LSO, 1097 * so follow normal path 1098 */ 1099 kfree_skb(skb); 1100 1101 return err; 1102} 1103 1104static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1105 gfp_t gfp) 1106{ 1107 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1108} 1109 1110static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 1111 gfp_t gfp) 1112{ 1113 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1114} 1115 1116int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, 1117 int offset, int len, int odd, struct sk_buff *skb), 1118 void *from, int length, int transhdrlen, 1119 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, 1120 struct rt6_info *rt, unsigned int flags, int dontfrag) 1121{ 1122 struct inet_sock *inet = inet_sk(sk); 1123 struct ipv6_pinfo *np = inet6_sk(sk); 1124 struct sk_buff *skb; 1125 unsigned int maxfraglen, fragheaderlen; 1126 int exthdrlen; 1127 int hh_len; 1128 int mtu; 1129 int copy; 1130 int err; 1131 int offset = 0; 1132 int csummode = CHECKSUM_NONE; 1133 1134 if (flags&MSG_PROBE) 1135 return 0; 1136 if (skb_queue_empty(&sk->sk_write_queue)) { 1137 /* 1138 * setup for corking 1139 */ 1140 if (opt) { 1141 if (WARN_ON(np->cork.opt)) 1142 return -EINVAL; 1143 1144 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); 1145 if (unlikely(np->cork.opt == NULL)) 1146 return -ENOBUFS; 1147 1148 np->cork.opt->tot_len = opt->tot_len; 1149 np->cork.opt->opt_flen = opt->opt_flen; 1150 np->cork.opt->opt_nflen = opt->opt_nflen; 1151 1152 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, 1153 sk->sk_allocation); 1154 if (opt->dst0opt && !np->cork.opt->dst0opt) 1155 return -ENOBUFS; 1156 1157 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, 1158 sk->sk_allocation); 1159 if (opt->dst1opt && !np->cork.opt->dst1opt) 1160 return -ENOBUFS; 1161 1162 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, 1163 sk->sk_allocation); 1164 if (opt->hopopt && !np->cork.opt->hopopt) 1165 return -ENOBUFS; 1166 1167 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, 1168 sk->sk_allocation); 1169 if (opt->srcrt && !np->cork.opt->srcrt) 1170 return -ENOBUFS; 1171 1172 /* need source address above miyazawa*/ 1173 } 1174 dst_hold(&rt->dst); 1175 inet->cork.dst = &rt->dst; 1176 inet->cork.fl = *fl; 1177 np->cork.hop_limit = hlimit; 1178 np->cork.tclass = tclass; 1179 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1180 rt->dst.dev->mtu : dst_mtu(rt->dst.path); 1181 if (np->frag_size < mtu) { 1182 if (np->frag_size) 1183 mtu = np->frag_size; 1184 } 1185 inet->cork.fragsize = mtu; 1186 if (dst_allfrag(rt->dst.path)) 1187 inet->cork.flags |= IPCORK_ALLFRAG; 1188 inet->cork.length = 0; 1189 sk->sk_sndmsg_page = NULL; 1190 sk->sk_sndmsg_off = 0; 1191 exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - 1192 rt->rt6i_nfheader_len; 1193 length += exthdrlen; 1194 transhdrlen += exthdrlen; 1195 } else { 1196 rt = (struct rt6_info *)inet->cork.dst; 1197 fl = &inet->cork.fl; 1198 opt = np->cork.opt; 1199 transhdrlen = 0; 1200 exthdrlen = 0; 1201 mtu = inet->cork.fragsize; 1202 } 1203 1204 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1205 1206 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1207 (opt ? opt->opt_nflen : 0); 1208 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); 1209 1210 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1211 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { 1212 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); 1213 return -EMSGSIZE; 1214 } 1215 } 1216 1217 1218 inet->cork.length += length; 1219 if (length > mtu) { 1220 int proto = sk->sk_protocol; 1221 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ 1222 ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen); 1223 return -EMSGSIZE; 1224 } 1225 1226 if (proto == IPPROTO_UDP && 1227 (rt->dst.dev->features & NETIF_F_UFO)) { 1228 1229 err = ip6_ufo_append_data(sk, getfrag, from, length, 1230 hh_len, fragheaderlen, 1231 transhdrlen, mtu, flags); 1232 if (err) 1233 goto error; 1234 return 0; 1235 } 1236 } 1237 1238 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1239 goto alloc_new_skb; 1240 1241 while (length > 0) { 1242 /* Check if the remaining data fits into current packet. */ 1243 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 1244 if (copy < length) 1245 copy = maxfraglen - skb->len; 1246 1247 if (copy <= 0) { 1248 char *data; 1249 unsigned int datalen; 1250 unsigned int fraglen; 1251 unsigned int fraggap; 1252 unsigned int alloclen; 1253 struct sk_buff *skb_prev; 1254alloc_new_skb: 1255 skb_prev = skb; 1256 1257 /* There's no room in the current skb */ 1258 if (skb_prev) 1259 fraggap = skb_prev->len - maxfraglen; 1260 else 1261 fraggap = 0; 1262 1263 /* 1264 * If remaining data exceeds the mtu, 1265 * we know we need more fragment(s). 1266 */ 1267 datalen = length + fraggap; 1268 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 1269 datalen = maxfraglen - fragheaderlen; 1270 1271 fraglen = datalen + fragheaderlen; 1272 if ((flags & MSG_MORE) && 1273 !(rt->dst.dev->features&NETIF_F_SG)) 1274 alloclen = mtu; 1275 else 1276 alloclen = datalen + fragheaderlen; 1277 1278 /* 1279 * The last fragment gets additional space at tail. 1280 * Note: we overallocate on fragments with MSG_MODE 1281 * because we have no idea if we're the last one. 1282 */ 1283 if (datalen == length + fraggap) 1284 alloclen += rt->dst.trailer_len; 1285 1286 /* 1287 * We just reserve space for fragment header. 1288 * Note: this may be overallocation if the message 1289 * (without MSG_MORE) fits into the MTU. 1290 */ 1291 alloclen += sizeof(struct frag_hdr); 1292 1293 if (transhdrlen) { 1294 skb = sock_alloc_send_skb(sk, 1295 alloclen + hh_len, 1296 (flags & MSG_DONTWAIT), &err); 1297 } else { 1298 skb = NULL; 1299 if (atomic_read(&sk->sk_wmem_alloc) <= 1300 2 * sk->sk_sndbuf) 1301 skb = sock_wmalloc(sk, 1302 alloclen + hh_len, 1, 1303 sk->sk_allocation); 1304 if (unlikely(skb == NULL)) 1305 err = -ENOBUFS; 1306 } 1307 if (skb == NULL) 1308 goto error; 1309 /* 1310 * Fill in the control structures 1311 */ 1312 skb->ip_summed = csummode; 1313 skb->csum = 0; 1314 /* reserve for fragmentation */ 1315 skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); 1316 1317 /* 1318 * Find where to start putting bytes 1319 */ 1320 data = skb_put(skb, fraglen); 1321 skb_set_network_header(skb, exthdrlen); 1322 data += fragheaderlen; 1323 skb->transport_header = (skb->network_header + 1324 fragheaderlen); 1325 if (fraggap) { 1326 skb->csum = skb_copy_and_csum_bits( 1327 skb_prev, maxfraglen, 1328 data + transhdrlen, fraggap, 0); 1329 skb_prev->csum = csum_sub(skb_prev->csum, 1330 skb->csum); 1331 data += fraggap; 1332 pskb_trim_unique(skb_prev, maxfraglen); 1333 } 1334 copy = datalen - transhdrlen - fraggap; 1335 if (copy < 0) { 1336 err = -EINVAL; 1337 kfree_skb(skb); 1338 goto error; 1339 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 1340 err = -EFAULT; 1341 kfree_skb(skb); 1342 goto error; 1343 } 1344 1345 offset += copy; 1346 length -= datalen - fraggap; 1347 transhdrlen = 0; 1348 exthdrlen = 0; 1349 csummode = CHECKSUM_NONE; 1350 1351 /* 1352 * Put the packet on the pending queue 1353 */ 1354 __skb_queue_tail(&sk->sk_write_queue, skb); 1355 continue; 1356 } 1357 1358 if (copy > length) 1359 copy = length; 1360 1361 if (!(rt->dst.dev->features&NETIF_F_SG)) { 1362 unsigned int off; 1363 1364 off = skb->len; 1365 if (getfrag(from, skb_put(skb, copy), 1366 offset, copy, off, skb) < 0) { 1367 __skb_trim(skb, off); 1368 err = -EFAULT; 1369 goto error; 1370 } 1371 } else { 1372 int i = skb_shinfo(skb)->nr_frags; 1373 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 1374 struct page *page = sk->sk_sndmsg_page; 1375 int off = sk->sk_sndmsg_off; 1376 unsigned int left; 1377 1378 if (page && (left = PAGE_SIZE - off) > 0) { 1379 if (copy >= left) 1380 copy = left; 1381 if (page != frag->page) { 1382 if (i == MAX_SKB_FRAGS) { 1383 err = -EMSGSIZE; 1384 goto error; 1385 } 1386 get_page(page); 1387 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); 1388 frag = &skb_shinfo(skb)->frags[i]; 1389 } 1390 } else if(i < MAX_SKB_FRAGS) { 1391 if (copy > PAGE_SIZE) 1392 copy = PAGE_SIZE; 1393 page = alloc_pages(sk->sk_allocation, 0); 1394 if (page == NULL) { 1395 err = -ENOMEM; 1396 goto error; 1397 } 1398 sk->sk_sndmsg_page = page; 1399 sk->sk_sndmsg_off = 0; 1400 1401 skb_fill_page_desc(skb, i, page, 0, 0); 1402 frag = &skb_shinfo(skb)->frags[i]; 1403 } else { 1404 err = -EMSGSIZE; 1405 goto error; 1406 } 1407 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { 1408 err = -EFAULT; 1409 goto error; 1410 } 1411 sk->sk_sndmsg_off += copy; 1412 frag->size += copy; 1413 skb->len += copy; 1414 skb->data_len += copy; 1415 skb->truesize += copy; 1416 atomic_add(copy, &sk->sk_wmem_alloc); 1417 } 1418 offset += copy; 1419 length -= copy; 1420 } 1421 return 0; 1422error: 1423 inet->cork.length -= length; 1424 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1425 return err; 1426} 1427 1428static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) 1429{ 1430 if (np->cork.opt) { 1431 kfree(np->cork.opt->dst0opt); 1432 kfree(np->cork.opt->dst1opt); 1433 kfree(np->cork.opt->hopopt); 1434 kfree(np->cork.opt->srcrt); 1435 kfree(np->cork.opt); 1436 np->cork.opt = NULL; 1437 } 1438 1439 if (inet->cork.dst) { 1440 dst_release(inet->cork.dst); 1441 inet->cork.dst = NULL; 1442 inet->cork.flags &= ~IPCORK_ALLFRAG; 1443 } 1444 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); 1445} 1446 1447int ip6_push_pending_frames(struct sock *sk) 1448{ 1449 struct sk_buff *skb, *tmp_skb; 1450 struct sk_buff **tail_skb; 1451 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 1452 struct inet_sock *inet = inet_sk(sk); 1453 struct ipv6_pinfo *np = inet6_sk(sk); 1454 struct net *net = sock_net(sk); 1455 struct ipv6hdr *hdr; 1456 struct ipv6_txoptions *opt = np->cork.opt; 1457 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; 1458 struct flowi *fl = &inet->cork.fl; 1459 unsigned char proto = fl->proto; 1460 int err = 0; 1461 1462 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 1463 goto out; 1464 tail_skb = &(skb_shinfo(skb)->frag_list); 1465 1466 /* move skb->data to ip header from ext header */ 1467 if (skb->data < skb_network_header(skb)) 1468 __skb_pull(skb, skb_network_offset(skb)); 1469 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1470 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1471 *tail_skb = tmp_skb; 1472 tail_skb = &(tmp_skb->next); 1473 skb->len += tmp_skb->len; 1474 skb->data_len += tmp_skb->len; 1475 skb->truesize += tmp_skb->truesize; 1476 tmp_skb->destructor = NULL; 1477 tmp_skb->sk = NULL; 1478 } 1479 1480 /* Allow local fragmentation. */ 1481 if (np->pmtudisc < IPV6_PMTUDISC_DO) 1482 skb->local_df = 1; 1483 1484 ipv6_addr_copy(final_dst, &fl->fl6_dst); 1485 __skb_pull(skb, skb_network_header_len(skb)); 1486 if (opt && opt->opt_flen) 1487 ipv6_push_frag_opts(skb, opt, &proto); 1488 if (opt && opt->opt_nflen) 1489 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 1490 1491 skb_push(skb, sizeof(struct ipv6hdr)); 1492 skb_reset_network_header(skb); 1493 hdr = ipv6_hdr(skb); 1494 1495 *(__be32*)hdr = fl->fl6_flowlabel | 1496 htonl(0x60000000 | ((int)np->cork.tclass << 20)); 1497 1498 hdr->hop_limit = np->cork.hop_limit; 1499 hdr->nexthdr = proto; 1500 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 1501 ipv6_addr_copy(&hdr->daddr, final_dst); 1502 1503 skb->priority = sk->sk_priority; 1504 skb->mark = sk->sk_mark; 1505 1506 skb_dst_set(skb, dst_clone(&rt->dst)); 1507 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 1508 if (proto == IPPROTO_ICMPV6) { 1509 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1510 1511 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); 1512 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); 1513 } 1514 1515 err = ip6_local_out(skb); 1516 if (err) { 1517 if (err > 0) 1518 err = net_xmit_errno(err); 1519 if (err) 1520 goto error; 1521 } 1522 1523out: 1524 ip6_cork_release(inet, np); 1525 return err; 1526error: 1527 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1528 goto out; 1529} 1530 1531void ip6_flush_pending_frames(struct sock *sk) 1532{ 1533 struct sk_buff *skb; 1534 1535 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { 1536 if (skb_dst(skb)) 1537 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1538 IPSTATS_MIB_OUTDISCARDS); 1539 kfree_skb(skb); 1540 } 1541 1542 ip6_cork_release(inet_sk(sk), inet6_sk(sk)); 1543} 1544