/*
 *	Routines having to do with the 'struct sk_buff' memory handlers.
 *
 *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
 *			Florian La Roche <rzsfl@rz.uni-sb.de>
 *
 *	Version:	$Id: skbuff.c,v 1.1.1.1 2008/10/15 03:27:33 james26_jang Exp $
 *
 *	Fixes:
 *		Alan Cox	:	Fixed the worst of the load balancer bugs.
 *		Dave Platt	:	Interrupt stacking fix.
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Changed buffer format.
 *		Alan Cox	:	destructor hook for AF_UNIX etc.
 *		Linus Torvalds	:	Better skb_clone.
 *		Alan Cox	:	Added skb_copy.
 *		Alan Cox	:	Added all the changed routines Linus
 *					only put in the headers
 *		Ray VanTassle	:	Fixed --skb->lock in free
 *		Alan Cox	:	skb_copy copy arp field
 *		Andi Kleen	:	slabified it.
 *
 *	NOTE:
 *		The __skb_ routines should be called with interrupts
 *	disabled, or you better be *real* sure that the operation is atomic
 *	with respect to whatever list is being frobbed (e.g. via lock_sock()
 *	or via disabling bottom half handlers, etc).
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 *	The functions in this file will not compile correctly with gcc 2.4.x
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <net/protocol.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>

#include <asm/uaccess.h>
#include <asm/system.h>

int sysctl_hot_list_len = 128;

static kmem_cache_t *skbuff_head_cache;

static union {
	struct sk_buff_head	list;
	char			pad[SMP_CACHE_BYTES];
} skb_head_pool[NR_CPUS];

/*
 *	Keep out-of-line to prevent kernel bloat.
 *	__builtin_return_address is not used because it is not always
 *	reliable.
 */

/**
 *	skb_over_panic	- private function
 *	@skb: buffer
 *	@sz: size
 *	@here: address
 *
 *	Out of line support code for skb_put(). Not user callable.
 */

void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
	printk("skput:over: %p:%d put:%d dev:%s",
		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}

/**
 *	skb_under_panic	- private function
 *	@skb: buffer
 *	@sz: size
 *	@here: address
 *
 *	Out of line support code for skb_push(). Not user callable.
 */

void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
	printk("skput:under: %p:%d put:%d dev:%s",
		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}
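/*
 * Usage sketch (editor's addition, not part of the original file): the NOTE
 * above says the lockless __skb_ list routines must run with interrupts
 * disabled.  The per-CPU hot-list helpers below follow exactly this pattern;
 * a caller touching its own private list would do the same.  The function and
 * its parameters are hypothetical.
 */
#if 0	/* example only, not compiled */
static void example_requeue(struct sk_buff_head *list, struct sk_buff *skb)
{
	unsigned long flags;

	local_irq_save(flags);		/* make the list op atomic vs. IRQs */
	__skb_queue_head(list, skb);	/* lockless variant is now safe */
	local_irq_restore(flags);
}
#endif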
static __inline__ struct sk_buff *skb_head_from_pool(void)
{
	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

	if (skb_queue_len(list)) {
		struct sk_buff *skb;
		unsigned long flags;

		local_irq_save(flags);
		skb = __skb_dequeue(list);
		local_irq_restore(flags);
		return skb;
	}
	return NULL;
}

static __inline__ void skb_head_to_pool(struct sk_buff *skb)
{
	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

	if (skb_queue_len(list) < sysctl_hot_list_len) {
		unsigned long flags;

		local_irq_save(flags);
		__skb_queue_head(list, skb);
		local_irq_restore(flags);

		return;
	}
	kmem_cache_free(skbuff_head_cache, skb);
}


/* Allocate a new skbuff. We do this ourselves so we can fill in a few
 * 'private' fields and also do memory statistics to find all the
 * [BEEP] leaks.
 */

/**
 *	alloc_skb	-	allocate a network buffer
 *	@size: size to allocate
 *	@gfp_mask: allocation mask
 *
 *	Allocate a new &sk_buff. The returned buffer has no headroom and
 *	tailroom of @size bytes. The object has a reference count of one.
 *	The return is the buffer. On a failure the return is %NULL.
 *
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
 */

struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
{
	struct sk_buff *skb;
	u8 *data;

	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
		static int count = 0;
		if (++count < 5) {
			printk(KERN_ERR "alloc_skb called nonatomically "
			       "from interrupt %p\n", NET_CALLER(size));
			BUG();
		}
		gfp_mask &= ~__GFP_WAIT;
	}

	/* Get the HEAD */
	skb = skb_head_from_pool();
	if (skb == NULL) {
		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
		if (skb == NULL)
			goto nohead;
		skb->next = skb->prev = NULL;
	}

	/* Get the DATA. Size must match skb_add_mtu(). */
	size = SKB_DATA_ALIGN(size);
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		goto nodata;

	skb->truesize = size + sizeof(struct sk_buff);

	/* Load the data pointers. */
	skb->head = data;
	skb->data = data;
	skb->tail = data;
	skb->end = data + size;

	/* Set up other state */
	skb->len = 0;
	skb->cloned = 0;
	skb->data_len = 0;

	atomic_set(&skb->users, 1);
	atomic_set(&(skb_shinfo(skb)->dataref), 1);
	skb_shinfo(skb)->nr_frags = 0;
	skb_shinfo(skb)->frag_list = NULL;
	return skb;

nodata:
	skb_head_to_pool(skb);
nohead:
	return NULL;
}
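/*
 * Usage sketch (editor's addition, not part of the original file): the
 * classic receive-path pattern built on alloc_skb().  Reserving 2 bytes of
 * headroom aligns the IP header on a 16-byte boundary after the 14-byte
 * Ethernet header.  The function, "data" and "len" are hypothetical.
 */
#if 0	/* example only, not compiled */
static struct sk_buff *example_rx_build(void *data, unsigned int len)
{
	struct sk_buff *skb = alloc_skb(len + 2, GFP_ATOMIC);

	if (skb == NULL)
		return NULL;
	skb_reserve(skb, 2);			/* headroom: 2 bytes */
	memcpy(skb_put(skb, len), data, len);	/* tail grows by len */
	return skb;
}
#endif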
/*
 *	Slab constructor for a skb head.
 */
static inline void skb_headerinit(void *p, kmem_cache_t *cache,
				  unsigned long flags)
{
	struct sk_buff *skb = p;

	skb->next = NULL;
	skb->prev = NULL;
	skb->list = NULL;
	skb->sk = NULL;
	skb->stamp.tv_sec = 0;		/* No idea about time */
	skb->dev = NULL;
	skb->dst = NULL;
	memset(skb->cb, 0, sizeof(skb->cb));
	skb->pkt_type = PACKET_HOST;	/* Default type */
	skb->ip_summed = 0;
	skb->priority = 0;
	skb->security = 0;		/* By default packets are insecure */
	skb->destructor = NULL;

#ifdef CONFIG_NETFILTER
	skb->nfmark = skb->nfcache = 0;
	skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 0;
#endif
#endif
#ifdef CONFIG_NET_SCHED
	skb->tc_index = 0;
#endif
}

static void skb_drop_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list = skb_shinfo(skb)->frag_list;

	skb_shinfo(skb)->frag_list = NULL;

	do {
		struct sk_buff *this = list;
		list = list->next;
		kfree_skb(this);
	} while (list);
}

static void skb_clone_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list;

	for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
		skb_get(list);
}

static void skb_release_data(struct sk_buff *skb)
{
	if (!skb->cloned ||
	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
		if (skb_shinfo(skb)->nr_frags) {
			int i;
			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
				put_page(skb_shinfo(skb)->frags[i].page);
		}

		if (skb_shinfo(skb)->frag_list)
			skb_drop_fraglist(skb);

		kfree(skb->head);
	}
}

/*
 *	Free an skbuff by memory without cleaning the state.
 */
void kfree_skbmem(struct sk_buff *skb)
{
	skb_release_data(skb);
	skb_head_to_pool(skb);
}

/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer.
 *	Clean the state. This is an internal helper function. Users should
 *	always call kfree_skb().
 */

void __kfree_skb(struct sk_buff *skb)
{
	if (skb->list) {
		printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
		       "on a list (from %p).\n", NET_CALLER(skb));
		BUG();
	}

	dst_release(skb->dst);
	if (skb->destructor) {
		if (in_irq()) {
			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
			       NET_CALLER(skb));
		}
		skb->destructor(skb);
	}
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
#endif
	skb_headerinit(skb, NULL, 0);	/* clean state */
	kfree_skbmem(skb);
}
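/*
 * Usage sketch (editor's addition, not part of the original file): buffers
 * are reference counted, so each skb_get() must be balanced by a
 * kfree_skb(); only the final release reaches __kfree_skb().  The function
 * is hypothetical.
 */
#if 0	/* example only, not compiled */
static void example_refcount(struct sk_buff *skb)
{
	skb_get(skb);		/* users: 1 -> 2 */
	kfree_skb(skb);		/* users: 2 -> 1, buffer survives */
	kfree_skb(skb);		/* users: 1 -> 0, __kfree_skb() runs */
}
#endif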
/**
 *	skb_clone	-	duplicate an sk_buff
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
 *	copies share the same packet data but not structure. The new
 *	buffer has a reference count of 1. If the allocation fails the
 *	function returns %NULL otherwise the new buffer is returned.
 *
 *	If this function is called from an interrupt, @gfp_mask must be
 *	%GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	n = skb_head_from_pool();
	if (!n) {
		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;
	}

#define C(x) n->x = skb->x

	n->next = n->prev = NULL;
	n->list = NULL;
	n->sk = NULL;
	C(stamp);
	C(dev);
	C(h);
	C(nh);
	C(mac);
	C(dst);
	dst_clone(n->dst);
	memcpy(n->cb, skb->cb, sizeof(skb->cb));
	C(len);
	C(data_len);
	C(csum);
	n->cloned = 1;
	C(pkt_type);
	C(ip_summed);
	C(priority);
	atomic_set(&n->users, 1);
	C(protocol);
	C(security);
	C(truesize);
	C(head);
	C(data);
	C(tail);
	C(end);
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	C(nfmark);
	C(nfcache);
	C(nfct);
#ifdef CONFIG_NETFILTER_DEBUG
	C(nf_debug);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
	C(private);
#endif
#ifdef CONFIG_NET_SCHED
	C(tc_index);
#endif

	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;
#ifdef CONFIG_NETFILTER
	nf_conntrack_get(skb->nfct);
#endif
	return n;
}

static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
	/*
	 *	Shift between the two data areas in bytes
	 */
	unsigned long offset = new->data - old->data;

	new->list = NULL;
	new->sk = NULL;
	new->dev = old->dev;
	new->priority = old->priority;
	new->protocol = old->protocol;
	new->dst = dst_clone(old->dst);
	new->h.raw = old->h.raw + offset;
	new->nh.raw = old->nh.raw + offset;
	new->mac.raw = old->mac.raw + offset;
	memcpy(new->cb, old->cb, sizeof(old->cb));
	atomic_set(&new->users, 1);
	new->pkt_type = old->pkt_type;
	new->stamp = old->stamp;
	new->destructor = NULL;
	new->security = old->security;
#ifdef CONFIG_NETFILTER
	new->nfmark = old->nfmark;
	new->nfcache = old->nfcache;
	new->nfct = old->nfct;
	nf_conntrack_get(new->nfct);
#ifdef CONFIG_NETFILTER_DEBUG
	new->nf_debug = old->nf_debug;
#endif
#endif
#ifdef CONFIG_NET_SCHED
	new->tc_index = old->tc_index;
#endif
}
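/*
 * Usage sketch (editor's addition, not part of the original file): a clone
 * is the cheap way to hand one packet to two consumers that will not write
 * to the payload, e.g. keeping the original while queueing a copy for the
 * device.  dev_queue_xmit() is the stack's transmit entry point; the
 * surrounding function is hypothetical.
 */
#if 0	/* example only, not compiled */
static int example_xmit_keep_copy(struct sk_buff *skb)
{
	struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

	if (clone == NULL)
		return -ENOMEM;
	return dev_queue_xmit(clone);	/* original skb stays with caller */
}
#endif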
/**
 *	skb_copy	-	create private copy of an sk_buff
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data. This is used when the
 *	caller wishes to modify the data and needs a private copy of the
 *	data to alter. Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	As a by-product this function converts a non-linear &sk_buff to
 *	linear form, so the &sk_buff becomes completely private and the
 *	caller is allowed to modify all the data of the returned buffer.
 *	This means that this function is not recommended when only the
 *	header is going to be modified. Use pskb_copy() instead.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;
	int headerlen = skb->data - skb->head;

	/*
	 *	Allocate the copy buffer
	 */
	n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
	if (n == NULL)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n, headerlen);
	/* Set the tail pointer and length */
	skb_put(n, skb->len);
	n->csum = skb->csum;
	n->ip_summed = skb->ip_summed;

	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
		BUG();

	copy_skb_header(n, skb);

	return n;
}

/* Keep head the same: replace data */
int skb_linearize(struct sk_buff *skb, int gfp_mask)
{
	unsigned int size;
	u8 *data;
	long offset;
	int headerlen = skb->data - skb->head;
	int expand = (skb->tail + skb->data_len) - skb->end;

	if (skb_shared(skb))
		BUG();

	if (expand <= 0)
		expand = 0;

	size = skb->end - skb->head + expand;
	size = SKB_DATA_ALIGN(size);
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		return -ENOMEM;

	/* Copy entire thing */
	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
		BUG();

	/* Offset between the two in bytes */
	offset = data - skb->head;

	/* Free old data. */
	skb_release_data(skb);

	skb->head = data;
	skb->end = data + size;

	/* Set up new pointers */
	skb->h.raw += offset;
	skb->nh.raw += offset;
	skb->mac.raw += offset;
	skb->tail += offset;
	skb->data += offset;

	/* Set up shinfo */
	atomic_set(&(skb_shinfo(skb)->dataref), 1);
	skb_shinfo(skb)->nr_frags = 0;
	skb_shinfo(skb)->frag_list = NULL;

	/* We are no longer a clone, even if we were. */
	skb->cloned = 0;

	skb->tail += skb->data_len;
	skb->data_len = 0;
	return 0;
}
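/*
 * Usage sketch (editor's addition, not part of the original file): before
 * rewriting payload bytes of a buffer that may be shared or cloned, take a
 * private linear copy via skb_copy() and work on that.  The mangling step
 * shown is hypothetical.
 */
#if 0	/* example only, not compiled */
static struct sk_buff *example_private_mangle(struct sk_buff *skb)
{
	struct sk_buff *priv = skb_copy(skb, GFP_ATOMIC);

	if (priv == NULL)
		return NULL;
	kfree_skb(skb);		/* drop our reference to the shared one */
	priv->data[0] ^= 0xff;	/* safe: the data is now exclusively ours */
	return priv;
}
#endif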
/**
 *	pskb_copy	-	create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and the part of its data located
 *	in the header. Fragmented data remains shared. This is used when
 *	the caller wishes to modify only the header of the &sk_buff and
 *	needs a private copy of the header to alter. Returns %NULL on
 *	failure or the pointer to the buffer on success.
 *	The returned buffer has a reference count of 1.
 */

struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	/*
	 *	Allocate the copy buffer
	 */
	n = alloc_skb(skb->end - skb->head, gfp_mask);
	if (n == NULL)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n, skb->data - skb->head);
	/* Set the tail pointer and length */
	skb_put(n, skb_headlen(skb));
	/* Copy the bytes */
	memcpy(n->data, skb->data, n->len);
	n->csum = skb->csum;
	n->ip_summed = skb->ip_summed;

	n->data_len = skb->data_len;
	n->len = skb->len;

	if (skb_shinfo(skb)->nr_frags) {
		int i;

		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
			get_page(skb_shinfo(n)->frags[i].page);
		}
		skb_shinfo(n)->nr_frags = i;
	}

	if (skb_shinfo(skb)->frag_list) {
		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(n);
	}

	copy_skb_header(n, skb);

	return n;
}
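/*
 * Usage sketch (editor's addition, not part of the original file): when only
 * header fields change, pskb_copy() avoids duplicating the fragments that
 * skb_copy() would copy.  Here a hypothetical forwarder decrements the IPv4
 * TTL, which lives at byte offset 8 of the network header.
 */
#if 0	/* example only, not compiled */
static struct sk_buff *example_edit_header(struct sk_buff *skb)
{
	struct sk_buff *n = pskb_copy(skb, GFP_ATOMIC);

	if (n == NULL)
		return NULL;
	n->nh.raw[8]--;		/* private header, payload still shared */
	return n;
}
#endif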
/**
 *	pskb_expand_head - reallocate header of &sk_buff
 *	@skb: buffer to reallocate
 *	@nhead: room to add at head
 *	@ntail: room to add at tail
 *	@gfp_mask: allocation priority
 *
 *	Expands (or creates an identical copy, if @nhead and @ntail are
 *	zero) the header of @skb. The &sk_buff itself is not replaced and
 *	MUST have a reference count of 1. Returns zero on success or
 *	-ENOMEM if the expansion failed; in that case the &sk_buff is left
 *	unchanged.
 *
 *	All the pointers pointing into skb header may change and must be
 *	reloaded after call to this function.
 */

int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
{
	int i;
	u8 *data;
	int size = nhead + (skb->end - skb->head) + ntail;
	long off;

	if (skb_shared(skb))
		BUG();

	size = SKB_DATA_ALIGN(size);

	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		goto nodata;

	/* Copy only real data... and, alas, header. This should be
	 * optimized for the cases when header is void.
	 */
	memcpy(data + nhead, skb->head, skb->tail - skb->head);
	memcpy(data + size, skb->end, sizeof(struct skb_shared_info));

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		get_page(skb_shinfo(skb)->frags[i].page);

	if (skb_shinfo(skb)->frag_list)
		skb_clone_fraglist(skb);

	skb_release_data(skb);

	off = (data + nhead) - skb->head;

	skb->head = data;
	skb->end = data + size;

	skb->data += off;
	skb->tail += off;
	skb->mac.raw += off;
	skb->h.raw += off;
	skb->nh.raw += off;
	skb->cloned = 0;
	atomic_set(&skb_shinfo(skb)->dataref, 1);
	return 0;

nodata:
	return -ENOMEM;
}

/* Make private copy of skb with writable head and some headroom */

struct sk_buff *
skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
{
	struct sk_buff *skb2;
	int delta = headroom - skb_headroom(skb);

	if (delta <= 0)
		return pskb_copy(skb, GFP_ATOMIC);

	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL ||
	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
		return skb2;

	kfree_skb(skb2);
	return NULL;
}

/**
 *	skb_copy_expand	-	copy and expand sk_buff
 *	@skb: buffer to copy
 *	@newheadroom: new free bytes at head
 *	@newtailroom: new free bytes at tail
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data and while doing so
 *	allocate additional space.
 *
 *	This is used when the caller wishes to modify the data and needs a
 *	private copy of the data to alter as well as more space for new fields.
 *	Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	You must pass %GFP_ATOMIC as the allocation priority if this function
 *	is called from an interrupt.
 */

struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
				int newheadroom,
				int newtailroom,
				int gfp_mask)
{
	struct sk_buff *n;

	/*
	 *	Allocate the copy buffer
	 */
	n = alloc_skb(newheadroom + skb->len + newtailroom, gfp_mask);
	if (n == NULL)
		return NULL;

	skb_reserve(n, newheadroom);

	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	/* Copy the data only. */
	if (skb_copy_bits(skb, 0, n->data, skb->len))
		BUG();

	copy_skb_header(n, skb);
	return n;
}
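/*
 * Usage sketch (editor's addition, not part of the original file): a
 * tunnel-style sender that must prepend an outer header checks headroom
 * first and falls back to skb_realloc_headroom(), remembering that the
 * reallocation invalidates old pointers into the buffer.  "OUTER_HLEN" and
 * the function are hypothetical.
 */
#if 0	/* example only, not compiled */
static struct sk_buff *example_make_room(struct sk_buff *skb)
{
	if (skb_headroom(skb) < OUTER_HLEN) {
		struct sk_buff *nskb = skb_realloc_headroom(skb, OUTER_HLEN);

		if (nskb == NULL) {
			kfree_skb(skb);
			return NULL;
		}
		kfree_skb(skb);		/* drop the original; nskb is private */
		skb = nskb;
	}
	skb_push(skb, OUTER_HLEN);	/* now guaranteed to fit */
	return skb;
}
#endif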
/* Trims skb to length len. It can change skb pointers if "realloc" is 1.
 * If realloc==0 and trimming is impossible without a change of data,
 * it is a BUG().
 */

int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
{
	int offset = skb_headlen(skb);
	int nfrags = skb_shinfo(skb)->nr_frags;
	int i;

	for (i = 0; i < nfrags; i++) {
		int end = offset + skb_shinfo(skb)->frags[i].size;
		if (end > len) {
			if (skb_cloned(skb)) {
				if (!realloc)
					BUG();
				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
					return -ENOMEM;
			}
			if (len <= offset) {
				put_page(skb_shinfo(skb)->frags[i].page);
				skb_shinfo(skb)->nr_frags--;
			} else {
				skb_shinfo(skb)->frags[i].size = len - offset;
			}
		}
		offset = end;
	}

	if (offset < len) {
		skb->data_len -= skb->len - len;
		skb->len = len;
	} else {
		if (len <= skb_headlen(skb)) {
			skb->len = len;
			skb->data_len = 0;
			skb->tail = skb->data + len;
			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
				skb_drop_fraglist(skb);
		} else {
			skb->data_len -= skb->len - len;
			skb->len = len;
		}
	}

	return 0;
}
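/*
 * Usage sketch (editor's addition, not part of the original file):
 * ___pskb_trim() is normally reached through the pskb_trim() wrapper in
 * <linux/skbuff.h>.  A typical caller drops trailing link-layer padding
 * once the true datagram length is known; the function and "iplen" are
 * hypothetical.
 */
#if 0	/* example only, not compiled */
static int example_strip_padding(struct sk_buff *skb, unsigned int iplen)
{
	if (skb->len > iplen)
		return pskb_trim(skb, iplen);	/* may realloc a cloned head */
	return 0;
}
#endif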
/**
 *	__pskb_pull_tail - advance tail of skb header
 *	@skb: buffer to reallocate
 *	@delta: number of bytes to advance tail
 *
 *	This function makes sense only on a fragmented &sk_buff: it expands
 *	the header, moving its tail forward and copying the necessary data
 *	from the fragmented part.
 *
 *	&sk_buff MUST have reference count of 1.
 *
 *	Returns %NULL (and the &sk_buff does not change) if the pull failed,
 *	or the value of the new tail of the skb on success.
 *
 *	All the pointers pointing into skb header may change and must be
 *	reloaded after call to this function.
 */

/* Moves tail of skb head forward, copying data from fragmented part,
 * when it is necessary.
 * 1. It may fail due to malloc failure.
 * 2. It may change skb pointers.
 *
 * It is pretty complicated. Luckily, it is called only in exceptional cases.
 */
unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
{
	int i, k, eat;

	/* If the skb does not have enough free space at the tail, get a
	 * new one plus 128 bytes for future expansions. If we have enough
	 * room at the tail, reallocate without expansion only if the skb
	 * is cloned.
	 */
	eat = (skb->tail + delta) - skb->end;

	if (eat > 0 || skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, GFP_ATOMIC))
			return NULL;
	}

	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
		BUG();

	/* Optimization: no fragments, no reason to pre-estimate the
	 * size of the pulled pages. Superb.
	 */
	if (skb_shinfo(skb)->frag_list == NULL)
		goto pull_pages;

	/* Estimate size of pulled pages. */
	eat = delta;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size >= eat)
			goto pull_pages;
		eat -= skb_shinfo(skb)->frags[i].size;
	}

	/* If we need to update the frag list, we are in trouble.
	 * Certainly, it is possible to add an offset to the skb data,
	 * but given that pulling is expected to be a very rare operation,
	 * it is worth fighting further bloat of the skb head and
	 * crucifying ourselves here instead. Pure masochism, indeed. 8)8)
	 */
	if (eat) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;
		struct sk_buff *clone = NULL;
		struct sk_buff *insp = NULL;

		do {
			if (list == NULL)
				BUG();

			if (list->len <= eat) {
				/* Eaten as a whole. */
				eat -= list->len;
				list = list->next;
				insp = list;
			} else {
				/* Eaten partially. */

				if (skb_shared(list)) {
					/* Sucks! We need to fork list. :-( */
					clone = skb_clone(list, GFP_ATOMIC);
					if (clone == NULL)
						return NULL;
					insp = list->next;
					list = clone;
				} else {
					/* This may be pulled without
					 * problems. */
					insp = list;
				}
				if (pskb_pull(list, eat) == NULL) {
					if (clone)
						kfree_skb(clone);
					return NULL;
				}
				break;
			}
		} while (eat);

		/* Free pulled out fragments. */
		while ((list = skb_shinfo(skb)->frag_list) != insp) {
			skb_shinfo(skb)->frag_list = list->next;
			kfree_skb(list);
		}
		/* And insert new clone at head. */
		if (clone) {
			clone->next = list;
			skb_shinfo(skb)->frag_list = clone;
		}
	}
	/* Success! Now we may commit changes to skb data. */

pull_pages:
	eat = delta;
	k = 0;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size <= eat) {
			put_page(skb_shinfo(skb)->frags[i].page);
			eat -= skb_shinfo(skb)->frags[i].size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_shinfo(skb)->frags[k].size -= eat;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

	skb->tail += delta;
	skb->data_len -= delta;

	return skb->tail;
}

/* Copy some data bits from skb to kernel buffer. */

int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
	int i, copy;
	int start = skb->len - skb->data_len;

	if (offset > (int)skb->len - len)
		goto fault;

	/* Copy header. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		memcpy(to, skb->data + offset, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			u8 *vaddr;

			if (copy > len)
				copy = len;

			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
			memcpy(to, vaddr + skb_shinfo(skb)->frags[i].page_offset +
			       offset - start, copy);
			kunmap_skb_frag(vaddr);

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list = list->next) {
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				if (copy > len)
					copy = len;
				if (skb_copy_bits(list, offset - start, to, copy))
					goto fault;
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
				to += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return 0;

fault:
	return -EFAULT;
}
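/*
 * Usage sketch (editor's addition, not part of the original file):
 * skb_copy_bits() flattens any mix of linear data, page fragments and
 * frag_list skbs into a plain buffer, which is handy for peeking at a
 * header that may not be linear.  The function and buffer size are
 * hypothetical.
 */
#if 0	/* example only, not compiled */
static int example_peek_header(const struct sk_buff *skb)
{
	u8 hdr[20];		/* e.g. a minimal IPv4 header */

	if (skb->len < sizeof(hdr))
		return -EINVAL;
	return skb_copy_bits(skb, 0, hdr, sizeof(hdr));
}
#endif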
/* Checksum skb data. */

unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len,
			  unsigned int csum)
{
	int i, copy;
	int start = skb->len - skb->data_len;
	int pos = 0;

	/* Checksum header. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial(skb->data + offset, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			unsigned int csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial(vaddr + frag->page_offset +
					     offset - start, copy, 0);
			kunmap_skb_frag(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			pos += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list = list->next) {
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				unsigned int csum2;
				if (copy > len)
					copy = len;
				csum2 = skb_checksum(list, offset - start, copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				pos += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return csum;

	BUG();
	return csum;
}
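/*
 * Usage sketch (editor's addition, not part of the original file):
 * checksumming an entire datagram and folding the 32-bit running sum into
 * its final 16-bit form, as a UDP/ICMP-style receiver would.  The function
 * is hypothetical.
 */
#if 0	/* example only, not compiled */
static int example_verify_csum(const struct sk_buff *skb)
{
	unsigned int csum = skb_checksum(skb, 0, skb->len, 0);

	/* csum_fold() yields 0 for a valid ones-complement checksum */
	return csum_fold(csum) == 0 ? 0 : -EINVAL;
}
#endif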
/* Both of the above in one bottle. */

unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
				    u8 *to, int len, unsigned int csum)
{
	int i, copy;
	int start = skb->len - skb->data_len;
	int pos = 0;

	/* Copy header. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial_copy_nocheck(skb->data + offset, to, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			unsigned int csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
							  offset - start, to, copy, 0);
			kunmap_skb_frag(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list = list->next) {
			unsigned int csum2;
			int end;

			BUG_TRAP(start <= offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				if (copy > len)
					copy = len;
				csum2 = skb_copy_and_csum_bits(list, offset - start,
							       to, copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				to += copy;
				pos += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return csum;

	BUG();
	return csum;
}

void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
	unsigned int csum;
	long csstart;

	if (skb->ip_summed == CHECKSUM_HW)
		csstart = skb->h.raw - skb->data;
	else
		csstart = skb->len - skb->data_len;

	if (csstart > skb->len - skb->data_len)
		BUG();

	memcpy(to, skb->data, csstart);

	csum = 0;
	if (csstart != skb->len)
		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
					      skb->len - csstart, 0);

	if (skb->ip_summed == CHECKSUM_HW) {
		long csstuff = csstart + skb->csum;

		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
	}
}


void __init skb_init(void)
{
	int i;

	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
					      sizeof(struct sk_buff),
					      0,
					      SLAB_HWCACHE_ALIGN,
					      skb_headerinit, NULL);
	if (!skbuff_head_cache)
		panic("cannot create skbuff cache");

	for (i = 0; i < NR_CPUS; i++)
		skb_queue_head_init(&skb_head_pool[i].list);
}
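/*
 * Usage sketch (editor's addition, not part of the original file): a driver
 * whose hardware cannot checksum uses skb_copy_and_csum_dev() to copy a
 * packet into its transmit buffer while filling in the checksum that the
 * stack deferred with CHECKSUM_HW.  The function and "tx_buf" are
 * hypothetical.
 */
#if 0	/* example only, not compiled */
static void example_hard_start_xmit(struct sk_buff *skb, u8 *tx_buf)
{
	skb_copy_and_csum_dev(skb, tx_buf);	/* copy + fold checksum */
	/* ... kick the hardware, then release the buffer: */
	dev_kfree_skb(skb);
}
#endif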