1/* 2 * NET3 Protocol independent device support routines. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Derived from the non IP parts of dev.c 1.0.19 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 13 * 14 * Additional Authors: 15 * Florian la Roche <rzsfl@rz.uni-sb.de> 16 * Alan Cox <gw4pts@gw4pts.ampr.org> 17 * David Hinds <dahinds@users.sourceforge.net> 18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 19 * Adam Sulmicki <adam@cfar.umd.edu> 20 * Pekka Riikonen <priikone@poesidon.pspt.fi> 21 * 22 * Changes: 23 * D.J. Barrow : Fixed bug where dev->refcnt gets set 24 * to 2 if register_netdev gets called 25 * before net_dev_init & also removed a 26 * few lines of code in the process. 27 * Alan Cox : device private ioctl copies fields back. 28 * Alan Cox : Transmit queue code does relevant 29 * stunts to keep the queue safe. 30 * Alan Cox : Fixed double lock. 31 * Alan Cox : Fixed promisc NULL pointer trap 32 * ???????? : Support the full private ioctl range 33 * Alan Cox : Moved ioctl permission check into 34 * drivers 35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI 36 * Alan Cox : 100 backlog just doesn't cut it when 37 * you start doing multicast video 8) 38 * Alan Cox : Rewrote net_bh and list manager. 39 * Alan Cox : Fix ETH_P_ALL echoback lengths. 40 * Alan Cox : Took out transmit every packet pass 41 * Saved a few bytes in the ioctl handler 42 * Alan Cox : Network driver sets packet type before 43 * calling netif_rx. Saves a function 44 * call a packet. 45 * Alan Cox : Hashed net_bh() 46 * Richard Kooijman: Timestamp fixes. 47 * Alan Cox : Wrong field in SIOCGIFDSTADDR 48 * Alan Cox : Device lock protection. 
49 * Alan Cox : Fixed nasty side effect of device close 50 * changes. 51 * Rudi Cilibrasi : Pass the right thing to 52 * set_mac_address() 53 * Dave Miller : 32bit quantity for the device lock to 54 * make it work out on a Sparc. 55 * Bjorn Ekwall : Added KERNELD hack. 56 * Alan Cox : Cleaned up the backlog initialise. 57 * Craig Metz : SIOCGIFCONF fix if space for under 58 * 1 device. 59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there 60 * is no device open function. 61 * Andi Kleen : Fix error reporting for SIOCGIFCONF 62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF 63 * Cyrus Durgin : Cleaned for KMOD 64 * Adam Sulmicki : Bug Fix : Network Device Unload 65 * A network device unload needs to purge 66 * the backlog queue. 67 * Paul Rusty Russell : SIOCSIFNAME 68 * Pekka Riikonen : Netdev boot-time settings code 69 * Andrew Morton : Make unregister_netdevice wait 70 * indefinitely on dev->refcnt 71 * J Hadi Salim : - Backlog queue sampling 72 * - netif_rx() feedback 73 */ 74 75#include <asm/uaccess.h> 76#include <asm/system.h> 77#include <linux/bitops.h> 78#include <linux/capability.h> 79#include <linux/cpu.h> 80#include <linux/types.h> 81#include <linux/kernel.h> 82#include <linux/sched.h> 83#include <linux/mutex.h> 84#include <linux/string.h> 85#include <linux/mm.h> 86#include <linux/socket.h> 87#include <linux/sockios.h> 88#include <linux/errno.h> 89#include <linux/interrupt.h> 90#include <linux/if_ether.h> 91#include <linux/netdevice.h> 92#include <linux/etherdevice.h> 93#include <linux/notifier.h> 94#include <linux/skbuff.h> 95#include <net/sock.h> 96#include <linux/rtnetlink.h> 97#include <linux/proc_fs.h> 98#include <linux/seq_file.h> 99#include <linux/stat.h> 100#include <linux/if_bridge.h> 101#include <net/dst.h> 102#include <net/pkt_sched.h> 103#include <net/checksum.h> 104#include <linux/highmem.h> 105#include <linux/init.h> 106#include <linux/kmod.h> 107#include <linux/module.h> 108#include <linux/kallsyms.h> 109#include 
<linux/netpoll.h> 110#include <linux/rcupdate.h> 111#include <linux/delay.h> 112#include <net/wext.h> 113#include <net/iw_handler.h> 114#include <asm/current.h> 115#include <linux/audit.h> 116#include <linux/dmaengine.h> 117#include <linux/err.h> 118#include <linux/ctype.h> 119#include <linux/if_arp.h> 120#include <typedefs.h> 121#include <bcmdefs.h> 122/* Foxconn added start, pptp, Winster Chan, 06/26/2006 */ 123#include <linux/if_pppox.h> 124#include <linux/ppp_comm.h> 125/* Foxconn added end, pptp, Winster Chan, 06/26/2006 */ 126 127#ifdef CONFIG_INET_GRO 128/* Instead of increasing this, you should create a hash table. */ 129#define MAX_GRO_SKBS 8 130 131/* This should be increased if a protocol with a bigger head is added. */ 132#define GRO_MAX_HEAD (MAX_HEADER + 128) 133#endif /* CONFIG_INET_GRO */ 134 135/* 136 * The list of packet types we will receive (as opposed to discard) 137 * and the routines to invoke. 138 * 139 * Why 16. Because with 16 the only overlap we get on a hash of the 140 * low nibble of the protocol value is RARP/SNAP/X.25. 141 * 142 * NOTE: That is no longer true with the addition of VLAN tags. Not 143 * sure which should go first, but I bet it won't make much 144 * difference if we are running VLANs. The good news is that 145 * this protocol won't be in the list unless compiled in, so 146 * the average user (w/out VLANs) will not be adversely affected. 
 *							--BLG
 *
 *		0800	IP
 *		8100    802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16] __read_mostly;	/* 16 way hashed list */
static struct list_head ptype_all __read_mostly;	/* Taps */

/* Foxconn added start, pptp, Winster Chan, 06/26/2006 */
/* PPTP address pair state; presumably consumed by the vendor PPTP
 * acceleration path elsewhere in this file — TODO confirm with callers. */
static struct addr_info pptp_ip_addr;

/* Pre-swapped protocol constants so hot paths can compare without
 * calling ntohs() on every packet. */
#define NTOHS_ETH_P_PPTP_GRE    ntohs(ETH_P_PPTP_GRE)
#define NTOHS_ETH_P_IP          ntohs(ETH_P_IP)
#define NTOHS_ETH_P_PPP_SES     ntohs(ETH_P_PPP_SES)
#define NTOHS_ETH_P_PPPOE_SESS  ntohs(ETH_P_PPPOE_SESS)
/* Foxconn added end, pptp, Winster Chan, 06/26/2006 */

#ifdef CONFIG_NET_DMA
static struct dma_client *net_dma_client;
static unsigned int net_dma_count;
static spinlock_t net_dma_event_lock;
#endif

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates. This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
LIST_HEAD(dev_base_head);
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_head);
EXPORT_SYMBOL(dev_base_lock);

/* Two 256-bucket hash tables for fast device lookup: by name and by
 * interface index. Protected like dev_base_head (rtnl + dev_base_lock). */
#define NETDEV_HASHBITS	8
static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];

/* Map a device name to its bucket in dev_name_head. */
static inline struct hlist_head *dev_name_hash(const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
}

/* Map an interface index to its bucket in dev_index_head. */
static inline struct hlist_head *dev_index_hash(int ifindex)
{
	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */
DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };

#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
extern int netdev_register_sysfs(struct net_device *);
extern void netdev_unregister_sysfs(struct net_device *);
#else
/* Without sysfs these degenerate to successful no-ops. */
#define netdev_sysfs_init()		(0)
#define netdev_register_sysfs(dev)	(0)
#define netdev_unregister_sysfs(dev)	do { } while(0)
#endif

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * register_netdevice() inits dev->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
	 ARPHRD_NONE};

/* Human-readable lock-class names, index-aligned with netdev_lock_type[]. */
static const char *netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
	 "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];

/* Return the index of @dev_type in netdev_lock_type[]; unknown types fall
 * back to the last slot (ARPHRD_NONE). */
static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

/* Give a device's _xmit_lock a lockdep class and name keyed off the
 * hardware type, so lockdep can tell nested xmit locks apart. */
static inline void netdev_set_lockdep_class(spinlock_t *lock,
					    unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_lockdep_class(spinlock_t *lock,
					    unsigned short dev_type)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL))
		/* ETH_P_ALL taps live on their own list */
		list_add_rcu(&pt->list, &ptype_all);
	else {
		/* 16-way hash on the low nibble of the protocol number */
		hash = ntohs(pt->type) & 15;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}

/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & 15];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	/* Removing a handler that was never added indicates a caller bug. */
	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
408 */ 409void dev_remove_pack(struct packet_type *pt) 410{ 411 __dev_remove_pack(pt); 412 413 synchronize_net(); 414} 415 416/****************************************************************************** 417 418 Device Boot-time Settings Routines 419 420*******************************************************************************/ 421 422/* Boot time configuration table */ 423static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; 424 425/** 426 * netdev_boot_setup_add - add new setup entry 427 * @name: name of the device 428 * @map: configured settings for the device 429 * 430 * Adds new setup entry to the dev_boot_setup list. The function 431 * returns 0 on error and 1 on success. This is a generic routine to 432 * all netdevices. 433 */ 434static int netdev_boot_setup_add(char *name, struct ifmap *map) 435{ 436 struct netdev_boot_setup *s; 437 int i; 438 439 s = dev_boot_setup; 440 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 441 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { 442 memset(s[i].name, 0, sizeof(s[i].name)); 443 strcpy(s[i].name, name); 444 memcpy(&s[i].map, map, sizeof(s[i].map)); 445 break; 446 } 447 } 448 449 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; 450} 451 452/** 453 * netdev_boot_setup_check - check boot time settings 454 * @dev: the netdevice 455 * 456 * Check boot time settings for the device. 457 * The found settings are set for the device to be used 458 * later in the device probing. 459 * Returns 0 if no settings found, 1 if they are. 
460 */ 461int netdev_boot_setup_check(struct net_device *dev) 462{ 463 struct netdev_boot_setup *s = dev_boot_setup; 464 int i; 465 466 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 467 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && 468 !strncmp(dev->name, s[i].name, strlen(s[i].name))) { 469 dev->irq = s[i].map.irq; 470 dev->base_addr = s[i].map.base_addr; 471 dev->mem_start = s[i].map.mem_start; 472 dev->mem_end = s[i].map.mem_end; 473 return 1; 474 } 475 } 476 return 0; 477} 478 479 480/** 481 * netdev_boot_base - get address from boot time settings 482 * @prefix: prefix for network device 483 * @unit: id for network device 484 * 485 * Check boot time settings for the base address of device. 486 * The found settings are set for the device to be used 487 * later in the device probing. 488 * Returns 0 if no settings found. 489 */ 490unsigned long netdev_boot_base(const char *prefix, int unit) 491{ 492 const struct netdev_boot_setup *s = dev_boot_setup; 493 char name[IFNAMSIZ]; 494 int i; 495 496 sprintf(name, "%s%d", prefix, unit); 497 498 /* 499 * If device already registered then return base of 1 500 * to indicate not to probe for this interface 501 */ 502 if (__dev_get_by_name(name)) 503 return 1; 504 505 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) 506 if (!strcmp(name, s[i].name)) 507 return s[i].map.base_addr; 508 return 0; 509} 510 511/* 512 * Saves at boot time configured settings for any netdevice. 
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	/* Parse up to four comma-separated integers, advancing @str past
	 * them to the device name. */
	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);

/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name - find a device by its name
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(const char *name)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_name_hash(name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}

/**
 *	dev_get_by_name - find a device by its name
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(name);
	if (dev)
		dev_hold(dev);	/* take a reference before dropping the lock */
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(int ifindex)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_index_hash(ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(ifindex);
	if (dev)
		dev_hold(dev);	/* take a reference before dropping the lock */
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count
 *	increased and the caller must therefore be careful about locking
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for_each_netdev(dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}

EXPORT_SYMBOL(dev_getbyhwaddr);

/* Find the first device with hardware type @type. Caller must hold the
 * RTNL; no reference is taken on the returned device. */
struct net_device *__dev_getfirstbyhwtype(unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(dev)
		if (dev->type == type)
			return dev;

	return NULL;
}

EXPORT_SYMBOL(__dev_getfirstbyhwtype);

/* Locked wrapper around __dev_getfirstbyhwtype(): takes the RTNL itself
 * and returns the device with a reference held (caller must dev_put()). */
struct net_device *dev_getfirstbyhwtype(unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(type);
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	read_lock(&dev_base_lock);
	for_each_netdev(dev) {
		/* All bits selected by @mask must match @if_flags. */
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return ret;
}

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	int i = 0;
	char buf[IFNAMSIZ];
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, sizeof(buf), name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, sizeof(buf), name, i);
	if (!__dev_get_by_name(buf)) {
		strlcpy(dev->name, buf, IFNAMSIZ);
		return i;
	}

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}


/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device, can pass format strings "eth%d".
 *	for wildcarding.
 */
int dev_change_name(struct net_device *dev, char *newname)
{
	int err = 0;

	ASSERT_RTNL();

	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (!dev_valid_name(newname))
		return -EINVAL;

	if (strchr(newname, '%')) {
		/* Format string: allocate a free unit number, then copy the
		 * chosen name back into the caller's buffer. */
		err = dev_alloc_name(dev, newname);
		if (err < 0)
			return err;
		strcpy(newname, dev->name);
	}
	else if (__dev_get_by_name(newname))
		return -EEXIST;
	else
		strlcpy(dev->name, newname, IFNAMSIZ);

	/* Propagate the new name to sysfs, rehash, and notify listeners. */
	device_rename(&dev->dev, dev->name);
	hlist_del(&dev->name_hlist);
	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);

	return err;
}

/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		raw_notifier_call_chain(&netdev_chain,
				NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}

/**
 *	dev_load 	- load a network module
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(name);
	read_unlock(&dev_base_lock);

	if (!dev && capable(CAP_SYS_MODULE))
		request_module("%s", name);
}

/* Fallback for devices without a rebuild_header method; reaching it is a
 * driver bug, so log, drop the skb, and report failure. */
static int default_rebuild_header(struct sk_buff *skb)
{
	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
	       skb->dev ? skb->dev->name : "NULL!!!");
	kfree_skb(skb);
	return 1;
}

/* Foxconn added start pling 10/27/2009 */
/* Vendor IPv6 DAD state defined elsewhere in this tree; cleared on
 * dev_close() below. */
#ifdef CONFIG_IPV6
extern const char lan_if_name[];
extern const char wan_if_name[];
extern int lan_dad_detected;
extern int wan_dad_detected;
#endif
/* Foxconn added end pling 10/27/2009 */

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	int ret = 0;

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);
	if (dev->open) {
		ret = dev->open(dev);
		if (ret)
			clear_bit(__LINK_STATE_START, &dev->state);
	}

	/*
	 *	If it went open OK then:
	 */

	if (!ret) {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_mc_upload(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
	}
	return ret;
}

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for shutdown while the device is still operating.
	 */
	raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);

	dev_deactivate(dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running(),
	 * and wait when poll really will happen. Actually, the best place
	 * for this is inside dev->stop() after device stopped its irq
	 * engine, but this requires more changes in devices. */

	smp_mb__after_clear_bit(); /* Commit netif_running(). */
	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
		/* No hurry. */
		msleep(1);
	}

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (dev->stop)
		dev->stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Tell people we are down
	 */
	raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);

	/* Foxconn added start pling 10/29/2009 */
	/* Clear the IPv6 DAD flags when interface is down */
#ifdef CONFIG_IPV6
	if (strcmp(dev->name, lan_if_name) == 0)
		lan_dad_detected = 0;
	else if (strcmp(dev->name, wan_if_name) == 0)
		wan_dad_detected = 0;

	/* Foxconn added start pling 09/01/2010 */
	/* Restore IPv6 forwarding that might be disabled previously by DAD */
	extern int restore_ipv6_forwarding(struct net_device *dev);
	restore_ipv6_forwarding(dev);
	/* Foxconn added end pling 09/01/2010 */
#endif
	/* Foxconn added end pling 10/29/2009 */

	return 0;
}


/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 * 	When registered all registration and up events are replayed
 *	to the new notifier to allow device to have a race free
 *	view of the network device list.
1086 */ 1087 1088int register_netdevice_notifier(struct notifier_block *nb) 1089{ 1090 struct net_device *dev; 1091 int err; 1092 1093 rtnl_lock(); 1094 err = raw_notifier_chain_register(&netdev_chain, nb); 1095 if (!err) { 1096 for_each_netdev(dev) { 1097 nb->notifier_call(nb, NETDEV_REGISTER, dev); 1098 1099 if (dev->flags & IFF_UP) 1100 nb->notifier_call(nb, NETDEV_UP, dev); 1101 } 1102 } 1103 rtnl_unlock(); 1104 return err; 1105} 1106 1107/** 1108 * unregister_netdevice_notifier - unregister a network notifier block 1109 * @nb: notifier 1110 * 1111 * Unregister a notifier previously registered by 1112 * register_netdevice_notifier(). The notifier is unlinked into the 1113 * kernel structures and may then be reused. A negative errno code 1114 * is returned on a failure. 1115 */ 1116 1117int unregister_netdevice_notifier(struct notifier_block *nb) 1118{ 1119 int err; 1120 1121 rtnl_lock(); 1122 err = raw_notifier_chain_unregister(&netdev_chain, nb); 1123 rtnl_unlock(); 1124 return err; 1125} 1126 1127/** 1128 * call_netdevice_notifiers - call all network notifier blocks 1129 * @val: value passed unmodified to notifier function 1130 * @v: pointer passed unmodified to notifier function 1131 * 1132 * Call all network notifier blocks. Parameters and return value 1133 * are as for raw_notifier_call_chain(). 1134 */ 1135 1136int call_netdevice_notifiers(unsigned long val, void *v) 1137{ 1138 return raw_notifier_call_chain(&netdev_chain, val, v); 1139} 1140 1141/* When > 0 there are consumers of rx skb time stamps */ 1142static atomic_t netstamp_needed = ATOMIC_INIT(0); 1143 1144void net_enable_timestamp(void) 1145{ 1146 atomic_inc(&netstamp_needed); 1147} 1148 1149void net_disable_timestamp(void) 1150{ 1151 atomic_dec(&netstamp_needed); 1152} 1153 1154static inline void net_timestamp(struct sk_buff *skb) 1155{ 1156 if (atomic_read(&netstamp_needed)) 1157 __net_timestamp(skb); 1158 else 1159 skb->tstamp.tv64 = 0; 1160} 1161 1162/* 1163 * Support routine. 
 *	Sends outgoing frames to any network
 *	taps currently in use.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

	net_timestamp(skb);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			/* Each tap gets its own clone; on allocation failure
			 * remaining taps are simply skipped. */
			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			/* skb->nh should be correctly
			   set by sender, so that the second statement is
			   just protection against buggy protocols.
			 */
			skb_reset_mac_header(skb2);

			if (skb_network_header(skb2) < skb2->data ||
			    skb2->network_header > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       skb2->protocol, dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype, skb->dev);
		}
	}
	rcu_read_unlock();
}


/* Queue this device on the per-CPU TX softirq output queue (at most once,
 * guarded by __LINK_STATE_SCHED) and raise NET_TX_SOFTIRQ. */
void __netif_schedule(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
		unsigned long flags;
		struct softnet_data *sd;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		dev->next_sched = sd->output_queue;
		sd->output_queue = dev;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__netif_schedule);

/* Put the device on this CPU's NAPI poll list, top up its quota from its
 * weight, and raise NET_RX_SOFTIRQ.  Takes a device reference that is
 * dropped when polling completes. */
void __netif_rx_schedule(struct net_device *dev)
{
	unsigned long flags;

	local_irq_save(flags);
	dev_hold(dev);
	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
	if (dev->quota < 0)
		dev->quota += dev->weight;
	else
		dev->quota = dev->weight;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__netif_rx_schedule);

/* Free an skb from any context: defer to the IRQ-safe variant when IRQs
 * are off or we are in hard-IRQ context. */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);


/* Hot-plugging. */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_stop_queue(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_wake_queue(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);


/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		/* Let GSO fix up the checksum.
		 */
		goto out_set_summed;
	}

	/* We must write into the checksum field below; unshare first. */
	if (skb_cloned(skb)) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	offset = skb->csum_start - skb_headroom(skb);
	BUG_ON(offset > (int)skb->len);
	csum = skb_checksum(skb, offset, skb->len-offset, 0);

	offset = skb_headlen(skb) - offset;
	BUG_ON(offset <= 0);
	BUG_ON(skb->csum_offset + 2 > offset);

	*(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
		csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}

/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 *
 *	Returns an ERR_PTR on failure (e.g. no matching protocol handler).
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

	BUG_ON(skb_shinfo(skb)->frag_list);

	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	/* Warn-path: checksum was not left partial; may need to fix it up,
	 * which requires a writable header. */
	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	/* Restore the data pointer to the MAC header before returning. */
	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}

EXPORT_SYMBOL(skb_gso_segment);

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		printk(KERN_ERR "%s: hw csum failure.\n",
		       dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

/* Return 1 if any fragment of @skb lives in highmem and @dev cannot DMA
 * from highmem; such skbs must be linearized before transmit. */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
			return 1;

#endif
	return 0;
}

/* Control block used while a GSO skb carries its segment list in
 * skb->next: remembers the original destructor to restore later. */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

/* Destructor installed on a GSO skb while its segments are pending:
 * frees any unsent segments, then invokes the original destructor. */
static void dev_gso_skb_destructor(struct sk_buff *skb)
{
	struct dev_gso_cb *cb;

	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;
		kfree_skb(nskb);
	} while (skb->next);

	cb = DEV_GSO_CB(skb);
	if (cb->destructor)
		cb->destructor(skb);
}

/**
 *	dev_gso_segment - Perform emulated hardware segmentation on skb.
 *	@skb: buffer to segment
 *
 *	This function segments the given skb and stores the list of segments
 *	in skb->next.
 */
static int dev_gso_segment(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct sk_buff *segs;
	/* Drop NETIF_F_SG from the feature set if the device cannot DMA
	 * the fragments, forcing linear segments. */
	int features = dev->features & ~(illegal_highdma(dev, skb) ?
					 NETIF_F_SG : 0);

	segs = skb_gso_segment(skb, features);

	/* Verifying header integrity only. */
	if (!segs)
		return 0;

	if (unlikely(IS_ERR(segs)))
		return PTR_ERR(segs);

	skb->next = segs;
	DEV_GSO_CB(skb)->destructor = skb->destructor;
	skb->destructor = dev_gso_skb_destructor;

	return 0;
}

/* Hand a packet (or a GSO segment chain) to the driver, delivering copies
 * to taps first.  On partial GSO failure the unsent segments remain
 * chained on skb->next and the driver's return code is propagated so the
 * caller can requeue. */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (likely(!skb->next)) {
		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		if (netif_needs_gso(dev, skb)) {
			if (unlikely(dev_gso_segment(skb)))
				goto out_kfree_skb;
			if (skb->next)
				goto gso;
		}

		return dev->hard_start_xmit(skb, dev);
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;
		int rc;

		skb->next = nskb->next;
		nskb->next = NULL;
		rc = dev->hard_start_xmit(nskb, dev);
		if (unlikely(rc)) {
			/* Re-chain the failed segment and report. */
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		if (unlikely(netif_queue_stopped(dev) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

	/* All segments sent: restore the original destructor and free the
	 * (now segment-less) GSO holder skb by falling through. */
	skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
	kfree_skb(skb);
	return 0;
}

/* Take/release the driver TX lock unless the driver does its own
 * locking (NETIF_F_LLTX). */
#define HARD_TX_LOCK(dev, cpu) {			\
	if ((dev->features & NETIF_F_LLTX) == 0) {	\
		netif_tx_lock(dev);			\
	}						\
}

#define HARD_TX_UNLOCK(dev) {				\
	if ((dev->features & NETIF_F_LLTX) == 0) {	\
		netif_tx_unlock(dev);			\
	}						\
}

/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 *      I notice this method can also return errors from the queue disciplines,
 *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
 *      be positive.
 *
 *      Regardless of the return value, the skb is consumed, so it is currently
 *      difficult to retry a send to this method.  (You can bump the ref count
 *      before sending to hold a reference for retry if you are careful.)
 *
 *      When calling this method, interrupts MUST be enabled.  This is because
 *      the BH enable code must have IRQs enabled so that it will not deadlock.
 *          --BLG
 */
int BCMFASTPATH_HOST dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	int rc = -ENOMEM;
	unsigned short proto;

	/* GSO will handle the following emulations directly. */
	if (netif_needs_gso(dev, skb))
		goto gso;

	/* Linearize a frag_list skb if the device cannot take it. */
	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		skb_set_transport_header(skb, skb->csum_start -
					      skb_headroom(skb));

		if (!(dev->features & NETIF_F_GEN_CSUM) &&
		    (!(dev->features & NETIF_F_IP_CSUM) ||
		     skb->protocol != htons(ETH_P_IP)))
			if (skb_checksum_help(skb))
				goto out_kfree_skb;
	}

gso:
	spin_lock_prefetch(&dev->queue_lock);

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	/* Updates of qdisc are serialized by queue_lock.
	 * The struct Qdisc which is pointed to by qdisc is now a
	 * rcu structure - it may be accessed without acquiring
	 * a lock (but the structure may be stale.) The freeing of the
	 * qdisc will be deferred until it's known that there are no
	 * more references to it.
	 *
	 * If the qdisc has an enqueue function, we still need to
	 * hold the queue_lock before calling it, since queue_lock
	 * also serializes access to the device queue.
	 */

	/* Read the raw EtherType following the two MAC addresses.
	 * NOTE(review): the value is wire (big-endian) order; the
	 * htons(proto) != ETH_P_8021Q test below only works as intended on
	 * little-endian hosts (htons == ntohs there) — confirm on the
	 * target endianness. */
	proto = *(unsigned short *)(skb->data + ETH_ALEN + ETH_ALEN); /* foxconn added Bob, 10/30/2008 */
	q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Grab device queue */
		spin_lock(&dev->queue_lock);
		q = dev->qdisc;
		//if (q->enqueue) {
		if ( (q->enqueue) && (htons(proto) != ETH_P_8021Q)) { /* foxconn added Bob, 10/30/2008, check 802.1q vlan type */
			rc = q->enqueue(skb, q);
			qdisc_run(dev);
			spin_unlock(&dev->queue_lock);

			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
			goto out;
		}
		spin_unlock(&dev->queue_lock);
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Check this and shot the lock. It is not prone from deadlocks.
	   Either shot noqueue qdisc, it is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		/* xmit_lock_owner check detects recursive transmission on
		 * the same CPU (a virtual device calling back into us). */
		if (dev->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, cpu);

			if (!netif_queue_stopped(dev)) {
				rc = 0;
				if (!dev_hard_start_xmit(skb, dev)) {
					HARD_TX_UNLOCK(dev);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	rc = -ENETDOWN;
	rcu_read_unlock_bh();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}


/*=======================================================================
			Receiver routines
  =======================================================================*/

int netdev_max_backlog __read_mostly = 1000;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64;            /* old backlog weight */

DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };


/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process.  It always succeeds. The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_CN_LOW	(low congestion)
 *	NET_RX_CN_MOD	(moderate congestion)
 *	NET_RX_CN_HIGH	(high congestion)
 *	NET_RX_DROP	(packet was dropped)
 *
 */

int netif_rx(struct sk_buff *skb)
{
	struct softnet_data *queue;
	unsigned long flags;

	/* if netpoll wants it, pretend we never saw it */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	/*
	 * The code is rearranged so that the path is the most
	 * short when CPU is congested, but is still operating.
	 */
	local_irq_save(flags);
	queue = &__get_cpu_var(softnet_data);

	__get_cpu_var(netdev_rx_stat).total++;
	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		if (queue->input_pkt_queue.qlen) {
enqueue:
			/* Hold the device while the skb sits in the backlog;
			 * released by process_backlog(). */
			dev_hold(skb->dev);
			__skb_queue_tail(&queue->input_pkt_queue, skb);
			local_irq_restore(flags);
			return NET_RX_SUCCESS;
		}

		/* Queue was empty: schedule the backlog NAPI device first,
		 * then enqueue. */
		netif_rx_schedule(&queue->backlog_dev);
		goto enqueue;
	}

	/* Backlog full: account and drop. */
	__get_cpu_var(netdev_rx_stat).dropped++;
	local_irq_restore(flags);

	kfree_skb(skb);
	return NET_RX_DROP;
}

/* Process-context variant of netif_rx(): runs any softirq raised by the
 * enqueue before returning. */
int netif_rx_ni(struct sk_buff *skb)
{
	int err;

	preempt_disable();
	err = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return err;
}

EXPORT_SYMBOL(netif_rx_ni);

/* Redirect an skb received on a bonding slave to its master device;
 * returns NULL (and frees the skb) if bonding policy says drop. */
static inline struct net_device *skb_bond(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	if (dev->master) {
		if (skb_bond_should_drop(skb)) {
			kfree_skb(skb);
			return NULL;
		}
		skb->dev = dev->master;
	}

	return dev;
}

/* TX softirq: free completed skbs and run qdiscs for scheduled devices.
 * Both per-CPU queues are detached under local_irq_disable() and then
 * drained with interrupts enabled. */
static void net_tx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		local_irq_disable();
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_enable();

		while (clist) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			BUG_TRAP(!atomic_read(&skb->users));
			__kfree_skb(skb);
		}
	}

	if (sd->output_queue) {
		struct net_device *head;

		local_irq_disable();
		head = sd->output_queue;
		sd->output_queue = NULL;
		local_irq_enable();

		while (head) {
			struct net_device *dev = head;
			head = head->next_sched;

			smp_mb__before_clear_bit();
			clear_bit(__LINK_STATE_SCHED, &dev->state);

			/* If someone else holds queue_lock, reschedule
			 * instead of spinning in softirq context. */
			if (spin_trylock(&dev->queue_lock)) {
				qdisc_run(dev);
				spin_unlock(&dev->queue_lock);
			} else {
				netif_schedule(dev);
			}
		}
	}
}

/* Deliver the skb to one packet_type handler, taking a reference because
 * the handler consumes one. */
static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * If bridge module is loaded call bridging hook.
 *  returns NULL if packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
					struct sk_buff *skb) __read_mostly;
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK ||
	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
		return skb;

	/* Flush any pending handler before handing off to the bridge. */
	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif

#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we dont have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesnt stop any functionality; if you dont have
 * the ingress scheduler, you just cant add policies on ingress.
 *
 */
/* Run the ingress qdisc (policing) on a received skb; returns a TC_ACT_*
 * verdict, with loop protection via the reclassify TTL in tc_verd. */
static int ing_filter(struct sk_buff *skb)
{
	struct Qdisc *q;
	struct net_device *dev = skb->dev;
	int result = TC_ACT_OK;

	if (dev->qdisc_ingress) {
		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
		if (MAX_RED_LOOP < ttl++) {
			printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
				skb->iif, skb->dev->ifindex);
			return TC_ACT_SHOT;
		}

		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

		spin_lock(&dev->ingress_lock);
		if ((q = dev->qdisc_ingress) != NULL)
			result = q->enqueue(skb, q);
		spin_unlock(&dev->ingress_lock);

	}

	return result;
}
#endif

/* Foxconn added start pling 03/20/2012 */
/* PPTP call IDs of the router's own tunnel, imported from the PPTP
 * client so passthrough GRE sessions can be told apart below. */
static unsigned long call_id = 0;
static unsigned long peer_call_id = 0;

void dev_import_call_id(unsigned long pptp_call_id, unsigned long pptp_peer_call_id)
{
	call_id = pptp_call_id;
	peer_call_id = pptp_peer_call_id;
}
/* Foxconn added end pling 03/20/2012 */

/* Main receive demultiplexer: taps, ingress policing, bridging, then
 * protocol handlers.  Vendor additions rewrite the lookup type for PPTP
 * GRE and PPPoE session frames so fast-path handlers can claim them. */
int netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	struct net_device *orig_dev;
	int ret = NET_RX_DROP;
	__be16 type;

	/* if we've gotten here through NAPI, check netpoll */
	if (skb->dev->poll && netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	if (!skb->iif)
		skb->iif = skb->dev->ifindex;

	orig_dev = skb_bond(skb);

	if (!orig_dev)
		return NET_RX_DROP;

	__get_cpu_var(netdev_rx_stat).total++;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;

	pt_prev = NULL;

	rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	/* Deliver to all-protocol taps (e.g. packet sockets). */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	if (pt_prev) {
		ret = deliver_skb(skb, pt_prev, orig_dev);
		pt_prev = NULL; /* noone else should process this after*/
	} else {
		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
	}

	ret = ing_filter(skb);

	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
		kfree_skb(skb);
		goto out;
	}

	skb->tc_verd = 0;
ncls:
#endif

	/* Foxconn added start pling 03/14/2011 */
	/* For SamKnows briding: bridge "eth0" under "br0" with "vlan1" and "eth1".
	 * If packet comes from eth0 with VID1, then bypass bridge handling.
	 * ps. we only check the 4th byte of interface name:
	 *      eth0
	 *      eth1
	 *      vlan1
	 *         ^
	 *         +-- 4th byte='0' means packet is from eth0.
	 * NOTE(review): matches any interface whose name[3]=='0', not just
	 * eth0 — confirm no other such interfaces exist on this platform.
	 */
	if ((skb->dev->name[3] == '0') &&
	    (skb->protocol == htons(ETH_P_8021Q)) &&
	    (skb->data[0] == 0x00) &&
	    (skb->data[1] == 0x01)) {
		goto bypass_handle_bridge;
	}
	/* Foxconn added end pling 03/14/2011 */

	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto out;

	/* Foxconn added start pling 03/14/2011 */
bypass_handle_bridge:
	/* Foxconn added end pling 03/14/2011 */

	type = skb->protocol;
	/* Foxconn added start, pptp, Winster Chan, 06/26/2006 */
	int rsttype = 0;	/* set when a handler bounces a PPTP frame back */
	/* Check dst_addr & src_addr, if PPTP was active */
	if (pptp_ip_addr.dst_addr && pptp_ip_addr.src_addr) {
		if (type == NTOHS_ETH_P_IP) {
			struct pptp_ip_hdr *iphdr;
			iphdr = (struct pptp_ip_hdr *)(skb->data);
			if ((iphdr->saddr == pptp_ip_addr.dst_addr) &&
			    (iphdr->daddr == pptp_ip_addr.src_addr)) {
				/* Check if GRE header presented */
				if (iphdr->protocol == IP_PROTOCOL_GRE) {
					struct pptp_gre_hdr *grehdr;
					int grehdrlen = 8, iphdrlen = (int)(iphdr->ihl * 4), hdrlen;
					unsigned short ppp_proto;

					/* GRE header grows by 4 bytes each for the
					 * optional sequence and ack fields. */
					grehdr = (struct pptp_gre_hdr *)((char *)(iphdr) + iphdrlen);
					if (GRE_IS_S(grehdr->flags)) grehdrlen += 4;
					if (GRE_IS_A(grehdr->version)) grehdrlen += 4;

					/* Foxconn added start pling 03/20/2012 */
					/* Handle PPTP passthrough packets */
					//printk(KERN_EMERG "grehdr->call_id=%x, call_id=%x\n", htons(grehdr->call_id), call_id);
					if (htons(grehdr->call_id) != call_id) {
						//printk(KERN_EMERG "Not DUT PPTP call (our:%x, pkt:%x)\n", call_id, htons(grehdr->call_id));
						goto reset_type;
					}
					/* Foxconn added end pling 03/20/2012 */

					/* Foxconn added start pling 04/28/2011 */
					/* Russia MPD3 issue: sometimes server does not send 'ff03' */
					/* Per Netgear spec,
					 * -- only RU region, or
					 * -- WW firmware (with Russian language)
					 */
					hdrlen = iphdrlen + grehdrlen;
					if ((skb->data[hdrlen] != 0xff) || skb->data[hdrlen+1] != 0x3) {
#if (defined RU_VERSION)
						if (1)
#elif (defined WW_VERSION)
						if (strcmp(nvram_get("gui_region"), "Russian") == 0)
#else /* Other FW, don't apply this patch */
						if (0)
#endif
						{
							/* No PPP address/control field: the PPP
							 * protocol starts right after GRE. */
							ppp_proto = ntohs(*(unsigned short *)((unsigned char *)skb->data + hdrlen));
							goto check_header;
						}
					}
					/* Foxconn added end pling 04/28/2011 */

					/* Normal case: skip the 2-byte ff03 PPP
					 * address/control field. */
					hdrlen = iphdrlen + grehdrlen + 2;
					ppp_proto =
						ntohs(*(unsigned short *)((unsigned char *)skb->data + hdrlen));

					/* Foxconn added start pling 04/28/2011, Russia MPD3 issue */
check_header:
					/* Foxconn added end pling 04/28/2011 */

					/* Check if PPP header presented */
					if ((grehdr->protocol == GRE_PROTOCOL_PPTP) &&
					    ((int)(ntohs(grehdr->payload_len)) > 0) &&
					    ((int)(ntohs(iphdr->tot_len)) > hdrlen) &&
					    (ppp_proto <= PPP_NETWORK_LAYER) && (ppp_proto > 0)) {
						/* Set packet type == pptp */
						type = NTOHS_ETH_P_PPTP_GRE; /* Foxconn defined (0x082F) */
					}
				} /* End if (IP_PROTOCOL_GRE) */
			} /* End if (src_addr, dst_addr) */
		} /* End if (ETH_P_IP) */
	} /* End if (src_addr && dst_addr) */

	if (type == NTOHS_ETH_P_PPP_SES) {	// PPPoE Session packet
		if ((*((unsigned char *)skb->data + 6) <= PPP_NW_LAYER) &&
		    (*((unsigned short *)((unsigned char *)skb->data + 6)) > 0)) {
			type = NTOHS_ETH_P_PPPOE_SESS;
		}
	}
reset_type:
	/* A PPTP fast-path handler declined the frame: fall back to the
	 * ordinary IP handler on the second pass. */
	if ((rsttype == 1) && (ret == NET_RX_BYPASS) && (type == NTOHS_ETH_P_PPTP_GRE)) {
		type = NTOHS_ETH_P_IP;
		rsttype = 0;
	}
	/* Foxconn added end, pptp, Winster Chan, 06/26/2006 */

	/* Deliver to handlers registered for this (possibly rewritten)
	 * protocol type; last matching handler gets the original skb. */
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
		if (ptype->type == type &&
		    (!ptype->dev || ptype->dev == skb->dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
		/* Foxconn added start, pptp, Winster Chan, 06/26/2006 */
		if ((ret == NET_RX_BYPASS) && (pt_prev->type == NTOHS_ETH_P_PPTP_GRE)) {
			rsttype = 1;
			goto reset_type;
		}
		/* Foxconn added end, pptp, Winster Chan, 06/26/2006 */
	} else {
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}

#ifdef CONFIG_INET_GRO
/* Finish a held GRO skb: run the protocol gro_complete() (unless it is a
 * lone, unmerged packet) and push it up via netif_receive_skb(). */
static int BCMFASTPATH_HOST napi_gro_complete(struct sk_buff *skb)
{
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & 15];
	int err = -ENOENT;

	if (NAPI_GRO_CB(skb)->count == 1) {
		skb_shinfo(skb)->gso_size = 0;
		goto out;
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
			continue;

		err = ptype->gro_complete(skb);
		break;
	}
	rcu_read_unlock();

	if (err) {
		WARN_ON(&ptype->list == head);
		kfree_skb(skb);
		return NET_RX_SUCCESS;
	}

out:
	return netif_receive_skb(skb);
}

/* Flush every skb held on the device's GRO list up the stack. */
void BCMFASTPATH_HOST napi_gro_flush(struct net_device *gro_dev)
{
	struct sk_buff *skb, *next;

	for (skb = gro_dev->gro_list; skb; skb = next) {
		next = skb->next;
		skb->next = NULL;
		napi_gro_complete(skb);
	}

	gro_dev->gro_count = 0;
	gro_dev->gro_list = NULL;
}
EXPORT_SYMBOL(napi_gro_flush);

/* Return a pointer to @hlen bytes of header at the current GRO offset,
 * reading from the linear area or the first page fragment as needed. */
void * BCMFASTPATH_HOST skb_gro_header(struct sk_buff *skb, unsigned int hlen)
{
	unsigned int offset = skb_gro_offset(skb);

	hlen += offset;
	if (hlen <= skb_headlen(skb))
		return skb->data + offset;

	if (unlikely(!skb_shinfo(skb)->nr_frags ||
		     skb_shinfo(skb)->frags[0].size
		     <=
		     hlen - skb_headlen(skb) ||
		     PageHighMem(skb_shinfo(skb)->frags[0].page)))
		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;

	return page_address(skb_shinfo(skb)->frags[0].page) +
	       skb_shinfo(skb)->frags[0].page_offset +
	       offset - skb_headlen(skb);
}
EXPORT_SYMBOL(skb_gro_header);

/* Optional cap on merged GRO skb length; 0 means no cap. */
int skb_gro_len_max = 0;

/* Try to merge @skb into the device's GRO list; returns a GRO_* verdict.
 * Only plain (non-GSO, non-frag_list) IPv4 packets are candidates. */
int BCMFASTPATH_HOST dev_gro_receive(struct net_device *gro_dev, struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & 15];
	int same_flow;
	int mac_len;
	int ret;

	/* NOTE(review): skb->protocol is big-endian; ntohs() on the
	 * ETH_P_IP constant only matches on little-endian hosts (where it
	 * equals htons()) — confirm on the target endianness. */
	if (type != ntohs(ETH_P_IP))
		goto normal;

	if (!(skb->dev->features & NETIF_F_GRO))
		goto normal;

	if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
		goto normal;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
			continue;

		skb_set_network_header(skb, skb_gro_offset(skb));
		mac_len = skb->network_header - skb->mac_header;
		skb->mac_len = mac_len;
		NAPI_GRO_CB(skb)->same_flow = 0;
		NAPI_GRO_CB(skb)->flush = 0;
		NAPI_GRO_CB(skb)->free = 0;

		pp = ptype->gro_receive(&gro_dev->gro_list, skb);
		break;
	}
	rcu_read_unlock();

	/* No gro_receive handler was found for this type. */
	if (&ptype->list == head)
		goto normal;

	same_flow = NAPI_GRO_CB(skb)->same_flow;
	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;

	/* The handler returned a completed flow: flush it up the stack. */
	if (pp) {
		struct sk_buff *nskb = *pp;

		*pp = nskb->next;
		nskb->next = NULL;
		napi_gro_complete(nskb);
		gro_dev->gro_count--;
	}

	if (same_flow)
		goto ok;

	if (NAPI_GRO_CB(skb)->flush || gro_dev->gro_count >= MAX_GRO_SKBS ||
	    (skb_gro_len_max && skb_gro_len(skb) > skb_gro_len_max)) {
		napi_gro_flush(gro_dev);
		goto normal;
	}

	/* Hold the skb on the GRO list awaiting further segments. */
	gro_dev->gro_count++;
	NAPI_GRO_CB(skb)->count = 1;
	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
	skb->next = gro_dev->gro_list;
	gro_dev->gro_list = skb;
	ret = GRO_HELD;

pull:
	if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
		if (gro_dev->gro_list == skb)
			gro_dev->gro_list = skb->next;
		ret = GRO_DROP;
	}

ok:
	return ret;

normal:
	ret = GRO_NORMAL;
	goto pull;
}
EXPORT_SYMBOL(dev_gro_receive);

/* Pre-compare MAC headers of held flows against the new skb so protocol
 * gro_receive handlers can short-circuit non-matching flows. */
static int BCMFASTPATH_HOST __napi_gro_receive(struct net_device *gro_dev, struct sk_buff *skb)
{
	struct sk_buff *p;

	for (p = gro_dev->gro_list; p; p = p->next) {
		NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
			&& !compare_ether_header(skb_mac_header(p),
						 skb_gro_mac_header(skb));
		NAPI_GRO_CB(p)->flush = 0;
	}

	return dev_gro_receive(gro_dev, skb);
}

/* Translate a GRO_* verdict into the final disposition of the skb.
 * GRO_HELD/GRO_MERGED fall out of the switch: the skb stays owned by the
 * GRO layer and NET_RX_SUCCESS is returned. */
int BCMFASTPATH_HOST napi_skb_finish(int ret, struct sk_buff *skb)
{
	int err = NET_RX_SUCCESS;

	switch (ret) {
	case GRO_NORMAL:
		return netif_receive_skb(skb);

	case GRO_DROP:
		err = NET_RX_DROP;
		/* fall through */

	case GRO_MERGED_FREE:
		kfree_skb(skb);
		break;
	}

	return err;
}
EXPORT_SYMBOL(napi_skb_finish);

/* Public GRO entry point for drivers: reset the GRO offset and run the
 * receive/finish pipeline. */
int BCMFASTPATH_HOST napi_gro_receive(struct net_device *gro_dev, struct sk_buff *skb)
{
	skb_gro_reset_offset(skb);

	return napi_skb_finish(__napi_gro_receive(gro_dev, skb), skb);
}
EXPORT_SYMBOL(napi_gro_receive);
#endif /* CONFIG_INET_GRO */

/*
 * Poll handler of the per-CPU backlog pseudo-device: drain packets that
 * netif_rx() queued on this CPU's input_pkt_queue and feed each one to
 * the receive path (through GRO when configured).  Returns 0 when the
 * queue is empty (device taken off the poll list), -1 otherwise.
 */
static int process_backlog(struct net_device *backlog_dev, int *budget)
{
	int work = 0;
	/* Consume no more than both our own quota and the caller's budget. */
	int quota = min(backlog_dev->quota, *budget);
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	unsigned long start_time = jiffies;

	backlog_dev->weight = weight_p;
	for (;;) {
		struct sk_buff *skb;
		struct net_device *dev;

		/* input_pkt_queue is also touched from hard-irq context,
		 * so it must be dequeued with interrupts off. */
		local_irq_disable();
		skb = __skb_dequeue(&queue->input_pkt_queue);
		if (!skb)
			goto job_done;
		local_irq_enable();

		dev = skb->dev;

#ifdef CONFIG_INET_GRO
		napi_gro_receive(skb->dev, skb);
#else
		netif_receive_skb(skb);
#endif /* CONFIG_INET_GRO */

		/* Drop the device reference taken when the skb was queued. */
		dev_put(dev);

		work++;

		/* Yield once the quota is used up or a jiffy has elapsed. */
		if (work >= quota || jiffies - start_time > 1)
			break;

	}

	backlog_dev->quota -= work;
	*budget -= work;
	return -1;

job_done:
	/* Queue drained; irqs are still disabled from the failed dequeue. */
	backlog_dev->quota -= work;
	*budget -= work;

	list_del(&backlog_dev->poll_list);
	smp_mb__before_clear_bit();
	netif_poll_enable(backlog_dev);

	local_irq_enable();
	return 0;
}

/*
 * NET_RX softirq handler: walk this CPU's poll list calling each
 * device's ->poll() until the global budget or the one-jiffy time slice
 * is exhausted, then re-raise the softirq if work remains.
 */
static void net_rx_action(struct softirq_action *h)
{
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	unsigned long start_time = jiffies;
	int budget = netdev_budget;
	void *have;

	local_irq_disable();

	while (!list_empty(&queue->poll_list)) {
		struct net_device *dev;

		if (budget <= 0 || jiffies - start_time > 1)
			goto softnet_break;

		local_irq_enable();

		dev = list_entry(queue->poll_list.next,
				 struct net_device, poll_list);
		have = netpoll_poll_lock(dev);

		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
			/* Device has more work: rotate it to the list tail
			 * and replenish its quota for the next round. */
			netpoll_poll_unlock(have);
			local_irq_disable();
			list_move_tail(&dev->poll_list, &queue->poll_list);
			if (dev->quota < 0)
				dev->quota += dev->weight;
			else
				dev->quota = dev->weight;
		} else {
			/* ->poll() completed and removed itself; release
			 * the reference it held while on the list. */
			netpoll_poll_unlock(have);
			dev_put(dev);
			local_irq_disable();
		}
	}
out:
	local_irq_enable();
#ifdef CONFIG_NET_DMA
	/*
	 * There may not be any more sk_buffs coming right now, so push
	 * any pending DMA copies to hardware
	 */
	if (net_dma_client) {
		struct dma_chan *chan;
		rcu_read_lock();
		list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
			dma_async_memcpy_issue_pending(chan);
		rcu_read_unlock();
	}
#endif
	return;

softnet_break:
	/* Out of budget/time: account the squeeze and run again later. */
	__get_cpu_var(netdev_rx_stat).time_squeeze++;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}

/* Per-address-family SIOCGIFCONF dump handlers, indexed by family. */
static gifconf_func_t * gifconf_list [NPROTO];

/**
 *	register_gifconf	-	register a SIOCGIF handler
 *	@family: Address family
 *	@gifconf: Function handler
 *
 *	Register protocol dependent address dumping routines. The handler
 *	that is passed must not be freed or reused until it has been replaced
 *	by another handler.
 */
int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
	if (family >= NPROTO)
		return -EINVAL;
	gifconf_list[family] = gifconf;
	return 0;
}


/*
 *	Map an interface index to its name (SIOCGIFNAME)
 */

/*
 *	We need this ioctl for efficient implementation of the
 *	if_indextoname() function required by the IPv6 API.  Without
 *	it, we would have to search all the interfaces to find a
 *	match.  --pb
 */

static int dev_ifname(struct ifreq __user *arg)
{
	struct net_device *dev;
	struct ifreq ifr;

	/*
	 *	Fetch the caller's info block.
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	/* dev_base_lock keeps the device (and its name) alive while we
	 * copy it out of the table. */
	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(ifr.ifr_ifindex);
	if (!dev) {
		read_unlock(&dev_base_lock);
		return -ENODEV;
	}

	strcpy(ifr.ifr_name, dev->name);
	read_unlock(&dev_base_lock);

	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
		return -EFAULT;
	return 0;
}

/*
 *	Perform a SIOCGIFCONF call. This structure will change
 *	size eventually, and there is nothing I can do about it.
 *	Thus we will need a 'compatibility mode'.
 */

static int dev_ifconf(char __user *arg)
{
	struct ifconf ifc;
	struct net_device *dev;
	char __user *pos;
	int len;
	int total;
	int i;

	/*
	 *	Fetch the caller's info block.
	 */

	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
		return -EFAULT;

	pos = ifc.ifc_buf;
	len = ifc.ifc_len;

	/*
	 *	Loop over the interfaces, and write an info block for each.
	 */

	total = 0;
	for_each_netdev(dev) {
		for (i = 0; i < NPROTO; i++) {
			if (gifconf_list[i]) {
				int done;
				/* A NULL buffer means "just report how much
				 * space would be needed". */
				if (!pos)
					done = gifconf_list[i](dev, NULL, 0);
				else
					done = gifconf_list[i](dev, pos + total,
							       len - total);
				if (done < 0)
					return -EFAULT;
				total += done;
			}
		}
	}

	/*
	 *	All done.  Write the updated control block back to the caller.
	 */
	ifc.ifc_len = total;

	/*
	 * 	Both BSD and Solaris return 0 here, so we do too.
	 */
	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
}

#ifdef CONFIG_PROC_FS
/*
 *	This is invoked by the /proc filesystem handler to display a device
 *	in detail.
 */
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
{
	loff_t off;
	struct net_device *dev;

	/* dev_base_lock is held for the whole traversal and only dropped
	 * in dev_seq_stop(). */
	read_lock(&dev_base_lock);
	if (!*pos)
		return SEQ_START_TOKEN;

	off = 1;
	for_each_netdev(dev)
		if (off++ == *pos)
			return dev;

	return NULL;
}

void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return v == SEQ_START_TOKEN ?
		first_net_device() : next_net_device((struct net_device *)v);
}

void dev_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&dev_base_lock);
}

/* Emit one /proc/net/dev line of RX/TX counters for @dev. */
static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
	struct net_device_stats *stats = dev->get_stats(dev);

	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
		   dev->name, stats->rx_bytes, stats->rx_packets,
		   stats->rx_errors,
		   stats->rx_dropped + stats->rx_missed_errors,
		   stats->rx_fifo_errors,
		   stats->rx_length_errors + stats->rx_over_errors +
		     stats->rx_crc_errors + stats->rx_frame_errors,
		   stats->rx_compressed, stats->multicast,
		   stats->tx_bytes, stats->tx_packets,
		   stats->tx_errors, stats->tx_dropped,
		   stats->tx_fifo_errors, stats->collisions,
		   stats->tx_carrier_errors +
		     stats->tx_aborted_errors +
		     stats->tx_window_errors +
		     stats->tx_heartbeat_errors,
		   stats->tx_compressed);
}

/*
 *	Called from the PROCfs module. This now uses the new arbitrary sized
 *	/proc/net interface to create /proc/net/dev
 */
static int dev_seq_show(struct seq_file *seq, void *v)
{
	/* NOTE(review): header column spacing reconstructed from the
	 * mangled original — verify against /proc/net/dev consumers. */
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Inter-|   Receive                            "
			      "                    |  Transmit\n"
			      " face |bytes    packets errs drop fifo frame "
			      "compressed multicast|bytes    packets errs "
			      "drop fifo colls carrier compressed\n");
	else
		dev_seq_printf_stats(seq, v);
	return 0;
}

/* Advance *pos to the next online CPU and return its softnet stats. */
static struct netif_rx_stats *softnet_get_online(loff_t *pos)
{
	struct netif_rx_stats *rc = NULL;

	while (*pos < NR_CPUS)
		if (cpu_online(*pos)) {
			rc = &per_cpu(netdev_rx_stat, *pos);
			break;
		} else
			++*pos;
	return rc;
}

static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
{
	return softnet_get_online(pos);
}

static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return softnet_get_online(pos);
}

static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}

/* One /proc/net/softnet_stat line per online CPU. */
static int softnet_seq_show(struct seq_file *seq, void *v)
{
	struct netif_rx_stats *s = v;

	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
		   s->total, s->dropped, s->time_squeeze, 0,
		   0, 0, 0, 0, /* was fastroute */
		   s->cpu_collision );
	return 0;
}

static const struct seq_operations dev_seq_ops = {
	.start = dev_seq_start,
	.next  = dev_seq_next,
	.stop  = dev_seq_stop,
	.show  = dev_seq_show,
};

static int dev_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &dev_seq_ops);
}

static const struct file_operations dev_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = dev_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

static const struct seq_operations softnet_seq_ops = {
	.start = softnet_seq_start,
	.next  = softnet_seq_next,
	.stop  = softnet_seq_stop,
	.show  = softnet_seq_show,
};

static int softnet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &softnet_seq_ops);
}

static const struct file_operations softnet_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = softnet_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

/* Return the @pos'th packet_type: ptype_all first, then each hash chain. */
static void *ptype_get_idx(loff_t pos)
{
	struct packet_type *pt = NULL;
	loff_t i = 0;
	int t;

	list_for_each_entry_rcu(pt, &ptype_all, list) {
		if (i == pos)
			return pt;
		++i;
	}

	for (t = 0; t < 16; t++) {
		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
			if (i == pos)
				return pt;
			++i;
		}
	}
	return NULL;
}

static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
{
	/* The ptype lists are RCU protected; unlocked in ptype_seq_stop(). */
	rcu_read_lock();
	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct packet_type *pt;
	struct list_head *nxt;
	int hash;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ptype_get_idx(0);

	pt = v;
	nxt = pt->list.next;
	if (pt->type == htons(ETH_P_ALL)) {
		if (nxt != &ptype_all)
			goto found;
		/* End of ptype_all: continue into the hashed chains. */
		hash = 0;
		nxt = ptype_base[0].next;
	} else
		hash = ntohs(pt->type) & 15;

	/* Skip over empty hash chains. */
	while (nxt == &ptype_base[hash]) {
		if (++hash >= 16)
			return NULL;
		nxt = ptype_base[hash].next;
	}
found:
	return list_entry(nxt, struct packet_type, list);
}

static void ptype_seq_stop(struct seq_file *seq, void *v)
{
	rcu_read_unlock();
}

/* Print a handler address as [module:]symbol+offset when kallsyms can. */
static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
	unsigned long offset = 0, symsize;
	const char *symname;
	char *modname;
	char namebuf[128];

	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
				  &modname, namebuf);

	if (symname) {
		char *delim = ":";

		if (!modname)
			modname = delim = "";
		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
			   symname, offset);
		return;
	}
#endif

	/* Fall back to the raw pointer when no symbol is available. */
	seq_printf(seq, "[%p]", sym);
}

static int ptype_seq_show(struct seq_file *seq, void *v)
{
	struct packet_type *pt = v;

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Type Device      Function\n");
	else {
		if (pt->type == htons(ETH_P_ALL))
			seq_puts(seq, "ALL ");
		else
			seq_printf(seq, "%04x", ntohs(pt->type));

		seq_printf(seq, " %-8s ",
			   pt->dev ? pt->dev->name : "");
		ptype_seq_decode(seq,  pt->func);
		seq_putc(seq, '\n');
	}

	return 0;
}

static const struct seq_operations ptype_seq_ops = {
	.start = ptype_seq_start,
	.next  = ptype_seq_next,
	.stop  = ptype_seq_stop,
	.show  = ptype_seq_show,
};

static int ptype_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &ptype_seq_ops);
}

static const struct file_operations ptype_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = ptype_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};


/*
 * Create the /proc/net entries owned by the core (dev, softnet_stat,
 * ptype) plus the wireless-extensions entry, unwinding on any failure.
 */
static int __init dev_proc_init(void)
{
	int rc = -ENOMEM;

	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
		goto out;
	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
		goto out_dev;
	if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
		goto out_dev2;

	if (wext_proc_init())
		goto out_softnet;
	rc = 0;
out:
	return rc;
out_softnet:
	proc_net_remove("ptype");
out_dev2:
	proc_net_remove("softnet_stat");
out_dev:
	proc_net_remove("dev");
	goto out;
}
#else
#define dev_proc_init() 0
#endif	/* CONFIG_PROC_FS */


/**
 *	netdev_set_master	-	set up master/slave pair
 *	@slave: slave device
 *	@master: new master device
 *
 *	Changes the master device of the slave. Pass %NULL to break the
 *	bonding. The caller must hold the RTNL semaphore. On a failure
 *	a negative errno code is returned. On success the reference counts
 *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
 *	function returns zero.
 */
int netdev_set_master(struct net_device *slave, struct net_device *master)
{
	struct net_device *old = slave->master;

	ASSERT_RTNL();

	if (master) {
		/* Refuse to re-enslave an already enslaved device. */
		if (old)
			return -EBUSY;
		dev_hold(master);
	}

	slave->master = master;

	/* Wait until no packet path can still see the old master pointer
	 * before dropping its reference. */
	synchronize_net();

	if (old)
		dev_put(old);

	if (master)
		slave->flags |= IFF_SLAVE;
	else
		slave->flags &= ~IFF_SLAVE;

	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
	return 0;
}

/**
 *	dev_set_promiscuity	- update promiscuity count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove promiscuity from a device. While the count in the device
 *	remains above zero the interface remains promiscuous. Once it hits zero
 *	the device reverts back to normal filtering operation. A negative inc
 *	value is used to drop promiscuity on the device.
 */
void dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	if ((dev->promiscuity += inc) == 0)
		dev->flags &= ~IFF_PROMISC;
	else
		dev->flags |= IFF_PROMISC;
	if (dev->flags != old_flags) {
		/* Push the new filtering mode down to the driver and leave
		 * an audit trail of the transition. */
		dev_mc_upload(dev);
		printk(KERN_INFO "device %s %s promiscuous mode\n",
		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
							       "left");
		audit_log(current->audit_context, GFP_ATOMIC,
			AUDIT_ANOM_PROMISCUOUS,
			"dev=%s prom=%d old_prom=%d auid=%u",
			dev->name, (dev->flags & IFF_PROMISC),
			(old_flags & IFF_PROMISC),
			audit_get_loginuid(current->audit_context));
	}
}

/**
 *	dev_set_allmulti	- update allmulti count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove reception of all multicast frames to a device. While the
 *	count in the device remains above zero the interface remains listening
 *	to all interfaces. Once it hits zero the device reverts back to normal
 *	filtering operation. A negative @inc value is used to drop the counter
 *	when releasing a resource needing all multicasts.
 */

void dev_set_allmulti(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	/* Set unconditionally, then clear again if the count hit zero;
	 * the xor test below only uploads on a real transition. */
	dev->flags |= IFF_ALLMULTI;
	if ((dev->allmulti += inc) == 0)
		dev->flags &= ~IFF_ALLMULTI;
	if (dev->flags ^ old_flags)
		dev_mc_upload(dev);
}

/*
 * Build the user-visible flag word: real dev->flags with the
 * user-requested PROMISC/ALLMULTI view from gflags substituted in,
 * plus the operational RUNNING/LOWER_UP/DORMANT state bits.
 */
unsigned dev_get_flags(const struct net_device *dev)
{
	unsigned flags;

	flags = (dev->flags & ~(IFF_PROMISC |
				IFF_ALLMULTI |
				IFF_RUNNING |
				IFF_LOWER_UP |
				IFF_DORMANT)) |
		(dev->gflags & (IFF_PROMISC |
				IFF_ALLMULTI));

	if (netif_running(dev)) {
		if (netif_oper_up(dev))
			flags |= IFF_RUNNING;
		if (netif_carrier_ok(dev))
			flags |= IFF_LOWER_UP;
		if (netif_dormant(dev))
			flags |= IFF_DORMANT;
	}

	return flags;
}

/*
 * Apply a SIOCSIFFLAGS-style flag word: IFF_UP transitions open/close
 * the device, PROMISC/ALLMULTI go through their reference counters,
 * and interested parties are notified of the remaining changes.
 */
int dev_change_flags(struct net_device *dev, unsigned flags)
{
	int ret, changes;
	int old_flags = dev->flags;

	/*
	 *	Set the flags on our device.
	 */

	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
			       IFF_AUTOMEDIA)) |
		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
				    IFF_ALLMULTI));

	/*
	 *	Load in the correct multicast list now the flags have changed.
	 */

	dev_mc_upload(dev);

	/*
	 *	Have we downed the interface. We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */

	ret = 0;
	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);

		if (!ret)
			dev_mc_upload(dev);
	}

	if (dev->flags & IFF_UP &&
	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
					  IFF_VOLATILE)))
		raw_notifier_call_chain(&netdev_chain,
				NETDEV_CHANGE, dev);

	if ((flags ^ dev->gflags) & IFF_PROMISC) {
		int inc = (flags & IFF_PROMISC) ? +1 : -1;
		dev->gflags ^= IFF_PROMISC;
		dev_set_promiscuity(dev, inc);
	}

	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	   is important. Some (broken) drivers set IFF_PROMISC, when
	   IFF_ALLMULTI is requested not asking us and not reporting.
	 */
	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
		dev->gflags ^= IFF_ALLMULTI;
		dev_set_allmulti(dev, inc);
	}

	/* Exclude state transition flags, already notified */
	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
	if (changes)
		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);

	return ret;
}

/* Change the device MTU, preferring the driver's change_mtu hook and
 * notifying NETDEV_CHANGEMTU listeners for devices that are up. */
int dev_set_mtu(struct net_device *dev, int new_mtu)
{
	int err;

	if (new_mtu == dev->mtu)
		return 0;

	/*	MTU must be positive.
 */
	if (new_mtu < 0)
		return -EINVAL;

	if (!netif_device_present(dev))
		return -ENODEV;

	err = 0;
	if (dev->change_mtu)
		err = dev->change_mtu(dev, new_mtu);
	else
		dev->mtu = new_mtu;
	if (!err && dev->flags & IFF_UP)
		raw_notifier_call_chain(&netdev_chain,
				NETDEV_CHANGEMTU, dev);
	return err;
}

/* Set the hardware (MAC) address via the driver hook and notify
 * NETDEV_CHANGEADDR listeners on success. */
int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
{
	int err;

	if (!dev->set_mac_address)
		return -EOPNOTSUPP;
	if (sa->sa_family != dev->type)
		return -EINVAL;
	if (!netif_device_present(dev))
		return -ENODEV;
	err = dev->set_mac_address(dev, sa);
	if (!err)
		raw_notifier_call_chain(&netdev_chain,
				NETDEV_CHANGEADDR, dev);
	return err;
}

/*
 *	Perform the SIOCxIFxxx calls.
 *
 *	The caller (dev_ioctl) has already taken the locking appropriate
 *	for the command; @ifr holds a copied-in, NUL-terminated request.
 */
static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);

	if (!dev)
		return -ENODEV;

	switch (cmd) {
		case SIOCGIFFLAGS:	/* Get interface flags */
			ifr->ifr_flags = dev_get_flags(dev);
			return 0;

		case SIOCSIFFLAGS:	/* Set interface flags */
			return dev_change_flags(dev, ifr->ifr_flags);

		case SIOCGIFMETRIC:	/* Get the metric on the interface
					   (currently unused) */
			ifr->ifr_metric = 0;
			return 0;

		case SIOCSIFMETRIC:	/* Set the metric on the interface
					   (currently unused) */
			return -EOPNOTSUPP;

		case SIOCGIFMTU:	/* Get the MTU of a device */
			ifr->ifr_mtu = dev->mtu;
			return 0;

		case SIOCSIFMTU:	/* Set the MTU of a device */
			return dev_set_mtu(dev, ifr->ifr_mtu);

		case SIOCGIFHWADDR:
			if (!dev->addr_len)
				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
			else
				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
			ifr->ifr_hwaddr.sa_family = dev->type;
			return 0;

		case SIOCSIFHWADDR:
			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);

		case SIOCSIFHWBROADCAST:
			if (ifr->ifr_hwaddr.sa_family != dev->type)
				return -EINVAL;
			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
			raw_notifier_call_chain(&netdev_chain,
					    NETDEV_CHANGEADDR, dev);
			return 0;

		case SIOCGIFMAP:
			ifr->ifr_map.mem_start = dev->mem_start;
			ifr->ifr_map.mem_end   = dev->mem_end;
			ifr->ifr_map.base_addr = dev->base_addr;
			ifr->ifr_map.irq       = dev->irq;
			ifr->ifr_map.dma       = dev->dma;
			ifr->ifr_map.port      = dev->if_port;
			return 0;

		case SIOCSIFMAP:
			if (dev->set_config) {
				if (!netif_device_present(dev))
					return -ENODEV;
				return dev->set_config(dev, &ifr->ifr_map);
			}
			return -EOPNOTSUPP;

		case SIOCADDMULTI:
			if (!dev->set_multicast_list ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
					  dev->addr_len, 1);

		case SIOCDELMULTI:
			if (!dev->set_multicast_list ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
					     dev->addr_len, 1);

		case SIOCGIFINDEX:
			ifr->ifr_ifindex = dev->ifindex;
			return 0;

		case SIOCGIFTXQLEN:
			ifr->ifr_qlen = dev->tx_queue_len;
			return 0;

		case SIOCSIFTXQLEN:
			if (ifr->ifr_qlen < 0)
				return -EINVAL;
			dev->tx_queue_len = ifr->ifr_qlen;
			return 0;

		case SIOCSIFNAME:
			/* Defensively terminate the user-supplied name. */
			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
			return dev_change_name(dev, ifr->ifr_newname);

		/*
		 *	Unknown or private ioctl
		 */

		default:
			/* Known device-private / MII / bonding / bridge
			 * commands are delegated to the driver's do_ioctl. */
			if ((cmd >= SIOCDEVPRIVATE &&
			    cmd <= SIOCDEVPRIVATE + 15) ||
			    cmd == SIOCBONDENSLAVE ||
			    cmd == SIOCBONDRELEASE ||
			    cmd == SIOCBONDSETHWADDR ||
			    cmd == SIOCBONDSLAVEINFOQUERY ||
			    cmd == SIOCBONDINFOQUERY ||
			    cmd == SIOCBONDCHANGEACTIVE ||
			    cmd == SIOCGMIIPHY ||
			    cmd == SIOCGMIIREG ||
			    cmd == SIOCSMIIREG ||
			    cmd == SIOCBRADDIF ||
			    cmd == SIOCBRDELIF ||
			    cmd == SIOCWANDEV) {
				err = -EOPNOTSUPP;
				if (dev->do_ioctl) {
					if (netif_device_present(dev))
						err = dev->do_ioctl(dev, ifr,
								    cmd);
					else
						err = -ENODEV;
				}
			} else
				err = -EINVAL;

	}
	return err;
}

/*
 *	This function handles all "interface"-type I/O control requests. The actual
 *	'doing' part of this is dev_ifsioc above.
 */

/**
 *	dev_ioctl	-	network device ioctl
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Issue ioctl functions to devices. This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes. The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 */

int dev_ioctl(unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* One special case: SIOCGIFCONF takes ifconf argument
	   and requires shared lock, because it sleeps writing
	   to user space.
	 */

	if (cmd == SIOCGIFCONF) {
		rtnl_lock();
		ret = dev_ifconf((char __user *) arg);
		rtnl_unlock();
		return ret;
	}
	if (cmd == SIOCGIFNAME)
		return dev_ifname((struct ifreq __user *)arg);

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	/* Terminate the name and strip any alias suffix (":n") so lookup
	 * finds the base device; the colon is restored on copy-out. */
	ifr.ifr_name[IFNAMSIZ-1] = 0;

	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
 */

	switch (cmd) {
		/*
		 *	These ioctl calls:
		 *	- can be done by all.
		 *	- atomic and do not require locking.
		 *	- return a value
		 */
		case SIOCGIFFLAGS:
		case SIOCGIFMETRIC:
		case SIOCGIFMTU:
		case SIOCGIFHWADDR:
		case SIOCGIFSLAVE:
		case SIOCGIFMAP:
		case SIOCGIFINDEX:
		case SIOCGIFTXQLEN:
			dev_load(ifr.ifr_name);
			read_lock(&dev_base_lock);
			ret = dev_ifsioc(&ifr, cmd);
			read_unlock(&dev_base_lock);
			if (!ret) {
				/* Restore the alias colon stripped earlier
				 * before copying the result back. */
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		case SIOCETHTOOL:
			dev_load(ifr.ifr_name);
			rtnl_lock();
			ret = dev_ethtool(&ifr);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- return a value
		 */
		case SIOCGMIIPHY:
		case SIOCGMIIREG:
		case SIOCSIFNAME:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			dev_load(ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(&ifr, cmd);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- do not return a value
		 */
		case SIOCSIFFLAGS:
		case SIOCSIFMETRIC:
		case SIOCSIFMTU:
		case SIOCSIFMAP:
		case SIOCSIFHWADDR:
		case SIOCSIFSLAVE:
		case SIOCADDMULTI:
		case SIOCDELMULTI:
		case SIOCSIFHWBROADCAST:
		case SIOCSIFTXQLEN:
		case SIOCSMIIREG:
		case SIOCBONDENSLAVE:
		case SIOCBONDRELEASE:
		case SIOCBONDSETHWADDR:
		case SIOCBONDCHANGEACTIVE:
		case SIOCBRADDIF:
		case SIOCBRDELIF:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			/* fall through */
		case SIOCBONDSLAVEINFOQUERY:
		case SIOCBONDINFOQUERY:
			/* The bonding query calls are unprivileged. */
			dev_load(ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(&ifr, cmd);
			rtnl_unlock();
			return ret;

		case SIOCGIFMEM:
			/* Get the per device memory space. We can add this but
			 * currently do not support it */
		case SIOCSIFMEM:
			/* Set the per device memory buffer space.
			 * Not applicable in our case */
		case SIOCSIFLINK:
			return -EINVAL;

		/*
		 *	Unknown or private ioctl.
		 */
		default:
			if (cmd == SIOCWANDEV ||
			    (cmd >= SIOCDEVPRIVATE &&
			     cmd <= SIOCDEVPRIVATE + 15)) {
				dev_load(ifr.ifr_name);
				rtnl_lock();
				ret = dev_ifsioc(&ifr, cmd);
				rtnl_unlock();
				if (!ret && copy_to_user(arg, &ifr,
							 sizeof(struct ifreq)))
					ret = -EFAULT;
				return ret;
			}
			/* Take care of Wireless Extensions */
			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
				return wext_handle_ioctl(&ifr, cmd, arg);
			return -EINVAL;
	}
}


/**
 *	dev_new_index	-	allocate an ifindex
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 */
static int dev_new_index(void)
{
	static int ifindex;
	for (;;) {
		/* Handle wrap-around: ifindex values must stay positive. */
		if (++ifindex <= 0)
			ifindex = 1;
		if (!__dev_get_by_index(ifindex))
			return ifindex;
	}
}

/* Set while the core is still initializing; registering then is a bug. */
static int dev_boot_phase = 1;

/* Delayed registration/unregisteration */
static DEFINE_SPINLOCK(net_todo_list_lock);
static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);

/* Queue @dev for deferred (un)registration processing. */
static void net_set_todo(struct net_device *dev)
{
	spin_lock(&net_todo_list_lock);
	list_add_tail(&dev->todo_list, &net_todo_list);
	spin_unlock(&net_todo_list_lock);
}

/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 *
 *	BUGS:
 *	The locking appears insufficient to guarantee two parallel registers
 *	will not get the same name.
 */

int register_netdevice(struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *p;
	int ret;

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	/* When net_device's are persistent, this will be fatal. */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);

	spin_lock_init(&dev->queue_lock);
	spin_lock_init(&dev->_xmit_lock);
	netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
	dev->xmit_lock_owner = -1;
	spin_lock_init(&dev->ingress_lock);

	dev->iflink = -1;

	/* Init, if this function is available */
	if (dev->init) {
		ret = dev->init(dev);
		if (ret) {
			/* Normalize bogus positive returns to an error. */
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}

	if (!dev_valid_name(dev->name)) {
		ret = -EINVAL;
		goto out;
	}

	dev->ifindex = dev_new_index();
	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	/* Check for existence of name */
	head = dev_name_hash(dev->name);
	hlist_for_each(p, head) {
		struct net_device *d
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
			ret = -EEXIST;
			goto out;
		}
	}

	/* Fix illegal SG+CSUM combinations. */
	if ((dev->features & NETIF_F_SG) &&
	    !(dev->features & NETIF_F_ALL_CSUM)) {
		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
		       dev->name);
		dev->features &= ~NETIF_F_SG;
	}

	/* TSO requires that SG is present as well. */
	if ((dev->features & NETIF_F_TSO) &&
	    !(dev->features & NETIF_F_SG)) {
		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
		       dev->name);
		dev->features &= ~NETIF_F_TSO;
	}
	if (dev->features & NETIF_F_UFO) {
		/* UFO needs both hardware checksumming and SG. */
		if (!(dev->features & NETIF_F_HW_CSUM)) {
			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
					"NETIF_F_HW_CSUM feature.\n",
							dev->name);
			dev->features &= ~NETIF_F_UFO;
		}
		if (!(dev->features & NETIF_F_SG)) {
			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
					"NETIF_F_SG feature.\n",
					dev->name);
			dev->features &= ~NETIF_F_UFO;
		}
	}

	/*
	 *	nil rebuild_header routine,
	 *	that should be never called and used as just bug trap.
	 */

	if (!dev->rebuild_header)
		dev->rebuild_header = default_rebuild_header;

	ret = netdev_register_sysfs(dev);
	if (ret)
		goto out;
	dev->reg_state = NETREG_REGISTERED;

	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */

	set_bit(__LINK_STATE_PRESENT, &dev->state);

	dev_init_scheduler(dev);
	write_lock_bh(&dev_base_lock);
	list_add_tail(&dev->dev_list, &dev_base_head);
	hlist_add_head(&dev->name_hlist, head);
	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
	dev_hold(dev);
	write_unlock_bh(&dev_base_lock);

	/* Notify protocols, that a new device appeared. */
	raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);

	ret = 0;

out:
	return ret;
}

/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success.
A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice that takes the rtnl semaphore
 *	and expands the device name if you passed a format string to
 *	alloc_netdev.
 */
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();

	/*
	 * If the name is a format string the caller wants us to do a
	 * name allocation.
	 */
	if (strchr(dev->name, '%')) {
		err = dev_alloc_name(dev, dev->name);
		if (err < 0)
			goto out;
	}

	err = register_netdevice(dev);
out:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);

/*
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;

	rebroadcast_time = warning_time = jiffies;
	while (atomic_read(&dev->refcnt) != 0) {
		/* Once a second, re-send the UNREGISTER notification in case
		 * a late subscriber still holds a reference.
		 */
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			raw_notifier_call_chain(&netdev_chain,
						NETDEV_UNREGISTER, dev);

			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		/* Complain every ten seconds so a stuck refcount is visible
		 * in the log rather than a silent hang.
		 */
		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
}

/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock() after it drops the semaphore.
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 */
static DEFINE_MUTEX(net_todo_run_mutex);
void netdev_run_todo(void)
{
	struct list_head list;

	/* Need to guard against multiple cpu's getting out of order. */
	mutex_lock(&net_todo_run_mutex);

	/* Not safe to do outside the semaphore.  We must not return
	 * until all unregister events invoked by the local processor
	 * have been completed (either by this todo run, or one on
	 * another cpu).
	 */
	if (list_empty(&net_todo_list))
		goto out;

	/* Snapshot list, allow later requests */
	spin_lock(&net_todo_list_lock);
	list_replace_init(&net_todo_list, &list);
	spin_unlock(&net_todo_list_lock);

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		/* Only devices queued by unregister_netdevice() (state
		 * NETREG_UNREGISTERING) are expected here.
		 */
		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		/* Blocks until every dev_hold() has been balanced. */
		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		BUG_TRAP(!dev->ip_ptr);
		BUG_TRAP(!dev->ip6_ptr);
		BUG_TRAP(!dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}

out:
	mutex_unlock(&net_todo_run_mutex);
}

/* Default get_stats implementation: the counters are embedded in the
 * net_device itself.
 */
static struct net_device_stats *internal_stats(struct net_device *dev)
{
	return &dev->stats;
}

/**
 *	alloc_netdev - allocate network device
 *	@sizeof_priv:	size of private data to allocate space for
 *	@name:		device name format string
 *	@setup:		callback to initialize device
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization.
 */
struct net_device *alloc_netdev(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *))
{
	void *p;
	struct net_device *dev;
	int alloc_size;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	/* ensure 32-byte alignment of both the device and private area */
	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
		return NULL;
	}

	/* dev->padded records the alignment offset so free_netdev() can
	 * recover the original allocation pointer.
	 */
	dev = (struct net_device *)
		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
	dev->padded = (char *)dev - (char *)p;

	if (sizeof_priv)
		dev->priv = netdev_priv(dev);

	dev->get_stats = internal_stats;
	setup(dev);
	strcpy(dev->name, name);
	return dev;
}
EXPORT_SYMBOL(alloc_netdev);

/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released.
 *	If this is the last reference then it will be freed.
 */
void free_netdev(struct net_device *dev)
{
#ifdef CONFIG_SYSFS
	/* Compatibility with error handling in drivers */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	/* will free via device release */
	put_device(&dev->dev);
#else
	kfree((char *)dev - dev->padded);
#endif
}

/* Synchronize with packet receive processing.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}

/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables. On success 0 is returned, on a failure
 *	a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	/* Some devices call without registering for initialization unwind. */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
				  "was registered\n", dev->name, dev);

		WARN_ON(1);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_REGISTERED);

	/* If device is running, close it first. */
	if (dev->flags & IFF_UP)
		dev_close(dev);

	/* And unlink it from device chain. */
	write_lock_bh(&dev_base_lock);
	list_del(&dev->dev_list);
	hlist_del(&dev->name_hlist);
	hlist_del(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);

	dev->reg_state = NETREG_UNREGISTERING;

	/* Wait for in-flight receive processing to drain before tearing
	 * down queueing and notifying protocols.
	 */
	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);


	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the multicast chain
	 */
	dev_mc_discard(dev);

	if (dev->uninit)
		dev->uninit(dev);

	/* Notifier chain MUST detach us from master device. */
	BUG_TRAP(!dev->master);

	/* Remove entries from sysfs */
	netdev_unregister_sysfs(dev);

	/* Finish processing unregister after unlock */
	net_set_todo(dev);

	synchronize_net();

	/* Drop the registration reference; netdev_run_todo() waits for the
	 * count to reach zero before freeing.
	 */
	dev_put(dev);
}

/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables. On success 0 is returned, on a failure
 *	a negative errno code is returned.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);

/* CPU-hotplug callback: when a CPU goes offline, migrate its per-cpu
 * softnet queues (completion, output and input) to the current CPU so
 * no packets or free requests are lost.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct net_device **list_net;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Find end of our output_queue. */
	list_net = &sd->output_queue;
	while (*list_net)
		list_net = &(*list_net)->next_sched;
	/* Append output queue from offline CPU. */
	*list_net = oldsd->output_queue;
	oldsd->output_queue = NULL;

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
		netif_rx(skb);

	return NOTIFY_OK;
}

#ifdef CONFIG_NET_DMA
/**
 * net_dma_rebalance - redistribute DMA channels across online CPUs
 *
 * This is called when the number of channels allocated to the net_dma_client
 * changes.  The net_dma_client tries to have one DMA channel per CPU.
 */
static void net_dma_rebalance(void)
{
	unsigned int cpu, i, n;
	struct dma_chan *chan;

	if (net_dma_count == 0) {
		/* No channels left: clear every CPU's pointer. */
		for_each_online_cpu(cpu)
			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
		return;
	}

	i = 0;
	cpu = first_cpu(cpu_online_map);

	rcu_read_lock();
	list_for_each_entry(chan, &net_dma_client->channels, client_node) {
		/* Give each channel its fair share of CPUs; the first
		 * (num_online_cpus() % net_dma_count) channels get one extra.
		 */
		n = ((num_online_cpus() / net_dma_count)
		   + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));

		while(n) {
			per_cpu(softnet_data, cpu).net_dma = chan;
			cpu = next_cpu(cpu, cpu_online_map);
			n--;
		}
		i++;
	}
	rcu_read_unlock();
}

/**
 * netdev_dma_event - event callback for the net_dma_client
 * @client: should always be net_dma_client
 * @chan: DMA channel for the event
 * @event: event type
 */
static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_event event)
{
	spin_lock(&net_dma_event_lock);
	switch (event) {
	case DMA_RESOURCE_ADDED:
		net_dma_count++;
		net_dma_rebalance();
		break;
	case DMA_RESOURCE_REMOVED:
		net_dma_count--;
		net_dma_rebalance();
		break;
	default:
		break;
	}
	spin_unlock(&net_dma_event_lock);
}

/**
 * netdev_dma_register - register the networking subsystem as a DMA client
 */
static int __init netdev_dma_register(void)
{
	spin_lock_init(&net_dma_event_lock);
	net_dma_client = dma_async_client_register(netdev_dma_event);
	if (net_dma_client == NULL)
		return -ENOMEM;

	/* Request one channel per online CPU; see net_dma_rebalance(). */
	dma_async_client_chan_request(net_dma_client, num_online_cpus());
	return 0;
}

#else
static int __init netdev_dma_register(void) { return -ENODEV; }
#endif /* CONFIG_NET_DMA */

/*
 *	Initialize the DEV module. At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not
 *	present) and leaves us with a valid list of present and active devices.
 *
 */

/*
 *       This is called single threaded during boot, so no need
 *       to take the rtnl semaphore.
4022 */ 4023static int __init net_dev_init(void) 4024{ 4025 int i, rc = -ENOMEM; 4026 4027 BUG_ON(!dev_boot_phase); 4028 4029 if (dev_proc_init()) 4030 goto out; 4031 4032 if (netdev_sysfs_init()) 4033 goto out; 4034 4035 INIT_LIST_HEAD(&ptype_all); 4036 for (i = 0; i < 16; i++) 4037 INIT_LIST_HEAD(&ptype_base[i]); 4038 4039 for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) 4040 INIT_HLIST_HEAD(&dev_name_head[i]); 4041 4042 for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) 4043 INIT_HLIST_HEAD(&dev_index_head[i]); 4044 4045 /* 4046 * Initialise the packet receive queues. 4047 */ 4048 4049 for_each_possible_cpu(i) { 4050 struct softnet_data *queue; 4051 4052 queue = &per_cpu(softnet_data, i); 4053 skb_queue_head_init(&queue->input_pkt_queue); 4054 queue->completion_queue = NULL; 4055 INIT_LIST_HEAD(&queue->poll_list); 4056 set_bit(__LINK_STATE_START, &queue->backlog_dev.state); 4057 queue->backlog_dev.weight = weight_p; 4058 queue->backlog_dev.poll = process_backlog; 4059 atomic_set(&queue->backlog_dev.refcnt, 1); 4060 } 4061 4062 netdev_dma_register(); 4063 4064 dev_boot_phase = 0; 4065 4066 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); 4067 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); 4068 4069 hotcpu_notifier(dev_cpu_callback, 0); 4070 dst_init(); 4071 dev_mcast_init(); 4072 rc = 0; 4073 4074 /* Foxconn added start, pptp, Winster Chan, 06/26/2006 */ 4075 memset(&pptp_ip_addr, 0, sizeof(struct addr_info)); 4076 /* Foxconn added end, pptp, Winster Chan, 06/26/2006 */ 4077out: 4078 return rc; 4079} 4080 4081/* Foxconn added start, pptp, Winster Chan, 06/26/2006 */ 4082void 4083dev_import_addr_info(unsigned long *saddr, unsigned long *daddr) 4084{ 4085 pptp_ip_addr.src_addr = *saddr; 4086 pptp_ip_addr.dst_addr = *daddr; 4087} 4088/* Foxconn added end, pptp, Winster Chan, 06/26/2006 */ 4089 4090subsys_initcall(net_dev_init); 4091 4092EXPORT_SYMBOL(__dev_get_by_index); 4093EXPORT_SYMBOL(__dev_get_by_name); 4094EXPORT_SYMBOL(__dev_remove_pack); 
4095EXPORT_SYMBOL(dev_valid_name); 4096EXPORT_SYMBOL(dev_add_pack); 4097EXPORT_SYMBOL(dev_alloc_name); 4098EXPORT_SYMBOL(dev_close); 4099EXPORT_SYMBOL(dev_get_by_flags); 4100EXPORT_SYMBOL(dev_get_by_index); 4101EXPORT_SYMBOL(dev_get_by_name); 4102EXPORT_SYMBOL(dev_open); 4103EXPORT_SYMBOL(dev_queue_xmit); 4104EXPORT_SYMBOL(dev_remove_pack); 4105EXPORT_SYMBOL(dev_set_allmulti); 4106EXPORT_SYMBOL(dev_set_promiscuity); 4107EXPORT_SYMBOL(dev_change_flags); 4108EXPORT_SYMBOL(dev_set_mtu); 4109EXPORT_SYMBOL(dev_set_mac_address); 4110EXPORT_SYMBOL(free_netdev); 4111EXPORT_SYMBOL(netdev_boot_setup_check); 4112EXPORT_SYMBOL(netdev_set_master); 4113EXPORT_SYMBOL(netdev_state_change); 4114EXPORT_SYMBOL(netif_receive_skb); 4115EXPORT_SYMBOL(netif_rx); 4116EXPORT_SYMBOL(register_gifconf); 4117EXPORT_SYMBOL(register_netdevice); 4118EXPORT_SYMBOL(register_netdevice_notifier); 4119EXPORT_SYMBOL(skb_checksum_help); 4120EXPORT_SYMBOL(synchronize_net); 4121EXPORT_SYMBOL(unregister_netdevice); 4122EXPORT_SYMBOL(unregister_netdevice_notifier); 4123EXPORT_SYMBOL(net_enable_timestamp); 4124EXPORT_SYMBOL(net_disable_timestamp); 4125EXPORT_SYMBOL(dev_get_flags); 4126 4127#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) 4128EXPORT_SYMBOL(br_handle_frame_hook); 4129EXPORT_SYMBOL(br_fdb_get_hook); 4130EXPORT_SYMBOL(br_fdb_put_hook); 4131#endif 4132 4133#ifdef CONFIG_KMOD 4134EXPORT_SYMBOL(dev_load); 4135#endif 4136 4137EXPORT_PER_CPU_SYMBOL(softnet_data); 4138