1/* 2 * NET3 Protocol independent device support routines. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Derived from the non IP parts of dev.c 1.0.19 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 13 * 14 * Additional Authors: 15 * Florian la Roche <rzsfl@rz.uni-sb.de> 16 * Alan Cox <gw4pts@gw4pts.ampr.org> 17 * David Hinds <dahinds@users.sourceforge.net> 18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 19 * Adam Sulmicki <adam@cfar.umd.edu> 20 * Pekka Riikonen <priikone@poesidon.pspt.fi> 21 * 22 * Changes: 23 * D.J. Barrow : Fixed bug where dev->refcnt gets set 24 * to 2 if register_netdev gets called 25 * before net_dev_init & also removed a 26 * few lines of code in the process. 27 * Alan Cox : device private ioctl copies fields back. 28 * Alan Cox : Transmit queue code does relevant 29 * stunts to keep the queue safe. 30 * Alan Cox : Fixed double lock. 31 * Alan Cox : Fixed promisc NULL pointer trap 32 * ???????? : Support the full private ioctl range 33 * Alan Cox : Moved ioctl permission check into 34 * drivers 35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI 36 * Alan Cox : 100 backlog just doesn't cut it when 37 * you start doing multicast video 8) 38 * Alan Cox : Rewrote net_bh and list manager. 39 * Alan Cox : Fix ETH_P_ALL echoback lengths. 40 * Alan Cox : Took out transmit every packet pass 41 * Saved a few bytes in the ioctl handler 42 * Alan Cox : Network driver sets packet type before 43 * calling netif_rx. Saves a function 44 * call a packet. 45 * Alan Cox : Hashed net_bh() 46 * Richard Kooijman: Timestamp fixes. 47 * Alan Cox : Wrong field in SIOCGIFDSTADDR 48 * Alan Cox : Device lock protection. 
49 * Alan Cox : Fixed nasty side effect of device close 50 * changes. 51 * Rudi Cilibrasi : Pass the right thing to 52 * set_mac_address() 53 * Dave Miller : 32bit quantity for the device lock to 54 * make it work out on a Sparc. 55 * Bjorn Ekwall : Added KERNELD hack. 56 * Alan Cox : Cleaned up the backlog initialise. 57 * Craig Metz : SIOCGIFCONF fix if space for under 58 * 1 device. 59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there 60 * is no device open function. 61 * Andi Kleen : Fix error reporting for SIOCGIFCONF 62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF 63 * Cyrus Durgin : Cleaned for KMOD 64 * Adam Sulmicki : Bug Fix : Network Device Unload 65 * A network device unload needs to purge 66 * the backlog queue. 67 * Paul Rusty Russell : SIOCSIFNAME 68 * Pekka Riikonen : Netdev boot-time settings code 69 * Andrew Morton : Make unregister_netdevice wait 70 * indefinitely on dev->refcnt 71 * J Hadi Salim : - Backlog queue sampling 72 * - netif_rx() feedback 73 */ 74 75#include <asm/uaccess.h> 76#include <asm/system.h> 77#include <linux/bitops.h> 78#include <linux/capability.h> 79#include <linux/cpu.h> 80#include <linux/types.h> 81#include <linux/kernel.h> 82#include <linux/sched.h> 83#include <linux/mutex.h> 84#include <linux/string.h> 85#include <linux/mm.h> 86#include <linux/socket.h> 87#include <linux/sockios.h> 88#include <linux/errno.h> 89#include <linux/interrupt.h> 90#include <linux/if_ether.h> 91#include <linux/netdevice.h> 92#include <linux/etherdevice.h> 93#include <linux/notifier.h> 94#include <linux/skbuff.h> 95#include <net/sock.h> 96#include <linux/rtnetlink.h> 97#include <linux/proc_fs.h> 98#include <linux/seq_file.h> 99#include <linux/stat.h> 100#include <linux/if_bridge.h> 101#include <net/dst.h> 102#include <net/pkt_sched.h> 103#include <net/checksum.h> 104#include <linux/highmem.h> 105#include <linux/init.h> 106#include <linux/kmod.h> 107#include <linux/module.h> 108#include <linux/kallsyms.h> 109#include 
<linux/netpoll.h> 110#include <linux/rcupdate.h> 111#include <linux/delay.h> 112#include <net/wext.h> 113#include <net/iw_handler.h> 114#include <asm/current.h> 115#include <linux/audit.h> 116#include <linux/dmaengine.h> 117#include <linux/err.h> 118#include <linux/ctype.h> 119#include <linux/if_arp.h> 120#include <typedefs.h> 121#include <bcmdefs.h> 122/* Foxconn added start, pptp, Winster Chan, 06/26/2006 */ 123#include <linux/if_pppox.h> 124#include <linux/ppp_comm.h> 125/* Foxconn added end, pptp, Winster Chan, 06/26/2006 */ 126 127#ifdef CONFIG_INET_GRO 128/* Instead of increasing this, you should create a hash table. */ 129#define MAX_GRO_SKBS 8 130 131/* This should be increased if a protocol with a bigger head is added. */ 132#define GRO_MAX_HEAD (MAX_HEADER + 128) 133#endif /* CONFIG_INET_GRO */ 134 135/* 136 * The list of packet types we will receive (as opposed to discard) 137 * and the routines to invoke. 138 * 139 * Why 16. Because with 16 the only overlap we get on a hash of the 140 * low nibble of the protocol value is RARP/SNAP/X.25. 141 * 142 * NOTE: That is no longer true with the addition of VLAN tags. Not 143 * sure which should go first, but I bet it won't make much 144 * difference if we are running VLANs. The good news is that 145 * this protocol won't be in the list unless compiled in, so 146 * the average user (w/out VLANs) will not be adversely affected. 
 *			--BLG
 *
 *		0800	IP
 *		8100    802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

/* Protects ptype_base/ptype_all; writers take it with BHs disabled,
 * readers walk the lists under RCU. */
static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16] __read_mostly;	/* 16 way hashed list */
static struct list_head ptype_all __read_mostly;	/* Taps */

/* Foxconn added start, pptp, Winster Chan, 06/26/2006 */
/* Cached PPTP address information used by the vendor PPTP fast path.
 * NOTE(review): struct addr_info presumably comes from linux/ppp_comm.h
 * -- confirm against that header. */
static struct addr_info pptp_ip_addr;

/* Shorthand macros for byte-swapped protocol constants used by the
 * vendor pptp/pppoe receive path. */
#define NTOHS_ETH_P_PPTP_GRE	ntohs(ETH_P_PPTP_GRE)
#define NTOHS_ETH_P_IP		ntohs(ETH_P_IP)
#define NTOHS_ETH_P_PPP_SES	ntohs(ETH_P_PPP_SES)
#define NTOHS_ETH_P_PPPOE_SESS	ntohs(ETH_P_PPPOE_SESS)
/* Foxconn added end, pptp, Winster Chan, 06/26/2006 */

#ifdef CONFIG_NET_DMA
/* State for offloading receive copies to a DMA engine. */
static struct dma_client *net_dma_client;
static unsigned int net_dma_count;
static spinlock_t net_dma_event_lock;
#endif

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
LIST_HEAD(dev_base_head);
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_head);
EXPORT_SYMBOL(dev_base_lock);

/* Device lookup hash tables: 256 buckets, keyed by name or ifindex. */
#define NETDEV_HASHBITS	8
static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];

/* Map an interface name to its hash bucket.  The name is hashed over at
 * most IFNAMSIZ bytes. */
static inline struct hlist_head *dev_name_hash(const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
}

/* Map an interface index to its hash bucket. */
static inline struct hlist_head *dev_index_hash(int ifindex)
{
	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */
DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };

#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
extern int netdev_register_sysfs(struct net_device *);
extern void netdev_unregister_sysfs(struct net_device *);
#else
/* Without sysfs these degrade to successful no-ops. */
#define netdev_sysfs_init()		(0)
#define netdev_register_sysfs(dev)	(0)
#define	netdev_unregister_sysfs(dev)	do { } while(0)
#endif

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * register_netdevice() inits dev->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
	 ARPHRD_NONE};

/* Human-readable lock-class names; must stay index-parallel with
 * netdev_lock_type[] above. */
static const char *netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
	 "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];

/* Linear search for the lock-class slot matching dev_type; falls back to
 * the last slot (ARPHRD_NONE) for unknown types. */
static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

/* Assign the per-device-type lockdep class/name to an _xmit_lock. */
static inline void netdev_set_lockdep_class(spinlock_t *lock,
					    unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}
#else
/* No-op when lockdep is compiled out. */
static inline void netdev_set_lockdep_class(spinlock_t *lock,
					    unsigned short dev_type)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new packet type (until the next received packet).
345 */ 346 347void dev_add_pack(struct packet_type *pt) 348{ 349 int hash; 350 351 spin_lock_bh(&ptype_lock); 352 if (pt->type == htons(ETH_P_ALL)) 353 list_add_rcu(&pt->list, &ptype_all); 354 else { 355 hash = ntohs(pt->type) & 15; 356 list_add_rcu(&pt->list, &ptype_base[hash]); 357 } 358 spin_unlock_bh(&ptype_lock); 359} 360 361/** 362 * __dev_remove_pack - remove packet handler 363 * @pt: packet type declaration 364 * 365 * Remove a protocol handler that was previously added to the kernel 366 * protocol handlers by dev_add_pack(). The passed &packet_type is removed 367 * from the kernel lists and can be freed or reused once this function 368 * returns. 369 * 370 * The packet type might still be in use by receivers 371 * and must not be freed until after all the CPU's have gone 372 * through a quiescent state. 373 */ 374void __dev_remove_pack(struct packet_type *pt) 375{ 376 struct list_head *head; 377 struct packet_type *pt1; 378 379 spin_lock_bh(&ptype_lock); 380 381 if (pt->type == htons(ETH_P_ALL)) 382 head = &ptype_all; 383 else 384 head = &ptype_base[ntohs(pt->type) & 15]; 385 386 list_for_each_entry(pt1, head, list) { 387 if (pt == pt1) { 388 list_del_rcu(&pt->list); 389 goto out; 390 } 391 } 392 393 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); 394out: 395 spin_unlock_bh(&ptype_lock); 396} 397/** 398 * dev_remove_pack - remove packet handler 399 * @pt: packet type declaration 400 * 401 * Remove a protocol handler that was previously added to the kernel 402 * protocol handlers by dev_add_pack(). The passed &packet_type is removed 403 * from the kernel lists and can be freed or reused once this function 404 * returns. 405 * 406 * This call sleeps to guarantee that no CPU is looking at the packet 407 * type after return. 
408 */ 409void dev_remove_pack(struct packet_type *pt) 410{ 411 __dev_remove_pack(pt); 412 413 synchronize_net(); 414} 415 416/****************************************************************************** 417 418 Device Boot-time Settings Routines 419 420*******************************************************************************/ 421 422/* Boot time configuration table */ 423static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; 424 425/** 426 * netdev_boot_setup_add - add new setup entry 427 * @name: name of the device 428 * @map: configured settings for the device 429 * 430 * Adds new setup entry to the dev_boot_setup list. The function 431 * returns 0 on error and 1 on success. This is a generic routine to 432 * all netdevices. 433 */ 434static int netdev_boot_setup_add(char *name, struct ifmap *map) 435{ 436 struct netdev_boot_setup *s; 437 int i; 438 439 s = dev_boot_setup; 440 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 441 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { 442 memset(s[i].name, 0, sizeof(s[i].name)); 443 strcpy(s[i].name, name); 444 memcpy(&s[i].map, map, sizeof(s[i].map)); 445 break; 446 } 447 } 448 449 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; 450} 451 452/** 453 * netdev_boot_setup_check - check boot time settings 454 * @dev: the netdevice 455 * 456 * Check boot time settings for the device. 457 * The found settings are set for the device to be used 458 * later in the device probing. 459 * Returns 0 if no settings found, 1 if they are. 
460 */ 461int netdev_boot_setup_check(struct net_device *dev) 462{ 463 struct netdev_boot_setup *s = dev_boot_setup; 464 int i; 465 466 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 467 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && 468 !strncmp(dev->name, s[i].name, strlen(s[i].name))) { 469 dev->irq = s[i].map.irq; 470 dev->base_addr = s[i].map.base_addr; 471 dev->mem_start = s[i].map.mem_start; 472 dev->mem_end = s[i].map.mem_end; 473 return 1; 474 } 475 } 476 return 0; 477} 478 479 480/** 481 * netdev_boot_base - get address from boot time settings 482 * @prefix: prefix for network device 483 * @unit: id for network device 484 * 485 * Check boot time settings for the base address of device. 486 * The found settings are set for the device to be used 487 * later in the device probing. 488 * Returns 0 if no settings found. 489 */ 490unsigned long netdev_boot_base(const char *prefix, int unit) 491{ 492 const struct netdev_boot_setup *s = dev_boot_setup; 493 char name[IFNAMSIZ]; 494 int i; 495 496 sprintf(name, "%s%d", prefix, unit); 497 498 /* 499 * If device already registered then return base of 1 500 * to indicate not to probe for this interface 501 */ 502 if (__dev_get_by_name(name)) 503 return 1; 504 505 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) 506 if (!strcmp(name, s[i].name)) 507 return s[i].map.base_addr; 508 return 0; 509} 510 511/* 512 * Saves at boot time configured settings for any netdevice. 
513 */ 514int __init netdev_boot_setup(char *str) 515{ 516 int ints[5]; 517 struct ifmap map; 518 519 str = get_options(str, ARRAY_SIZE(ints), ints); 520 if (!str || !*str) 521 return 0; 522 523 /* Save settings */ 524 memset(&map, 0, sizeof(map)); 525 if (ints[0] > 0) 526 map.irq = ints[1]; 527 if (ints[0] > 1) 528 map.base_addr = ints[2]; 529 if (ints[0] > 2) 530 map.mem_start = ints[3]; 531 if (ints[0] > 3) 532 map.mem_end = ints[4]; 533 534 /* Add new entry to the list */ 535 return netdev_boot_setup_add(str, &map); 536} 537 538__setup("netdev=", netdev_boot_setup); 539 540/******************************************************************************* 541 542 Device Interface Subroutines 543 544*******************************************************************************/ 545 546/** 547 * __dev_get_by_name - find a device by its name 548 * @name: name to find 549 * 550 * Find an interface by name. Must be called under RTNL semaphore 551 * or @dev_base_lock. If the name is found a pointer to the device 552 * is returned. If the name is not found then %NULL is returned. The 553 * reference counters are not incremented so the caller must be 554 * careful with locks. 555 */ 556 557struct net_device *__dev_get_by_name(const char *name) 558{ 559 struct hlist_node *p; 560 561 hlist_for_each(p, dev_name_hash(name)) { 562 struct net_device *dev 563 = hlist_entry(p, struct net_device, name_hlist); 564 if (!strncmp(dev->name, name, IFNAMSIZ)) 565 return dev; 566 } 567 return NULL; 568} 569 570/** 571 * dev_get_by_name - find a device by its name 572 * @name: name to find 573 * 574 * Find an interface by name. This can be called from any 575 * context and does its own locking. The returned handle has 576 * the usage count incremented and the caller must use dev_put() to 577 * release it when it is no longer needed. %NULL is returned if no 578 * matching device is found. 
 */

struct net_device *dev_get_by_name(const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(name);
	if (dev)
		dev_hold(dev);	/* take a reference while still under the lock */
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(int ifindex)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_index_hash(ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(ifindex);
	if (dev)
		dev_hold(dev);	/* take a reference while still under the lock */
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for_each_netdev(dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}

EXPORT_SYMBOL(dev_getbyhwaddr);

/* Return the first device of the given ARPHRD type; no reference taken,
 * caller must hold the RTNL semaphore. */
struct net_device *__dev_getfirstbyhwtype(unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(dev)
		if (dev->type == type)
			return dev;

	return NULL;
}

EXPORT_SYMBOL(__dev_getfirstbyhwtype);

/* Locked variant of the above: takes the RTNL itself and returns the
 * device with its reference count raised (caller must dev_put()). */
struct net_device *dev_getfirstbyhwtype(unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(type);
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	read_lock(&dev_base_lock);
	for_each_netdev(dev) {
		/* Device matches when every bit selected by mask agrees
		 * with if_flags. */
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return ret;
}

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names
 *	to allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	int i = 0;
	char buf[IFNAMSIZ];
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;	/* one page of bits */
	long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		/* Mark every unit number already taken by an existing
		 * device whose name matches the format. */
		for_each_netdev(d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, sizeof(buf), name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	/* With no '%' in the format, i stays 0 and buf is just the name. */
	snprintf(buf, sizeof(buf), name, i);
	if (!__dev_get_by_name(buf)) {
		strlcpy(dev->name, buf, IFNAMSIZ);
		return i;
	}

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}


/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device, can pass format strings "eth%d".
 *	for wildcarding.
 */
int dev_change_name(struct net_device *dev, char *newname)
{
	int err = 0;

	ASSERT_RTNL();

	/* Renaming a running interface is refused. */
	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (!dev_valid_name(newname))
		return -EINVAL;

	if (strchr(newname, '%')) {
		err = dev_alloc_name(dev, newname);
		if (err < 0)
			return err;
		/* Hand the chosen name back in the caller's buffer.
		 * NOTE(review): on this path err holds the allocated unit
		 * number (>= 0) and is returned below -- callers that treat
		 * any nonzero return as failure should be double-checked. */
		strcpy(newname, dev->name);
	}
	else if (__dev_get_by_name(newname))
		return -EEXIST;
	else
		strlcpy(dev->name, newname, IFNAMSIZ);

	/* Propagate the new name to sysfs, rehash the name lookup entry,
	 * and tell interested notifiers. */
	device_rename(&dev->dev, dev->name);
	hlist_del(&dev->name_hlist);
	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);

	return err;
}

/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	/* Only meaningful (and only reported) for running interfaces. */
	if (dev->flags & IFF_UP) {
		raw_notifier_call_chain(&netdev_chain,
				NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}

/**
 *	dev_load 	- load a network module
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(name);
	read_unlock(&dev_base_lock);

	/* Only ask kmod for the module if no such device exists and the
	 * caller is privileged enough to load modules. */
	if (!dev && capable(CAP_SYS_MODULE))
		request_module("%s", name);
}

/* Fallback rebuild_header op: reaching this means a driver forgot to set
 * its own; log it and drop the skb. */
static int default_rebuild_header(struct sk_buff *skb)
{
	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
	       skb->dev ? skb->dev->name : "NULL!!!");
	kfree_skb(skb);
	return 1;
}

/* Foxconn added start pling 10/27/2009 */
#ifdef CONFIG_IPV6
/* Vendor IPv6 DAD state, defined elsewhere in the vendor patchset. */
extern const char lan_if_name[];
extern const char wan_if_name[];
extern int lan_dad_detected;
extern int wan_dad_detected;
#endif
/* Foxconn added end pling 10/27/2009 */

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	int ret = 0;

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);
	if (dev->open) {
		ret = dev->open(dev);
		if (ret)
			clear_bit(__LINK_STATE_START, &dev->state);
	}

	/*
	 *	If it went open OK then:
	 */

	if (!ret) {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_mc_upload(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
	}
	return ret;
}

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare to death, when device is still operating.
	 */
	raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);

	dev_deactivate(dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running(),
	 * and wait when poll really will happen. Actually, the best place
	 * for this is inside dev->stop() after device stopped its irq
	 * engine, but this requires more changes in devices. */

	smp_mb__after_clear_bit(); /* Commit netif_running(). */
	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
		/* No hurry. */
		msleep(1);
	}

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (dev->stop)
		dev->stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Tell people we are down
	 */
	raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);

	/* Foxconn added start pling 10/29/2009 */
	/* Clear the IPv6 DAD flags when interface is down */
#ifdef CONFIG_IPV6
	if (strcmp(dev->name, lan_if_name) == 0)
		lan_dad_detected = 0;
	else if (strcmp(dev->name, wan_if_name) == 0)
		wan_dad_detected = 0;

	/* Foxconn added start pling 09/01/2010 */
	/* Restore IPv6 forwarding that might be disabled previously by DAD */
	extern int restore_ipv6_forwarding(struct net_device *dev);
	restore_ipv6_forwarding(dev);
	/* Foxconn added end pling 09/01/2010 */
#endif
	/* Foxconn added end pling 10/29/2009 */

	return 0;
}


/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 * 	When registered all registration and up events are replayed
 *	to the new notifier to allow device to have a race free
 *	view of the network device list.
1086 */ 1087 1088int register_netdevice_notifier(struct notifier_block *nb) 1089{ 1090 struct net_device *dev; 1091 int err; 1092 1093 rtnl_lock(); 1094 err = raw_notifier_chain_register(&netdev_chain, nb); 1095 if (!err) { 1096 for_each_netdev(dev) { 1097 nb->notifier_call(nb, NETDEV_REGISTER, dev); 1098 1099 if (dev->flags & IFF_UP) 1100 nb->notifier_call(nb, NETDEV_UP, dev); 1101 } 1102 } 1103 rtnl_unlock(); 1104 return err; 1105} 1106 1107/** 1108 * unregister_netdevice_notifier - unregister a network notifier block 1109 * @nb: notifier 1110 * 1111 * Unregister a notifier previously registered by 1112 * register_netdevice_notifier(). The notifier is unlinked into the 1113 * kernel structures and may then be reused. A negative errno code 1114 * is returned on a failure. 1115 */ 1116 1117int unregister_netdevice_notifier(struct notifier_block *nb) 1118{ 1119 int err; 1120 1121 rtnl_lock(); 1122 err = raw_notifier_chain_unregister(&netdev_chain, nb); 1123 rtnl_unlock(); 1124 return err; 1125} 1126 1127/** 1128 * call_netdevice_notifiers - call all network notifier blocks 1129 * @val: value passed unmodified to notifier function 1130 * @v: pointer passed unmodified to notifier function 1131 * 1132 * Call all network notifier blocks. Parameters and return value 1133 * are as for raw_notifier_call_chain(). 1134 */ 1135 1136int call_netdevice_notifiers(unsigned long val, void *v) 1137{ 1138 return raw_notifier_call_chain(&netdev_chain, val, v); 1139} 1140 1141/* When > 0 there are consumers of rx skb time stamps */ 1142static atomic_t netstamp_needed = ATOMIC_INIT(0); 1143 1144void net_enable_timestamp(void) 1145{ 1146 atomic_inc(&netstamp_needed); 1147} 1148 1149void net_disable_timestamp(void) 1150{ 1151 atomic_dec(&netstamp_needed); 1152} 1153 1154static inline void net_timestamp(struct sk_buff *skb) 1155{ 1156 if (atomic_read(&netstamp_needed)) 1157 __net_timestamp(skb); 1158 else 1159 skb->tstamp.tv64 = 0; 1160} 1161 1162/* 1163 * Support routine. 
 Sends outgoing frames to any network
 *	taps currently in use.
 */

/* Deliver a clone of @skb to every protocol tap on ptype_all (e.g.
 * packet sockets), except the socket the packet originated from.
 * Runs under rcu_read_lock. */
static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

	net_timestamp(skb);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			/* skb->nh should be correctly
			   set by sender, so that the second statement is
			   just protection against buggy protocols.
			 */
			skb_reset_mac_header(skb2);

			if (skb_network_header(skb2) < skb2->data ||
			    skb2->network_header > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       skb2->protocol, dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype, skb->dev);
		}
	}
	rcu_read_unlock();
}


/* Queue @dev on this CPU's output_queue and raise NET_TX_SOFTIRQ.
 * __LINK_STATE_SCHED guarantees the device is queued at most once. */
void __netif_schedule(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
		unsigned long flags;
		struct softnet_data *sd;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		dev->next_sched = sd->output_queue;
		sd->output_queue = dev;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__netif_schedule);

/* Add @dev to this CPU's poll list and raise NET_RX_SOFTIRQ.
 * The poll quota is replenished from dev->weight; a negative quota
 * (overdraft from a previous round) is credited rather than reset. */
void __netif_rx_schedule(struct net_device *dev)
{
	unsigned long flags;

	local_irq_save(flags);
	dev_hold(dev);
	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
	if (dev->quota < 0)
		dev->quota += dev->weight;
	else
		dev->quota = dev->weight;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__netif_rx_schedule);

/* Free an skb from any context: defer to the softirq completion list
 * when called in hard IRQ context or with IRQs disabled. */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);


/* Hot-plugging. */
/* Mark the device as no longer present and stop its TX queue. */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_stop_queue(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

/* Mark the device present again, wake its TX queue and re-arm the
 * TX watchdog. */
void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_wake_queue(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);


/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		/* Let GSO fix up the checksum. */
		goto out_set_summed;
	}

	/* Writable copy needed before we store the folded checksum. */
	if (skb_cloned(skb)) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	offset = skb->csum_start - skb_headroom(skb);
	BUG_ON(offset > (int)skb->len);
	csum = skb_checksum(skb, offset, skb->len-offset, 0);

	offset = skb_headlen(skb) - offset;
	BUG_ON(offset <= 0);
	BUG_ON(skb->csum_offset + 2 > offset);

	*(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
		csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}

/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

	BUG_ON(skb_shinfo(skb)->frag_list);

	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	/* Look up the L3 protocol's GSO handler in the hashed ptype table. */
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}

EXPORT_SYMBOL(skb_gso_segment);

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		printk(KERN_ERR "%s: hw csum failure.\n",
			dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

/* Return 1 if @skb has highmem fragments the device cannot DMA from. */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
			return 1;

#endif
	return 0;
}

/* Stashed in skb->cb while a GSO segment list hangs off skb->next. */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

/* Free all unsent GSO segments chained on skb->next, then invoke the
 * original destructor saved by dev_gso_segment(). */
static void dev_gso_skb_destructor(struct sk_buff *skb)
{
	struct dev_gso_cb *cb;

	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;
		kfree_skb(nskb);
	} while (skb->next);

	cb = DEV_GSO_CB(skb);
	if (cb->destructor)
		cb->destructor(skb);
}

/**
 *	dev_gso_segment - Perform emulated hardware segmentation on skb.
 *	@skb: buffer to segment
 *
 *	This function segments the given skb and stores the list of segments
 *	in skb->next.
 */
static int dev_gso_segment(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct sk_buff *segs;
	/* Drop NETIF_F_SG from the feature set if scatter/gather would
	 * touch highmem the device cannot reach. */
	int features = dev->features & ~(illegal_highdma(dev, skb) ?
					 NETIF_F_SG : 0);

	segs = skb_gso_segment(skb, features);

	/* Verifying header integrity only. */
	if (!segs)
		return 0;

	if (unlikely(IS_ERR(segs)))
		return PTR_ERR(segs);

	skb->next = segs;
	DEV_GSO_CB(skb)->destructor = skb->destructor;
	skb->destructor = dev_gso_skb_destructor;

	return 0;
}

/* Hand @skb (or, after GSO, each segment in turn) to the driver.
 * Returns the driver's return code; if the queue stops mid-list the
 * unsent segments stay linked on skb->next and NETDEV_TX_BUSY is
 * returned so the caller can requeue. */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (likely(!skb->next)) {
		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		if (netif_needs_gso(dev, skb)) {
			if (unlikely(dev_gso_segment(skb)))
				goto out_kfree_skb;
			if (skb->next)
				goto gso;
		}

		return dev->hard_start_xmit(skb, dev);
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;
		int rc;

		skb->next = nskb->next;
		nskb->next = NULL;
		rc = dev->hard_start_xmit(nskb, dev);
		if (unlikely(rc)) {
			/* Driver refused the segment: put it back at the
			 * head of the pending list for a later retry. */
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		if (unlikely(netif_queue_stopped(dev) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

	skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
	kfree_skb(skb);
	return 0;
}

/* Take netif_tx_lock only for drivers that do not provide their own
 * TX locking (NETIF_F_LLTX). */
#define HARD_TX_LOCK(dev, cpu) {			\
	if ((dev->features & NETIF_F_LLTX) == 0) {	\
		netif_tx_lock(dev);			\
	}						\
}

#define HARD_TX_UNLOCK(dev) {				\
	if ((dev->features & NETIF_F_LLTX) == 0) {	\
		netif_tx_unlock(dev);			\
	}						\
}

/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 *      I notice this method can also return errors from the queue disciplines,
 *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
 *      be positive.
 *
 *      Regardless of the return value, the skb is consumed, so it is currently
 *      difficult to retry a send to this method.  (You can bump the ref count
 *      before sending to hold a reference for retry if you are careful.)
 *
 *      When calling this method, interrupts MUST be enabled.  This is because
 *      the BH enable code must have IRQs enabled so that it will not deadlock.
 *          --BLG
 */
int BCMFASTPATH dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	int rc = -ENOMEM;
	unsigned short proto;

	/* GSO will handle the following emulations directly. */
	if (netif_needs_gso(dev, skb))
		goto gso;

	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		skb_set_transport_header(skb, skb->csum_start -
					      skb_headroom(skb));

		if (!(dev->features & NETIF_F_GEN_CSUM) &&
		    (!(dev->features & NETIF_F_IP_CSUM) ||
		     skb->protocol != htons(ETH_P_IP)))
			if (skb_checksum_help(skb))
				goto out_kfree_skb;
	}

gso:
	spin_lock_prefetch(&dev->queue_lock);

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	/* Updates of qdisc are serialized by queue_lock.
	 * The struct Qdisc which is pointed to by qdisc is now a
	 * rcu structure - it may be accessed without acquiring
	 * a lock (but the structure may be stale.) The freeing of the
	 * qdisc will be deferred until it's known that there are no
	 * more references to it.
	 *
	 * If the qdisc has an enqueue function, we still need to
	 * hold the queue_lock before calling it, since queue_lock
	 * also serializes access to the device queue.
	 */

	/* NOTE(review): reads the EtherType halfword at offset 12 straight
	 * out of skb->data — assumes an Ethernet-framed packet and a
	 * 2-byte-aligned buffer; confirm for non-Ethernet devices. */
	proto = *(unsigned short *)(skb->data + ETH_ALEN + ETH_ALEN); /* foxconn added Bob, 10/30/2008 */
	q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Grab device queue */
		spin_lock(&dev->queue_lock);
		q = dev->qdisc;
		//if (q->enqueue) {
		/* Vendor change: 802.1q-tagged frames bypass the qdisc and
		 * fall through to the direct-transmit path below. */
		if ( (q->enqueue) && (htons(proto) != ETH_P_8021Q)) { /* foxconn added Bob, 10/30/2008, check 802.1q vlan type */
			rc = q->enqueue(skb, q);
			qdisc_run(dev);
			spin_unlock(&dev->queue_lock);

			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
			goto out;
		}
		spin_unlock(&dev->queue_lock);
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Check this and shot the lock. It is not prone from deadlocks.
	   Either shot noqueue qdisc, it is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		if (dev->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, cpu);

			if (!netif_queue_stopped(dev)) {
				rc = 0;
				if (!dev_hard_start_xmit(skb, dev)) {
					HARD_TX_UNLOCK(dev);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	/* Device down, queueless transmit failed, or recursion:
	 * fall through and consume (free) the skb. */
	rc = -ENETDOWN;
	rcu_read_unlock_bh();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}


/*=======================================================================
			Receiver routines
  =======================================================================*/

int netdev_max_backlog __read_mostly = 1000;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64;            /* old backlog weight */

DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };


/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process.  It always succeeds. The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_CN_LOW   (low congestion)
 *	NET_RX_CN_MOD   (moderate congestion)
 *	NET_RX_CN_HIGH  (high congestion)
 *	NET_RX_DROP     (packet was dropped)
 *
 */

int netif_rx(struct sk_buff *skb)
{
	struct softnet_data *queue;
	unsigned long flags;

	/* if netpoll wants it, pretend we never saw it */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	/*
	 * The code is rearranged so that the path is the most
	 * short when CPU is congested, but is still operating.
	 */
	local_irq_save(flags);
	queue = &__get_cpu_var(softnet_data);

	__get_cpu_var(netdev_rx_stat).total++;
	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		if (queue->input_pkt_queue.qlen) {
enqueue:
			dev_hold(skb->dev);
			__skb_queue_tail(&queue->input_pkt_queue, skb);
			local_irq_restore(flags);
			return NET_RX_SUCCESS;
		}

		/* Queue was empty: arm the backlog NAPI device first,
		 * then enqueue. */
		netif_rx_schedule(&queue->backlog_dev);
		goto enqueue;
	}

	/* Backlog full: account and drop. */
	__get_cpu_var(netdev_rx_stat).dropped++;
	local_irq_restore(flags);

	kfree_skb(skb);
	return NET_RX_DROP;
}

/* netif_rx() for process context: runs any softirq raised by the
 * enqueue before re-enabling preemption. */
int netif_rx_ni(struct sk_buff *skb)
{
	int err;

	preempt_disable();
	err = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return err;
}

EXPORT_SYMBOL(netif_rx_ni);

/* If the receiving device is a bond slave, substitute its master as
 * skb->dev (or drop the frame entirely). Returns the original device,
 * or NULL if the skb was freed. */
static inline struct net_device *skb_bond(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	if (dev->master) {
		if (skb_bond_should_drop(skb)) {
			kfree_skb(skb);
			return NULL;
		}
		skb->dev = dev->master;
	}

	return dev;
}

/* NET_TX_SOFTIRQ handler: frees skbs queued on the per-CPU completion
 * list and runs the qdisc of every device queued by __netif_schedule(). */
static void net_tx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		/* Detach the whole list with IRQs off, process it with
		 * IRQs back on. */
		local_irq_disable();
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_enable();

		while (clist) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			BUG_TRAP(!atomic_read(&skb->users));
			__kfree_skb(skb);
		}
	}

	if (sd->output_queue) {
		struct net_device *head;

		local_irq_disable();
		head = sd->output_queue;
		sd->output_queue = NULL;
		local_irq_enable();

		while (head) {
			struct net_device *dev = head;
			head = head->next_sched;

			smp_mb__before_clear_bit();
			clear_bit(__LINK_STATE_SCHED, &dev->state);

			if (spin_trylock(&dev->queue_lock)) {
				qdisc_run(dev);
				spin_unlock(&dev->queue_lock);
			} else {
				/* Lock contended: reschedule instead of
				 * spinning in softirq context. */
				netif_schedule(dev);
			}
		}
	}
}

/* Hand the skb to one packet_type handler, taking a reference because
 * the handler consumes one. */
static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * If bridge module is loaded call bridging hook.
 *  returns NULL if packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
					struct sk_buff *skb) __read_mostly;
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK ||
	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
		return skb;

	/* Flush the pending tap delivery before the bridge may consume
	 * the skb. */
	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif

#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we dont have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesnt stop any functionality; if you dont have
 * the ingress scheduler, you just cant add policies on ingress.
1853 * 1854 */ 1855static int ing_filter(struct sk_buff *skb) 1856{ 1857 struct Qdisc *q; 1858 struct net_device *dev = skb->dev; 1859 int result = TC_ACT_OK; 1860 1861 if (dev->qdisc_ingress) { 1862 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); 1863 if (MAX_RED_LOOP < ttl++) { 1864 printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n", 1865 skb->iif, skb->dev->ifindex); 1866 return TC_ACT_SHOT; 1867 } 1868 1869 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); 1870 1871 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); 1872 1873 spin_lock(&dev->ingress_lock); 1874 if ((q = dev->qdisc_ingress) != NULL) 1875 result = q->enqueue(skb, q); 1876 spin_unlock(&dev->ingress_lock); 1877 1878 } 1879 1880 return result; 1881} 1882#endif 1883 1884static unsigned long call_id = 0; 1885static unsigned long peer_call_id = 0; 1886void dev_import_call_id(unsigned long pptp_call_id, unsigned long pptp_peer_call_id) 1887{ 1888 call_id = pptp_call_id; 1889 peer_call_id = pptp_peer_call_id; 1890} 1891int netif_receive_skb(struct sk_buff *skb) 1892{ 1893 struct packet_type *ptype, *pt_prev; 1894 struct net_device *orig_dev; 1895 int ret = NET_RX_DROP; 1896 __be16 type; 1897 1898 /* if we've gotten here through NAPI, check netpoll */ 1899 if (skb->dev->poll && netpoll_rx(skb)) 1900 return NET_RX_DROP; 1901 1902 if (!skb->tstamp.tv64) 1903 net_timestamp(skb); 1904 1905 if (!skb->iif) 1906 skb->iif = skb->dev->ifindex; 1907 1908 orig_dev = skb_bond(skb); 1909 1910 if (!orig_dev) 1911 return NET_RX_DROP; 1912 1913 __get_cpu_var(netdev_rx_stat).total++; 1914 1915 skb_reset_network_header(skb); 1916 skb_reset_transport_header(skb); 1917 skb->mac_len = skb->network_header - skb->mac_header; 1918 1919 pt_prev = NULL; 1920 1921 rcu_read_lock(); 1922 1923#ifdef CONFIG_NET_CLS_ACT 1924 if (skb->tc_verd & TC_NCLS) { 1925 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); 1926 goto ncls; 1927 } 1928#endif 1929 1930 list_for_each_entry_rcu(ptype, &ptype_all, list) { 1931 if (!ptype->dev || 
ptype->dev == skb->dev) { 1932 if (pt_prev) 1933 ret = deliver_skb(skb, pt_prev, orig_dev); 1934 pt_prev = ptype; 1935 } 1936 } 1937 1938#ifdef CONFIG_NET_CLS_ACT 1939 if (pt_prev) { 1940 ret = deliver_skb(skb, pt_prev, orig_dev); 1941 pt_prev = NULL; /* noone else should process this after*/ 1942 } else { 1943 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); 1944 } 1945 1946 ret = ing_filter(skb); 1947 1948 if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { 1949 kfree_skb(skb); 1950 goto out; 1951 } 1952 1953 skb->tc_verd = 0; 1954ncls: 1955#endif 1956 1957 /* Foxconn added start pling 03/14/2011 */ 1958 /* For SamKnows briding: bridge "eth0" under "br0" with "vlan1" and "eth1". 1959 * If packet comes from eth0 with VID1, then bypass bridge handling. 1960 * ps. we only check the 4th byte of interface name: 1961 * eth0 1962 * eth1 1963 * vlan1 1964 * ^ 1965 * +-- 4th byte='0' means packet is from eth0. 1966 */ 1967 if ((skb->dev->name[3] == '0') && 1968 (skb->protocol == htons(ETH_P_8021Q)) && 1969 (skb->data[0] == 0x00) && 1970 (skb->data[1] == 0x01)) { 1971 goto bypass_handle_bridge; 1972 } 1973 /* Foxconn added end pling 03/14/2011 */ 1974 1975 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); 1976 if (!skb) 1977 goto out; 1978 1979 /* Foxconn added start pling 03/14/2011 */ 1980bypass_handle_bridge: 1981 /* Foxconn added end pling 03/14/2011 */ 1982 1983 type = skb->protocol; 1984 /* Foxconn added start, pptp, Winster Chan, 06/26/2006 */ 1985 int rsttype = 0; 1986 /* Check dst_addr & src_addr, if PPTP was active */ 1987 if (pptp_ip_addr.dst_addr && pptp_ip_addr.src_addr) { 1988 if (type == NTOHS_ETH_P_IP) { 1989 struct pptp_ip_hdr *iphdr; 1990 iphdr = (struct pptp_ip_hdr *)(skb->data); 1991 if ((iphdr->saddr == pptp_ip_addr.dst_addr) && 1992 (iphdr->daddr == pptp_ip_addr.src_addr)) { 1993 /* Check if GRE header presented */ 1994 if (iphdr->protocol == IP_PROTOCOL_GRE) { 1995 struct pptp_gre_hdr *grehdr; 1996 int grehdrlen = 8, iphdrlen = (int)(iphdr->ihl * 4), 
hdrlen; 1997 unsigned short ppp_proto; 1998 1999 grehdr = (struct pptp_gre_hdr *)((char *)(iphdr) + iphdrlen); 2000 if (GRE_IS_S(grehdr->flags)) grehdrlen += 4; 2001 if (GRE_IS_A(grehdr->version)) grehdrlen += 4; 2002 if (htons(grehdr->call_id) != call_id) { 2003 goto reset_type; 2004 } 2005 hdrlen = iphdrlen + grehdrlen; 2006 if ((skb->data[hdrlen] != 0xff) || skb->data[hdrlen+1] != 0x3) { 2007#if (defined RU_VERSION) 2008 if (1) 2009#elif (defined WW_VERSION) 2010 if (strcmp(nvram_get("gui_region"), "Russian") == 0) 2011#else /* Other FW, don't apply this patch */ 2012 if (0) 2013#endif 2014 { 2015 ppp_proto = ntohs(*(unsigned short *)((unsigned char *)skb->data + hdrlen)); 2016 goto check_header; 2017 } 2018 } 2019 hdrlen = iphdrlen + grehdrlen + 2; 2020 ppp_proto = 2021 ntohs(*(unsigned short *)((unsigned char *)skb->data + hdrlen)); 2022check_header: 2023 2024 /* Check if PPP header presented */ 2025 if ((grehdr->protocol == GRE_PROTOCOL_PPTP) && 2026 ((int)(ntohs(grehdr->payload_len)) > 0) && 2027 ((int)(ntohs(iphdr->tot_len)) > hdrlen) && 2028 (ppp_proto <= PPP_NETWORK_LAYER) && (ppp_proto > 0)) { 2029 /* Set packet type == pptp */ 2030 type = NTOHS_ETH_P_PPTP_GRE; /* Foxconn defined (0x082F) */ 2031 } 2032 } /* End if (IP_PROTOCOL_GRE) */ 2033 } /* End if (src_addr, dst_addr) */ 2034 } /* End if (ETH_P_IP) */ 2035 } /* End if (src_addr && dst_addr) */ 2036 2037 if (type == NTOHS_ETH_P_PPP_SES) { // PPPoE Session packet 2038 if ((*((unsigned char *)skb->data + 6) <= PPP_NW_LAYER) && 2039 (*((unsigned short *)((unsigned char *)skb->data + 6)) > 0)) { 2040 type = NTOHS_ETH_P_PPPOE_SESS; 2041 } 2042 } 2043reset_type: 2044 if ((rsttype == 1) && (ret == NET_RX_BYPASS) && (type == NTOHS_ETH_P_PPTP_GRE)) { 2045 type = NTOHS_ETH_P_IP; 2046 rsttype = 0; 2047 } 2048 /* Foxconn added end, pptp, Winster Chan, 06/26/2006 */ 2049 2050 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { 2051 if (ptype->type == type && 2052 (!ptype->dev || ptype->dev == 
skb->dev)) { 2053 if (pt_prev) 2054 ret = deliver_skb(skb, pt_prev, orig_dev); 2055 pt_prev = ptype; 2056 } 2057 } 2058 2059 if (pt_prev) { 2060 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 2061 /* Foxconn added start, pptp, Winster Chan, 06/26/2006 */ 2062 if ((ret == NET_RX_BYPASS) && (pt_prev->type == NTOHS_ETH_P_PPTP_GRE)) { 2063 rsttype = 1; 2064 goto reset_type; 2065 } 2066 /* Foxconn added end, pptp, Winster Chan, 06/26/2006 */ 2067 } else { 2068 kfree_skb(skb); 2069 /* Jamal, now you will not able to escape explaining 2070 * me how you were going to use this. :-) 2071 */ 2072 ret = NET_RX_DROP; 2073 } 2074 2075out: 2076 rcu_read_unlock(); 2077 return ret; 2078} 2079 2080#ifdef CONFIG_INET_GRO 2081static int BCMFASTPATH napi_gro_complete(struct sk_buff *skb) 2082{ 2083 struct packet_type *ptype; 2084 __be16 type = skb->protocol; 2085 struct list_head *head = &ptype_base[ntohs(type) & 15]; 2086 int err = -ENOENT; 2087 2088 if (NAPI_GRO_CB(skb)->count == 1) { 2089 skb_shinfo(skb)->gso_size = 0; 2090 goto out; 2091 } 2092 2093 rcu_read_lock(); 2094 list_for_each_entry_rcu(ptype, head, list) { 2095 if (ptype->type != type || ptype->dev || !ptype->gro_complete) 2096 continue; 2097 2098 err = ptype->gro_complete(skb); 2099 break; 2100 } 2101 rcu_read_unlock(); 2102 2103 if (err) { 2104 WARN_ON(&ptype->list == head); 2105 kfree_skb(skb); 2106 return NET_RX_SUCCESS; 2107 } 2108 2109out: 2110 return netif_receive_skb(skb); 2111} 2112 2113void BCMFASTPATH napi_gro_flush(struct net_device *gro_dev) 2114{ 2115 struct sk_buff *skb, *next; 2116 2117 for (skb = gro_dev->gro_list; skb; skb = next) { 2118 next = skb->next; 2119 skb->next = NULL; 2120 napi_gro_complete(skb); 2121 } 2122 2123 gro_dev->gro_count = 0; 2124 gro_dev->gro_list = NULL; 2125} 2126EXPORT_SYMBOL(napi_gro_flush); 2127 2128void * BCMFASTPATH skb_gro_header(struct sk_buff *skb, unsigned int hlen) 2129{ 2130 unsigned int offset = skb_gro_offset(skb); 2131 2132 hlen += offset; 2133 if (hlen <= 
skb_headlen(skb))
		return skb->data + offset;

	if (unlikely(!skb_shinfo(skb)->nr_frags ||
		     skb_shinfo(skb)->frags[0].size <=
		     hlen - skb_headlen(skb) ||
		     PageHighMem(skb_shinfo(skb)->frags[0].page)))
		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;

	return page_address(skb_shinfo(skb)->frags[0].page) +
	       skb_shinfo(skb)->frags[0].page_offset +
	       offset - skb_headlen(skb);
}
EXPORT_SYMBOL(skb_gro_header);

/* Core GRO engine: try to merge @skb into a flow held on the device's
 * gro_list, or hold it as the head of a new flow; falls back to
 * GRO_NORMAL for anything a protocol handler cannot aggregate. */
int BCMFASTPATH dev_gro_receive(struct net_device *gro_dev, struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &ptype_base[ntohs(type) & 15];
	int same_flow;
	int mac_len;
	int ret;

	/* This build only aggregates IPv4 traffic. */
	if (type != ntohs(ETH_P_IP))
		goto normal;

	if (!(skb->dev->features & NETIF_F_GRO))
		goto normal;

	/* Already-segmented or fraglist skbs cannot be merged. */
	if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
		goto normal;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
			continue;

		skb_set_network_header(skb, skb_gro_offset(skb));
		mac_len = skb->network_header - skb->mac_header;
		skb->mac_len = mac_len;
		NAPI_GRO_CB(skb)->same_flow = 0;
		NAPI_GRO_CB(skb)->flush = 0;
		NAPI_GRO_CB(skb)->free = 0;

		pp = ptype->gro_receive(&gro_dev->gro_list, skb);
		break;
	}
	rcu_read_unlock();

	/* No GRO-capable handler found for this protocol. */
	if (&ptype->list == head)
		goto normal;

	same_flow = NAPI_GRO_CB(skb)->same_flow;
	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;

	if (pp) {
		/* Handler asked us to flush this held flow. */
		struct sk_buff *nskb = *pp;

		*pp = nskb->next;
		nskb->next = NULL;
		napi_gro_complete(nskb);
		gro_dev->gro_count--;
	}

	if (same_flow)
		goto ok;

	if (NAPI_GRO_CB(skb)->flush || gro_dev->gro_count >= MAX_GRO_SKBS)
		goto normal;

	/* Hold the skb as the head of a new flow. */
	gro_dev->gro_count++;
	NAPI_GRO_CB(skb)->count = 1;
	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
	skb->next = gro_dev->gro_list;
	gro_dev->gro_list = skb;
	ret = GRO_HELD;

pull:
	if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
		if (gro_dev->gro_list == skb)
			gro_dev->gro_list = skb->next;
		ret = GRO_DROP;
	}

ok:
	return ret;

normal:
	ret = GRO_NORMAL;
	goto pull;
}
EXPORT_SYMBOL(dev_gro_receive);

/* Pre-mark which held flows match this skb's device and MAC header,
 * then run the GRO engine. */
static int BCMFASTPATH __napi_gro_receive(struct net_device *gro_dev, struct sk_buff *skb)
{
	struct sk_buff *p;

	for (p = gro_dev->gro_list; p; p = p->next) {
		NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
			&& !compare_ether_header(skb_mac_header(p),
						 skb_gro_mac_header(skb));
		NAPI_GRO_CB(p)->flush = 0;
	}

	return dev_gro_receive(gro_dev, skb);
}

/* Translate a GRO_* verdict into the final disposition of @skb. */
int BCMFASTPATH napi_skb_finish(int ret, struct sk_buff *skb)
{
	int err = NET_RX_SUCCESS;

	switch (ret) {
	case GRO_NORMAL:
		return netif_receive_skb(skb);

	case GRO_DROP:
		err = NET_RX_DROP;
		/* fall through */

	case GRO_MERGED_FREE:
		kfree_skb(skb);
		break;
	}

	return err;
}
EXPORT_SYMBOL(napi_skb_finish);

/* Driver-facing GRO entry point. */
int BCMFASTPATH napi_gro_receive(struct net_device *gro_dev, struct sk_buff *skb)
{
	skb_gro_reset_offset(skb);

	return napi_skb_finish(__napi_gro_receive(gro_dev, skb), skb);
}
EXPORT_SYMBOL(napi_gro_receive);
#endif /* CONFIG_INET_GRO */

static int process_backlog(struct net_device
*backlog_dev, int *budget) 2273{ 2274 int work = 0; 2275 int quota = min(backlog_dev->quota, *budget); 2276 struct softnet_data *queue = &__get_cpu_var(softnet_data); 2277 unsigned long start_time = jiffies; 2278 2279 backlog_dev->weight = weight_p; 2280 for (;;) { 2281 struct sk_buff *skb; 2282 struct net_device *dev; 2283 2284 local_irq_disable(); 2285 skb = __skb_dequeue(&queue->input_pkt_queue); 2286 if (!skb) 2287 goto job_done; 2288 local_irq_enable(); 2289 2290 dev = skb->dev; 2291 2292#ifdef CONFIG_INET_GRO 2293 napi_gro_receive(skb->dev, skb); 2294#else 2295 netif_receive_skb(skb); 2296#endif /* CONFIG_INET_GRO */ 2297 2298 dev_put(dev); 2299 2300 work++; 2301 2302 if (work >= quota || jiffies - start_time > 1) 2303 break; 2304 2305 } 2306 2307 backlog_dev->quota -= work; 2308 *budget -= work; 2309 return -1; 2310 2311job_done: 2312 backlog_dev->quota -= work; 2313 *budget -= work; 2314 2315 list_del(&backlog_dev->poll_list); 2316 smp_mb__before_clear_bit(); 2317 netif_poll_enable(backlog_dev); 2318 2319 local_irq_enable(); 2320 return 0; 2321} 2322 2323static void net_rx_action(struct softirq_action *h) 2324{ 2325 struct softnet_data *queue = &__get_cpu_var(softnet_data); 2326 unsigned long start_time = jiffies; 2327 int budget = netdev_budget; 2328 void *have; 2329 2330 local_irq_disable(); 2331 2332 while (!list_empty(&queue->poll_list)) { 2333 struct net_device *dev; 2334 2335 if (budget <= 0 || jiffies - start_time > 1) 2336 goto softnet_break; 2337 2338 local_irq_enable(); 2339 2340 dev = list_entry(queue->poll_list.next, 2341 struct net_device, poll_list); 2342 have = netpoll_poll_lock(dev); 2343 2344 if (dev->quota <= 0 || dev->poll(dev, &budget)) { 2345 netpoll_poll_unlock(have); 2346 local_irq_disable(); 2347 list_move_tail(&dev->poll_list, &queue->poll_list); 2348 if (dev->quota < 0) 2349 dev->quota += dev->weight; 2350 else 2351 dev->quota = dev->weight; 2352 } else { 2353 netpoll_poll_unlock(have); 2354 dev_put(dev); 2355 local_irq_disable(); 
2356 } 2357 } 2358out: 2359 local_irq_enable(); 2360#ifdef CONFIG_NET_DMA 2361 /* 2362 * There may not be any more sk_buffs coming right now, so push 2363 * any pending DMA copies to hardware 2364 */ 2365 if (net_dma_client) { 2366 struct dma_chan *chan; 2367 rcu_read_lock(); 2368 list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) 2369 dma_async_memcpy_issue_pending(chan); 2370 rcu_read_unlock(); 2371 } 2372#endif 2373 return; 2374 2375softnet_break: 2376 __get_cpu_var(netdev_rx_stat).time_squeeze++; 2377 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2378 goto out; 2379} 2380 2381static gifconf_func_t * gifconf_list [NPROTO]; 2382 2383/** 2384 * register_gifconf - register a SIOCGIF handler 2385 * @family: Address family 2386 * @gifconf: Function handler 2387 * 2388 * Register protocol dependent address dumping routines. The handler 2389 * that is passed must not be freed or reused until it has been replaced 2390 * by another handler. 2391 */ 2392int register_gifconf(unsigned int family, gifconf_func_t * gifconf) 2393{ 2394 if (family >= NPROTO) 2395 return -EINVAL; 2396 gifconf_list[family] = gifconf; 2397 return 0; 2398} 2399 2400 2401/* 2402 * Map an interface index to its name (SIOCGIFNAME) 2403 */ 2404 2405/* 2406 * We need this ioctl for efficient implementation of the 2407 * if_indextoname() function required by the IPv6 API. Without 2408 * it, we would have to search all the interfaces to find a 2409 * match. --pb 2410 */ 2411 2412static int dev_ifname(struct ifreq __user *arg) 2413{ 2414 struct net_device *dev; 2415 struct ifreq ifr; 2416 2417 /* 2418 * Fetch the caller's info block. 
2419 */ 2420 2421 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 2422 return -EFAULT; 2423 2424 read_lock(&dev_base_lock); 2425 dev = __dev_get_by_index(ifr.ifr_ifindex); 2426 if (!dev) { 2427 read_unlock(&dev_base_lock); 2428 return -ENODEV; 2429 } 2430 2431 strcpy(ifr.ifr_name, dev->name); 2432 read_unlock(&dev_base_lock); 2433 2434 if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) 2435 return -EFAULT; 2436 return 0; 2437} 2438 2439/* 2440 * Perform a SIOCGIFCONF call. This structure will change 2441 * size eventually, and there is nothing I can do about it. 2442 * Thus we will need a 'compatibility mode'. 2443 */ 2444 2445static int dev_ifconf(char __user *arg) 2446{ 2447 struct ifconf ifc; 2448 struct net_device *dev; 2449 char __user *pos; 2450 int len; 2451 int total; 2452 int i; 2453 2454 /* 2455 * Fetch the caller's info block. 2456 */ 2457 2458 if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) 2459 return -EFAULT; 2460 2461 pos = ifc.ifc_buf; 2462 len = ifc.ifc_len; 2463 2464 /* 2465 * Loop over the interfaces, and write an info block for each. 2466 */ 2467 2468 total = 0; 2469 for_each_netdev(dev) { 2470 for (i = 0; i < NPROTO; i++) { 2471 if (gifconf_list[i]) { 2472 int done; 2473 if (!pos) 2474 done = gifconf_list[i](dev, NULL, 0); 2475 else 2476 done = gifconf_list[i](dev, pos + total, 2477 len - total); 2478 if (done < 0) 2479 return -EFAULT; 2480 total += done; 2481 } 2482 } 2483 } 2484 2485 /* 2486 * All done. Write the updated control block back to the caller. 2487 */ 2488 ifc.ifc_len = total; 2489 2490 /* 2491 * Both BSD and Solaris return 0 here, so we do too. 2492 */ 2493 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; 2494} 2495 2496#ifdef CONFIG_PROC_FS 2497/* 2498 * This is invoked by the /proc filesystem handler to display a device 2499 * in detail. 
 */
/* seq_file .start: takes dev_base_lock, which stays held until
 * dev_seq_stop() releases it.  Position 0 is the header token. */
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
{
	loff_t off;
	struct net_device *dev;

	read_lock(&dev_base_lock);
	if (!*pos)
		return SEQ_START_TOKEN;

	/* Walk to the (*pos)-th device; header occupies slot 0. */
	off = 1;
	for_each_netdev(dev)
		if (off++ == *pos)
			return dev;

	return NULL;
}

void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return v == SEQ_START_TOKEN ?
		first_net_device() : next_net_device((struct net_device *)v);
}

/* Pairs with the read_lock taken in dev_seq_start(). */
void dev_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&dev_base_lock);
}

/* Emit one /proc/net/dev line for @dev.  The column layout below is
 * long-standing user-visible ABI; do not reformat. */
static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
	struct net_device_stats *stats = dev->get_stats(dev);

	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
		   dev->name, stats->rx_bytes, stats->rx_packets,
		   stats->rx_errors,
		   stats->rx_dropped + stats->rx_missed_errors,
		   stats->rx_fifo_errors,
		   stats->rx_length_errors + stats->rx_over_errors +
		    stats->rx_crc_errors + stats->rx_frame_errors,
		   stats->rx_compressed, stats->multicast,
		   stats->tx_bytes, stats->tx_packets,
		   stats->tx_errors, stats->tx_dropped,
		   stats->tx_fifo_errors, stats->collisions,
		   stats->tx_carrier_errors +
		    stats->tx_aborted_errors +
		    stats->tx_window_errors +
		    stats->tx_heartbeat_errors,
		   stats->tx_compressed);
}

/*
 *	Called from the PROCfs module. This now uses the new arbitrary sized
 *	/proc/net interface to create /proc/net/dev
 */
static int dev_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		/* NOTE(review): header spacing restored to the canonical
		 * kernel layout; verify against the original file, the
		 * extraction collapsed in-string whitespace. */
		seq_puts(seq, "Inter-|   Receive                            "
			      "                    |  Transmit\n"
			      " face |bytes    packets errs drop fifo frame "
			      "compressed multicast|bytes    packets errs "
			      "drop fifo colls carrier compressed\n");
	else
		dev_seq_printf_stats(seq, v);
	return 0;
}

/* Advance *pos to the next online CPU (starting at *pos) and return its
 * netdev_rx_stat slot, or NULL when CPUs are exhausted. */
static struct netif_rx_stats *softnet_get_online(loff_t *pos)
{
	struct netif_rx_stats *rc = NULL;

	while (*pos < NR_CPUS)
		if (cpu_online(*pos)) {
			rc = &per_cpu(netdev_rx_stat, *pos);
			break;
		} else
			++*pos;
	return rc;
}

static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
{
	return softnet_get_online(pos);
}

static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return softnet_get_online(pos);
}

static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}

/* One /proc/net/softnet_stat line per online CPU; the zero columns are
 * placeholders kept for format compatibility. */
static int softnet_seq_show(struct seq_file *seq, void *v)
{
	struct netif_rx_stats *s = v;

	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
		   s->total, s->dropped, s->time_squeeze, 0,
		   0, 0, 0, 0, /* was fastroute */
		   s->cpu_collision );
	return 0;
}

static const struct seq_operations dev_seq_ops = {
	.start = dev_seq_start,
	.next  = dev_seq_next,
	.stop  = dev_seq_stop,
	.show  = dev_seq_show,
};

static int dev_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &dev_seq_ops);
}

static const struct file_operations dev_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = dev_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

static const struct seq_operations softnet_seq_ops = {
	.start = softnet_seq_start,
	.next  = softnet_seq_next,
	.stop  = softnet_seq_stop,
	.show  = softnet_seq_show,
};

static int softnet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &softnet_seq_ops);
}

static const struct file_operations softnet_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = softnet_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

/* Linear lookup of the @pos-th packet handler: ptype_all first, then the
 * 16 ptype_base hash buckets in order.  Caller holds rcu_read_lock(). */
static void *ptype_get_idx(loff_t pos)
{
	struct packet_type *pt = NULL;
	loff_t i = 0;
	int t;

	list_for_each_entry_rcu(pt, &ptype_all, list) {
		if (i == pos)
			return pt;
		++i;
	}

	for (t = 0; t < 16; t++) {
		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
			if (i == pos)
				return pt;
			++i;
		}
	}
	return NULL;
}

/* seq_file .start: rcu_read_lock() stays held until ptype_seq_stop(). */
static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
{
	rcu_read_lock();
	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct packet_type *pt;
	struct list_head *nxt;
	int hash;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ptype_get_idx(0);

	pt = v;
	nxt = pt->list.next;
	if (pt->type == htons(ETH_P_ALL)) {
		if (nxt != &ptype_all)
			goto found;
		/* End of ptype_all: continue into hash bucket 0. */
		hash = 0;
		nxt = ptype_base[0].next;
	} else
		hash = ntohs(pt->type) & 15;

	/* Skip empty buckets until the next entry or the end. */
	while (nxt == &ptype_base[hash]) {
		if (++hash >= 16)
			return NULL;
		nxt = ptype_base[hash].next;
	}
found:
	return list_entry(nxt, struct packet_type, list);
}

static void ptype_seq_stop(struct seq_file *seq, void *v)
{
	rcu_read_unlock();
}

/* Print a handler function pointer symbolically when kallsyms is
 * available ("[module:]symbol+0xoff"), else as a raw pointer. */
static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
	unsigned long offset = 0, symsize;
	const char *symname;
	char *modname;
	char namebuf[128];

	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
				  &modname, namebuf);

	if (symname) {
		char *delim = ":";

		if (!modname)
			modname = delim = "";
		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
			   symname, offset);
		return;
	}
#endif

	seq_printf(seq, "[%p]", sym);
}

static int ptype_seq_show(struct seq_file *seq, void *v)
{
	struct packet_type *pt = v;

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Type Device      Function\n");
	else {
		if (pt->type == htons(ETH_P_ALL))
			seq_puts(seq, "ALL ");
		else
			seq_printf(seq, "%04x", ntohs(pt->type));

		seq_printf(seq, " %-8s ",
			   pt->dev ? pt->dev->name : "");
		ptype_seq_decode(seq,  pt->func);
		seq_putc(seq, '\n');
	}

	return 0;
}

static const struct seq_operations ptype_seq_ops = {
	.start = ptype_seq_start,
	.next  = ptype_seq_next,
	.stop  = ptype_seq_stop,
	.show  = ptype_seq_show,
};

static int ptype_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &ptype_seq_ops);
}

static const struct file_operations ptype_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = ptype_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};


/* Create /proc/net/{dev,softnet_stat,ptype} and the wireless-ext proc
 * entries; unwinds in reverse order on any failure. */
static int __init dev_proc_init(void)
{
	int rc = -ENOMEM;

	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
		goto out;
	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
		goto out_dev;
	if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
		goto out_dev2;

	if (wext_proc_init())
		goto out_softnet;
	rc = 0;
out:
	return rc;
out_softnet:
	proc_net_remove("ptype");
out_dev2:
	proc_net_remove("softnet_stat");
out_dev:
	proc_net_remove("dev");
	goto out;
}
#else
#define dev_proc_init() 0
#endif	/* CONFIG_PROC_FS */


/**
 *	netdev_set_master	-	set up master/slave pair
 *	@slave: slave device
 *	@master: new master device
 *
 *	Changes the master device of the slave. Pass %NULL to break the
 *	bonding. The caller must hold the RTNL semaphore. On a failure
 *	a negative errno code is returned. On success the reference counts
 *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
 *	function returns zero.
 */
int netdev_set_master(struct net_device *slave, struct net_device *master)
{
	struct net_device *old = slave->master;

	ASSERT_RTNL();

	if (master) {
		if (old)
			return -EBUSY;
		dev_hold(master);
	}

	slave->master = master;

	/* Let in-flight readers of slave->master drain before dropping
	 * the old master's reference. */
	synchronize_net();

	if (old)
		dev_put(old);

	if (master)
		slave->flags |= IFF_SLAVE;
	else
		slave->flags &= ~IFF_SLAVE;

	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
	return 0;
}

/**
 *	dev_set_promiscuity	- update promiscuity count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove promiscuity from a device. While the count in the device
 *	remains above zero the interface remains promiscuous. Once it hits zero
 *	the device reverts back to normal filtering operation. A negative inc
 *	value is used to drop promiscuity on the device.
 */
void dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	if ((dev->promiscuity += inc) == 0)
		dev->flags &= ~IFF_PROMISC;
	else
		dev->flags |= IFF_PROMISC;
	if (dev->flags != old_flags) {
		/* Push the new filter state to the driver and leave an
		 * audit trail for promiscuous-mode transitions. */
		dev_mc_upload(dev);
		printk(KERN_INFO "device %s %s promiscuous mode\n",
		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
		       					       "left");
		audit_log(current->audit_context, GFP_ATOMIC,
			AUDIT_ANOM_PROMISCUOUS,
			"dev=%s prom=%d old_prom=%d auid=%u",
			dev->name, (dev->flags & IFF_PROMISC),
			(old_flags & IFF_PROMISC),
			audit_get_loginuid(current->audit_context));
	}
}

/**
 *	dev_set_allmulti	- update allmulti count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove reception of all multicast frames to a device. While the
 *	count in the device remains above zero the interface remains listening
 *	to all interfaces. Once it hits zero the device reverts back to normal
 *	filtering operation. A negative @inc value is used to drop the counter
 *	when releasing a resource needing all multicasts.
 */

void dev_set_allmulti(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	/* Set unconditionally, then clear again if the counter reached
	 * zero; only a real transition is pushed to the driver. */
	dev->flags |= IFF_ALLMULTI;
	if ((dev->allmulti += inc) == 0)
		dev->flags &= ~IFF_ALLMULTI;
	if (dev->flags ^ old_flags)
		dev_mc_upload(dev);
}

/* Compose the user-visible flag word: stored flags with the volatile
 * state bits recomputed, and PROMISC/ALLMULTI taken from gflags (the
 * user-requested view, not the reference-counted internal one). */
unsigned dev_get_flags(const struct net_device *dev)
{
	unsigned flags;

	flags = (dev->flags & ~(IFF_PROMISC |
				IFF_ALLMULTI |
				IFF_RUNNING |
				IFF_LOWER_UP |
				IFF_DORMANT)) |
		(dev->gflags & (IFF_PROMISC |
				IFF_ALLMULTI));

	if (netif_running(dev)) {
		if (netif_oper_up(dev))
			flags |= IFF_RUNNING;
		if (netif_carrier_ok(dev))
			flags |= IFF_LOWER_UP;
		if (netif_dormant(dev))
			flags |= IFF_DORMANT;
	}

	return flags;
}

/* Apply a SIOCSIFFLAGS-style flag word: handles up/down transitions,
 * promisc/allmulti refcounts via gflags, and notifier/rtnetlink
 * announcements.  Caller holds RTNL. */
int dev_change_flags(struct net_device *dev, unsigned flags)
{
	int ret, changes;
	int old_flags = dev->flags;

	/*
	 *	Set the flags on our device.
	 */

	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
			       IFF_AUTOMEDIA)) |
		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
				    IFF_ALLMULTI));

	/*
	 *	Load in the correct multicast list now the flags have changed.
	 */

	dev_mc_upload(dev);

	/*
	 *	Have we downed the interface. We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */

	ret = 0;
	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);

		if (!ret)
			dev_mc_upload(dev);
	}

	if (dev->flags & IFF_UP &&
	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
					  IFF_VOLATILE)))
		raw_notifier_call_chain(&netdev_chain,
				NETDEV_CHANGE, dev);

	if ((flags ^ dev->gflags) & IFF_PROMISC) {
		int inc = (flags & IFF_PROMISC) ? +1 : -1;
		dev->gflags ^= IFF_PROMISC;
		dev_set_promiscuity(dev, inc);
	}

	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	   is important. Some (broken) drivers set IFF_PROMISC, when
	   IFF_ALLMULTI is requested not asking us and not reporting.
	 */
	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
		dev->gflags ^= IFF_ALLMULTI;
		dev_set_allmulti(dev, inc);
	}

	/* Exclude state transition flags, already notified */
	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
	if (changes)
		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);

	return ret;
}

/* Change the device MTU via the driver hook when present, notifying
 * NETDEV_CHANGEMTU listeners on success while the device is up. */
int dev_set_mtu(struct net_device *dev, int new_mtu)
{
	int err;

	if (new_mtu == dev->mtu)
		return 0;

	/*	MTU must be positive.	 */
	if (new_mtu < 0)
		return -EINVAL;

	if (!netif_device_present(dev))
		return -ENODEV;

	err = 0;
	if (dev->change_mtu)
		err = dev->change_mtu(dev, new_mtu);
	else
		dev->mtu = new_mtu;
	if (!err && dev->flags & IFF_UP)
		raw_notifier_call_chain(&netdev_chain,
				NETDEV_CHANGEMTU, dev);
	return err;
}

/* Set the hardware address through the driver hook; the address family
 * must match the device type.  Notifies NETDEV_CHANGEADDR on success. */
int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
{
	int err;

	if (!dev->set_mac_address)
		return -EOPNOTSUPP;
	if (sa->sa_family != dev->type)
		return -EINVAL;
	if (!netif_device_present(dev))
		return -ENODEV;
	err = dev->set_mac_address(dev, sa);
	if (!err)
		raw_notifier_call_chain(&netdev_chain,
				NETDEV_CHANGEADDR, dev);
	return err;
}

/*
 *	Perform the SIOCxIFxxx calls.
 */
static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);

	if (!dev)
		return -ENODEV;

	switch (cmd) {
		case SIOCGIFFLAGS:	/* Get interface flags */
			ifr->ifr_flags = dev_get_flags(dev);
			return 0;

		case SIOCSIFFLAGS:	/* Set interface flags */
			return dev_change_flags(dev, ifr->ifr_flags);

		case SIOCGIFMETRIC:	/* Get the metric on the interface
					   (currently unused) */
			ifr->ifr_metric = 0;
			return 0;

		case SIOCSIFMETRIC:	/* Set the metric on the interface
					   (currently unused) */
			return -EOPNOTSUPP;

		case SIOCGIFMTU:	/* Get the MTU of a device */
			ifr->ifr_mtu = dev->mtu;
			return 0;

		case SIOCSIFMTU:	/* Set the MTU of a device */
			return dev_set_mtu(dev, ifr->ifr_mtu);

		case SIOCGIFHWADDR:
			if (!dev->addr_len)
				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
			else
				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
			ifr->ifr_hwaddr.sa_family = dev->type;
			return 0;

		case SIOCSIFHWADDR:
			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);

		case SIOCSIFHWBROADCAST:
			if (ifr->ifr_hwaddr.sa_family != dev->type)
				return -EINVAL;
			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
			raw_notifier_call_chain(&netdev_chain,
					    NETDEV_CHANGEADDR, dev);
			return 0;

		case SIOCGIFMAP:
			ifr->ifr_map.mem_start = dev->mem_start;
			ifr->ifr_map.mem_end   = dev->mem_end;
			ifr->ifr_map.base_addr = dev->base_addr;
			ifr->ifr_map.irq       = dev->irq;
			ifr->ifr_map.dma       = dev->dma;
			ifr->ifr_map.port      = dev->if_port;
			return 0;

		case SIOCSIFMAP:
			if (dev->set_config) {
				if (!netif_device_present(dev))
					return -ENODEV;
				return dev->set_config(dev, &ifr->ifr_map);
			}
			return -EOPNOTSUPP;

		case SIOCADDMULTI:
			if (!dev->set_multicast_list ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
					  dev->addr_len, 1);

		case SIOCDELMULTI:
			if (!dev->set_multicast_list ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
					     dev->addr_len, 1);

		case SIOCGIFINDEX:
			ifr->ifr_ifindex = dev->ifindex;
			return 0;

		case SIOCGIFTXQLEN:
			ifr->ifr_qlen = dev->tx_queue_len;
			return 0;

		case SIOCSIFTXQLEN:
			if (ifr->ifr_qlen < 0)
				return -EINVAL;
			dev->tx_queue_len = ifr->ifr_qlen;
			return 0;

		case SIOCSIFNAME:
			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
			return dev_change_name(dev, ifr->ifr_newname);

		/*
		 *	Unknown or private ioctl
		 */

		default:
			/* Driver-private and bonding/MII/bridge/WAN
			 * ioctls are forwarded to dev->do_ioctl. */
			if ((cmd >= SIOCDEVPRIVATE &&
			    cmd <= SIOCDEVPRIVATE + 15) ||
			    cmd == SIOCBONDENSLAVE ||
			    cmd == SIOCBONDRELEASE ||
			    cmd == SIOCBONDSETHWADDR ||
			    cmd == SIOCBONDSLAVEINFOQUERY ||
			    cmd == SIOCBONDINFOQUERY ||
			    cmd == SIOCBONDCHANGEACTIVE ||
			    cmd == SIOCGMIIPHY ||
			    cmd == SIOCGMIIREG ||
			    cmd == SIOCSMIIREG ||
			    cmd == SIOCBRADDIF ||
			    cmd == SIOCBRDELIF ||
			    cmd == SIOCWANDEV) {
				err = -EOPNOTSUPP;
				if (dev->do_ioctl) {
					if (netif_device_present(dev))
						err = dev->do_ioctl(dev, ifr,
								    cmd);
					else
						err = -ENODEV;
				}
			} else
				err = -EINVAL;

	}
	return err;
}

/*
 *	This function handles all "interface"-type I/O control requests. The actual
 *	'doing' part of this is dev_ifsioc above.
 */

/**
 *	dev_ioctl	-	network device ioctl
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Issue ioctl functions to devices. This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes. The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 */

int dev_ioctl(unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* One special case: SIOCGIFCONF takes ifconf argument
	   and requires shared lock, because it sleeps writing
	   to user space.
	 */

	if (cmd == SIOCGIFCONF) {
		rtnl_lock();
		ret = dev_ifconf((char __user *) arg);
		rtnl_unlock();
		return ret;
	}
	if (cmd == SIOCGIFNAME)
		return dev_ifname((struct ifreq __user *)arg);

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	ifr.ifr_name[IFNAMSIZ-1] = 0;

	/* Strip an alias suffix ("eth0:1" -> "eth0"); the colon is
	 * restored before copying the result back to the caller. */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
3222 */ 3223 3224 switch (cmd) { 3225 /* 3226 * These ioctl calls: 3227 * - can be done by all. 3228 * - atomic and do not require locking. 3229 * - return a value 3230 */ 3231 case SIOCGIFFLAGS: 3232 case SIOCGIFMETRIC: 3233 case SIOCGIFMTU: 3234 case SIOCGIFHWADDR: 3235 case SIOCGIFSLAVE: 3236 case SIOCGIFMAP: 3237 case SIOCGIFINDEX: 3238 case SIOCGIFTXQLEN: 3239 dev_load(ifr.ifr_name); 3240 read_lock(&dev_base_lock); 3241 ret = dev_ifsioc(&ifr, cmd); 3242 read_unlock(&dev_base_lock); 3243 if (!ret) { 3244 if (colon) 3245 *colon = ':'; 3246 if (copy_to_user(arg, &ifr, 3247 sizeof(struct ifreq))) 3248 ret = -EFAULT; 3249 } 3250 return ret; 3251 3252 case SIOCETHTOOL: 3253 dev_load(ifr.ifr_name); 3254 rtnl_lock(); 3255 ret = dev_ethtool(&ifr); 3256 rtnl_unlock(); 3257 if (!ret) { 3258 if (colon) 3259 *colon = ':'; 3260 if (copy_to_user(arg, &ifr, 3261 sizeof(struct ifreq))) 3262 ret = -EFAULT; 3263 } 3264 return ret; 3265 3266 /* 3267 * These ioctl calls: 3268 * - require superuser power. 3269 * - require strict serialization. 3270 * - return a value 3271 */ 3272 case SIOCGMIIPHY: 3273 case SIOCGMIIREG: 3274 case SIOCSIFNAME: 3275 if (!capable(CAP_NET_ADMIN)) 3276 return -EPERM; 3277 dev_load(ifr.ifr_name); 3278 rtnl_lock(); 3279 ret = dev_ifsioc(&ifr, cmd); 3280 rtnl_unlock(); 3281 if (!ret) { 3282 if (colon) 3283 *colon = ':'; 3284 if (copy_to_user(arg, &ifr, 3285 sizeof(struct ifreq))) 3286 ret = -EFAULT; 3287 } 3288 return ret; 3289 3290 /* 3291 * These ioctl calls: 3292 * - require superuser power. 3293 * - require strict serialization. 
3294 * - do not return a value 3295 */ 3296 case SIOCSIFFLAGS: 3297 case SIOCSIFMETRIC: 3298 case SIOCSIFMTU: 3299 case SIOCSIFMAP: 3300 case SIOCSIFHWADDR: 3301 case SIOCSIFSLAVE: 3302 case SIOCADDMULTI: 3303 case SIOCDELMULTI: 3304 case SIOCSIFHWBROADCAST: 3305 case SIOCSIFTXQLEN: 3306 case SIOCSMIIREG: 3307 case SIOCBONDENSLAVE: 3308 case SIOCBONDRELEASE: 3309 case SIOCBONDSETHWADDR: 3310 case SIOCBONDCHANGEACTIVE: 3311 case SIOCBRADDIF: 3312 case SIOCBRDELIF: 3313 if (!capable(CAP_NET_ADMIN)) 3314 return -EPERM; 3315 /* fall through */ 3316 case SIOCBONDSLAVEINFOQUERY: 3317 case SIOCBONDINFOQUERY: 3318 dev_load(ifr.ifr_name); 3319 rtnl_lock(); 3320 ret = dev_ifsioc(&ifr, cmd); 3321 rtnl_unlock(); 3322 return ret; 3323 3324 case SIOCGIFMEM: 3325 /* Get the per device memory space. We can add this but 3326 * currently do not support it */ 3327 case SIOCSIFMEM: 3328 /* Set the per device memory buffer space. 3329 * Not applicable in our case */ 3330 case SIOCSIFLINK: 3331 return -EINVAL; 3332 3333 /* 3334 * Unknown or private ioctl. 3335 */ 3336 default: 3337 if (cmd == SIOCWANDEV || 3338 (cmd >= SIOCDEVPRIVATE && 3339 cmd <= SIOCDEVPRIVATE + 15)) { 3340 dev_load(ifr.ifr_name); 3341 rtnl_lock(); 3342 ret = dev_ifsioc(&ifr, cmd); 3343 rtnl_unlock(); 3344 if (!ret && copy_to_user(arg, &ifr, 3345 sizeof(struct ifreq))) 3346 ret = -EFAULT; 3347 return ret; 3348 } 3349 /* Take care of Wireless Extensions */ 3350 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) 3351 return wext_handle_ioctl(&ifr, cmd, arg); 3352 return -EINVAL; 3353 } 3354} 3355 3356 3357/** 3358 * dev_new_index - allocate an ifindex 3359 * 3360 * Returns a suitable unique value for a new device interface 3361 * number. The caller must hold the rtnl semaphore or the 3362 * dev_base_lock to be sure it remains unique. 
3363 */ 3364static int dev_new_index(void) 3365{ 3366 static int ifindex; 3367 for (;;) { 3368 if (++ifindex <= 0) 3369 ifindex = 1; 3370 if (!__dev_get_by_index(ifindex)) 3371 return ifindex; 3372 } 3373} 3374 3375static int dev_boot_phase = 1; 3376 3377/* Delayed registration/unregisteration */ 3378static DEFINE_SPINLOCK(net_todo_list_lock); 3379static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); 3380 3381static void net_set_todo(struct net_device *dev) 3382{ 3383 spin_lock(&net_todo_list_lock); 3384 list_add_tail(&dev->todo_list, &net_todo_list); 3385 spin_unlock(&net_todo_list_lock); 3386} 3387 3388/** 3389 * register_netdevice - register a network device 3390 * @dev: device to register 3391 * 3392 * Take a completed network device structure and add it to the kernel 3393 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 3394 * chain. 0 is returned on success. A negative errno code is returned 3395 * on a failure to set up the device, or if the name is a duplicate. 3396 * 3397 * Callers must hold the rtnl semaphore. You may want 3398 * register_netdev() instead of this. 3399 * 3400 * BUGS: 3401 * The locking appears insufficient to guarantee two parallel registers 3402 * will not get the same name. 3403 */ 3404 3405int register_netdevice(struct net_device *dev) 3406{ 3407 struct hlist_head *head; 3408 struct hlist_node *p; 3409 int ret; 3410 3411 BUG_ON(dev_boot_phase); 3412 ASSERT_RTNL(); 3413 3414 might_sleep(); 3415 3416 /* When net_device's are persistent, this will be fatal. 
*/ 3417 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); 3418 3419 spin_lock_init(&dev->queue_lock); 3420 spin_lock_init(&dev->_xmit_lock); 3421 netdev_set_lockdep_class(&dev->_xmit_lock, dev->type); 3422 dev->xmit_lock_owner = -1; 3423 spin_lock_init(&dev->ingress_lock); 3424 3425 dev->iflink = -1; 3426 3427 /* Init, if this function is available */ 3428 if (dev->init) { 3429 ret = dev->init(dev); 3430 if (ret) { 3431 if (ret > 0) 3432 ret = -EIO; 3433 goto out; 3434 } 3435 } 3436 3437 if (!dev_valid_name(dev->name)) { 3438 ret = -EINVAL; 3439 goto out; 3440 } 3441 3442 dev->ifindex = dev_new_index(); 3443 if (dev->iflink == -1) 3444 dev->iflink = dev->ifindex; 3445 3446 /* Check for existence of name */ 3447 head = dev_name_hash(dev->name); 3448 hlist_for_each(p, head) { 3449 struct net_device *d 3450 = hlist_entry(p, struct net_device, name_hlist); 3451 if (!strncmp(d->name, dev->name, IFNAMSIZ)) { 3452 ret = -EEXIST; 3453 goto out; 3454 } 3455 } 3456 3457 /* Fix illegal SG+CSUM combinations. */ 3458 if ((dev->features & NETIF_F_SG) && 3459 !(dev->features & NETIF_F_ALL_CSUM)) { 3460 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n", 3461 dev->name); 3462 dev->features &= ~NETIF_F_SG; 3463 } 3464 3465 /* TSO requires that SG is present as well. 
*/ 3466 if ((dev->features & NETIF_F_TSO) && 3467 !(dev->features & NETIF_F_SG)) { 3468 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n", 3469 dev->name); 3470 dev->features &= ~NETIF_F_TSO; 3471 } 3472 if (dev->features & NETIF_F_UFO) { 3473 if (!(dev->features & NETIF_F_HW_CSUM)) { 3474 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " 3475 "NETIF_F_HW_CSUM feature.\n", 3476 dev->name); 3477 dev->features &= ~NETIF_F_UFO; 3478 } 3479 if (!(dev->features & NETIF_F_SG)) { 3480 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " 3481 "NETIF_F_SG feature.\n", 3482 dev->name); 3483 dev->features &= ~NETIF_F_UFO; 3484 } 3485 } 3486 3487 /* 3488 * nil rebuild_header routine, 3489 * that should be never called and used as just bug trap. 3490 */ 3491 3492 if (!dev->rebuild_header) 3493 dev->rebuild_header = default_rebuild_header; 3494 3495 ret = netdev_register_sysfs(dev); 3496 if (ret) 3497 goto out; 3498 dev->reg_state = NETREG_REGISTERED; 3499 3500 /* 3501 * Default initial state at registry is that the 3502 * device is present. 3503 */ 3504 3505 set_bit(__LINK_STATE_PRESENT, &dev->state); 3506 3507 dev_init_scheduler(dev); 3508 write_lock_bh(&dev_base_lock); 3509 list_add_tail(&dev->dev_list, &dev_base_head); 3510 hlist_add_head(&dev->name_hlist, head); 3511 hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); 3512 dev_hold(dev); 3513 write_unlock_bh(&dev_base_lock); 3514 3515 /* Notify protocols, that a new device appeared. */ 3516 raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); 3517 3518 ret = 0; 3519 3520out: 3521 return ret; 3522} 3523 3524/** 3525 * register_netdev - register a network device 3526 * @dev: device to register 3527 * 3528 * Take a completed network device structure and add it to the kernel 3529 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 3530 * chain. 0 is returned on success. 
 * A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice that takes the rtnl
 *	semaphore and expands the device name if you passed a format
 *	string (e.g. "eth%d") to alloc_netdev.
 */
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();

	/*
	 * A '%' in the name marks it as a format string; let
	 * dev_alloc_name() pick the first free matching name before
	 * registering.
	 */
	if (strchr(dev->name, '%')) {
		err = dev_alloc_name(dev, dev->name);
		if (err < 0)
			goto out;
	}

	err = register_netdevice(dev);
out:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);

/*
 * netdev_wait_allrefs - wait until all references to a device are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;

	rebroadcast_time = warning_time = jiffies;
	while (atomic_read(&dev->refcnt) != 0) {
		/* Once a second, re-send NETDEV_UNREGISTER in case a
		 * listener missed it and is still holding a reference.
		 */
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			raw_notifier_call_chain(&netdev_chain,
					NETDEV_UNREGISTER, dev);

			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		/* Complain loudly every 10 seconds about devices whose
		 * refcount refuses to drop (usually a missing dev_put()
		 * in some protocol).
		 */
		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
}

/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock() after it drops the semaphore.
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 */
static DEFINE_MUTEX(net_todo_run_mutex);
void netdev_run_todo(void)
{
	struct list_head list;

	/* Need to guard against multiple cpu's getting out of order. */
	mutex_lock(&net_todo_run_mutex);

	/* Not safe to do outside the semaphore.  We must not return
	 * until all unregister events invoked by the local processor
	 * have been completed (either by this todo run, or one on
	 * another cpu).
	 */
	if (list_empty(&net_todo_list))
		goto out;

	/* Snapshot list, allow later requests */
	spin_lock(&net_todo_list_lock);
	list_replace_init(&net_todo_list, &list);
	spin_unlock(&net_todo_list_lock);

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		/* Sleep until every outstanding dev_hold() has been
		 * balanced by a dev_put().
		 */
		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		BUG_TRAP(!dev->ip_ptr);
		BUG_TRAP(!dev->ip6_ptr);
		BUG_TRAP(!dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}

out:
	mutex_unlock(&net_todo_run_mutex);
}

/* Default get_stats callback for drivers that keep their statistics
 * directly in struct net_device (installed by alloc_netdev below).
 */
static struct net_device_stats *internal_stats(struct net_device *dev)
{
	return &dev->stats;
}

/**
 *	alloc_netdev - allocate network device
 *	@sizeof_priv:	size of private data to allocate space for
 *	@name:		device name format string
 *	@setup:		callback to initialize device
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization.
 */
struct net_device *alloc_netdev(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *))
{
	void *p;
	struct net_device *dev;
	int alloc_size;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	/* ensure 32-byte alignment of both the device and private area */
	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
		return NULL;
	}

	/* Round the raw allocation up to the alignment boundary and
	 * remember how far we moved, so free_netdev() can recover the
	 * original pointer for kfree().
	 */
	dev = (struct net_device *)
		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
	dev->padded = (char *)dev - (char *)p;

	if (sizeof_priv)
		dev->priv = netdev_priv(dev);

	/* Install the default stats callback; the driver's setup()
	 * callback may override it.
	 */
	dev->get_stats = internal_stats;
	setup(dev);
	strcpy(dev->name, name);
	return dev;
}
EXPORT_SYMBOL(alloc_netdev);

/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released.
 *	If this is the last reference then it will be freed.
 */
void free_netdev(struct net_device *dev)
{
#ifdef CONFIG_SYSFS
	/* Compatibility with error handling in drivers: a device that
	 * never made it through register_netdevice() has no sysfs
	 * object, so just undo the padded allocation directly.
	 */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	/* will free via device release */
	put_device(&dev->dev);
#else
	kfree((char *)dev - dev->padded);
#endif
}

/* Synchronize with packet receive processing.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}

/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables. On success 0 is returned, on a failure
 *	a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	/* Some devices call without registering for initialization unwind. */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
				  "was registered\n", dev->name, dev);

		WARN_ON(1);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_REGISTERED);

	/* If device is running, close it first. */
	if (dev->flags & IFF_UP)
		dev_close(dev);

	/* And unlink it from device chain. */
	write_lock_bh(&dev_base_lock);
	list_del(&dev->dev_list);
	hlist_del(&dev->name_hlist);
	hlist_del(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);

	dev->reg_state = NETREG_UNREGISTERING;

	/* Wait for readers that may still see the device in the old
	 * lists via RCU before tearing anything else down.
	 */
	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);


	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the multicast chain
	 */
	dev_mc_discard(dev);

	if (dev->uninit)
		dev->uninit(dev);

	/* Notifier chain MUST detach us from master device. */
	BUG_TRAP(!dev->master);

	/* Remove entries from sysfs */
	netdev_unregister_sysfs(dev);

	/* Finish processing unregister after unlock: queue the device
	 * for netdev_run_todo(), which runs once rtnl_unlock() drops
	 * the semaphore.
	 */
	net_set_todo(dev);

	synchronize_net();

	dev_put(dev);
}

/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables. On success 0 is returned, on a failure
 *	a negative errno code is returned.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);

/* CPU-hotplug callback: when a CPU goes offline, splice its pending
 * per-CPU network work (completion queue, output queue, input backlog)
 * onto the current CPU so nothing is lost.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct net_device **list_net;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	/* IRQs off while we splice the per-CPU queues. */
	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Find end of our output_queue. */
	list_net = &sd->output_queue;
	while (*list_net)
		list_net = &(*list_net)->next_sched;
	/* Append output queue from offline CPU. */
	*list_net = oldsd->output_queue;
	oldsd->output_queue = NULL;

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
		netif_rx(skb);

	return NOTIFY_OK;
}

#ifdef CONFIG_NET_DMA
/**
 * net_dma_rebalance -
 * This is called when the number of channels allocated to the net_dma_client
 * changes.  The net_dma_client tries to have one DMA channel per CPU.
 */
static void net_dma_rebalance(void)
{
	unsigned int cpu, i, n;
	struct dma_chan *chan;

	/* No channels left: clear every CPU's channel pointer. */
	if (net_dma_count == 0) {
		for_each_online_cpu(cpu)
			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
		return;
	}

	i = 0;
	cpu = first_cpu(cpu_online_map);

	/* Distribute the online CPUs across the available channels as
	 * evenly as possible (the first cpus%channels channels get one
	 * extra CPU each).
	 */
	rcu_read_lock();
	list_for_each_entry(chan, &net_dma_client->channels, client_node) {
		n = ((num_online_cpus() / net_dma_count)
			+ (i < (num_online_cpus() % net_dma_count) ? 1 : 0));

		while(n) {
			per_cpu(softnet_data, cpu).net_dma = chan;
			cpu = next_cpu(cpu, cpu_online_map);
			n--;
		}
		i++;
	}
	rcu_read_unlock();
}

/**
 * netdev_dma_event - event callback for the net_dma_client
 * @client: should always be net_dma_client
 * @chan: DMA channel for the event
 * @event: event type
 */
static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_event event)
{
	spin_lock(&net_dma_event_lock);
	switch (event) {
	case DMA_RESOURCE_ADDED:
		net_dma_count++;
		net_dma_rebalance();
		break;
	case DMA_RESOURCE_REMOVED:
		net_dma_count--;
		net_dma_rebalance();
		break;
	default:
		break;
	}
	spin_unlock(&net_dma_event_lock);
}

/**
 * netdev_dma_register - register the networking subsystem as a DMA client
 */
static int __init netdev_dma_register(void)
{
	spin_lock_init(&net_dma_event_lock);
	net_dma_client = dma_async_client_register(netdev_dma_event);
	if (net_dma_client == NULL)
		return -ENOMEM;

	/* Ask for one channel per online CPU; actual channels arrive
	 * asynchronously via netdev_dma_event().
	 */
	dma_async_client_chan_request(net_dma_client, num_online_cpus());
	return 0;
}

#else
static int __init netdev_dma_register(void) { return -ENODEV; }
#endif /* CONFIG_NET_DMA */

/*
 *	Initialize the DEV module. At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not
 *	present) and leaves us with a valid list of present and active devices.
 *
 */

/*
 *       This is called single threaded during boot, so no need
 *       to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_sysfs_init())
		goto out;

	/* Packet-type hash: ptype_all catches everything, ptype_base
	 * is a 16-bucket hash by protocol.
	 */
	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < 16; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
		INIT_HLIST_HEAD(&dev_name_head[i]);

	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
		INIT_HLIST_HEAD(&dev_index_head[i]);

	/*
	 *	Initialise the packet receive queues: one backlog
	 *	pseudo-device per possible CPU.
	 */

	for_each_possible_cpu(i) {
		struct softnet_data *queue;

		queue = &per_cpu(softnet_data, i);
		skb_queue_head_init(&queue->input_pkt_queue);
		queue->completion_queue = NULL;
		INIT_LIST_HEAD(&queue->poll_list);
		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
		queue->backlog_dev.weight = weight_p;
		queue->backlog_dev.poll = process_backlog;
		atomic_set(&queue->backlog_dev.refcnt, 1);
	}

	/* Best effort: -ENODEV without CONFIG_NET_DMA is fine. */
	netdev_dma_register();

	dev_boot_phase = 0;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	dev_mcast_init();
	rc = 0;

	/* Foxconn added start, pptp, Winster Chan, 06/26/2006 */
	memset(&pptp_ip_addr, 0, sizeof(struct addr_info));
	/* Foxconn added end, pptp, Winster Chan, 06/26/2006 */
out:
	return rc;
}

/* Foxconn added start, pptp, Winster Chan, 06/26/2006 */
/* Vendor hook: record the PPTP session's source/destination IPv4
 * addresses (network byte order assumed — NOTE(review): confirm with
 * the pptp caller) in the global pptp_ip_addr.
 */
void
dev_import_addr_info(unsigned long *saddr, unsigned long *daddr)
{
	pptp_ip_addr.src_addr = *saddr;
	pptp_ip_addr.dst_addr = *daddr;
}
/* Foxconn added end, pptp, Winster Chan, 06/26/2006 */

subsys_initcall(net_dev_init);

EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
/* Public entry points of the core device layer, usable by modules. */
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);