1/* 2 * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License as published by the 6 * Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * for more details. 13 * 14 * You should have received a copy of the GNU General Public License along 15 * with this program; if not, write to the Free Software Foundation, Inc., 16 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * 18 * The full GNU General Public License is included in this distribution in the 19 * file called LICENSE. 20 * 21 */ 22 23#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 24 25#include <linux/skbuff.h> 26#include <linux/netdevice.h> 27#include <linux/etherdevice.h> 28#include <linux/pkt_sched.h> 29#include <linux/spinlock.h> 30#include <linux/slab.h> 31#include <linux/timer.h> 32#include <linux/ip.h> 33#include <linux/ipv6.h> 34#include <linux/if_arp.h> 35#include <linux/if_ether.h> 36#include <linux/if_bonding.h> 37#include <linux/if_vlan.h> 38#include <linux/in.h> 39#include <net/ipx.h> 40#include <net/arp.h> 41#include <net/ipv6.h> 42#include <asm/byteorder.h> 43#include "bonding.h" 44#include "bond_alb.h" 45 46 47#define ALB_TIMER_TICKS_PER_SEC 10 /* should be a divisor of HZ */ 48#define BOND_TLB_REBALANCE_INTERVAL 10 /* In seconds, periodic re-balancing. 49 * Used for division - never set 50 * to zero !!! 51 */ 52#define BOND_ALB_LP_INTERVAL 1 /* In seconds, periodic send of 53 * learning packets to the switch 54 */ 55 56#define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \ 57 * ALB_TIMER_TICKS_PER_SEC) 58 59#define BOND_ALB_LP_TICKS (BOND_ALB_LP_INTERVAL \ 60 * ALB_TIMER_TICKS_PER_SEC) 61 62#define TLB_HASH_TABLE_SIZE 256 /* The size of the clients hash table. 63 * Note that this value MUST NOT be smaller 64 * because the key hash table is BYTE wide ! 65 */ 66 67 68#define TLB_NULL_INDEX 0xffffffff 69#define MAX_LP_BURST 3 70 71/* rlb defs */ 72#define RLB_HASH_TABLE_SIZE 256 73#define RLB_NULL_INDEX 0xffffffff 74#define RLB_UPDATE_DELAY 2*ALB_TIMER_TICKS_PER_SEC /* 2 seconds */ 75#define RLB_ARP_BURST_SIZE 2 76#define RLB_UPDATE_RETRY 3 /* 3-ticks - must be smaller than the rlb 77 * rebalance interval (5 min). 78 */ 79/* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is 80 * promiscuous after failover 81 */ 82#define RLB_PROMISC_TIMEOUT 10*ALB_TIMER_TICKS_PER_SEC 83 84#ifndef __long_aligned 85#define __long_aligned __attribute__((aligned((sizeof(long))))) 86#endif 87static const u8 mac_bcast[ETH_ALEN] __long_aligned = { 88 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 89}; 90static const u8 mac_v6_allmcast[ETH_ALEN] __long_aligned = { 91 0x33, 0x33, 0x00, 0x00, 0x00, 0x01 92}; 93static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC; 94 95#pragma pack(1) 96struct learning_pkt { 97 u8 mac_dst[ETH_ALEN]; 98 u8 mac_src[ETH_ALEN]; 99 __be16 type; 100 u8 padding[ETH_ZLEN - ETH_HLEN]; 101}; 102 103struct arp_pkt { 104 __be16 hw_addr_space; 105 __be16 prot_addr_space; 106 u8 hw_addr_len; 107 u8 prot_addr_len; 108 __be16 op_code; 109 u8 mac_src[ETH_ALEN]; /* sender hardware address */ 110 __be32 ip_src; /* sender IP address */ 111 u8 mac_dst[ETH_ALEN]; /* target hardware address */ 112 __be32 ip_dst; /* target IP address */ 113}; 114#pragma pack() 115 116static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) 117{ 118 return (struct arp_pkt *)skb_network_header(skb); 119} 120 121/* Forward declaration */ 122static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]); 123 124static inline u8 _simple_hash(const u8 *hash_start, int hash_size) 125{ 126 int i; 127 u8 hash = 0; 128 129 for (i = 0; i < hash_size; i++) { 130 hash ^= hash_start[i]; 131 } 132 133 return hash; 134} 135 136/*********************** tlb specific functions ***************************/ 137 138static inline void _lock_tx_hashtbl(struct bonding *bond) 139{ 140 spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); 141} 142 143static inline void _unlock_tx_hashtbl(struct bonding *bond) 144{ 145 spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); 146} 147 148/* Caller must hold tx_hashtbl lock */ 149static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load) 150{ 151 if (save_load) { 152 entry->load_history = 1 + entry->tx_bytes / 153 BOND_TLB_REBALANCE_INTERVAL; 154 entry->tx_bytes = 0; 155 } 156 157 entry->tx_slave = NULL; 158 entry->next = TLB_NULL_INDEX; 159 entry->prev = TLB_NULL_INDEX; 160} 161 162static inline void tlb_init_slave(struct slave *slave) 163{ 164 SLAVE_TLB_INFO(slave).load = 0; 165 SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX; 166} 167 168/* Caller must hold bond lock for read */ 169static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load) 170{ 171 struct tlb_client_info *tx_hash_table; 172 u32 index; 173 174 _lock_tx_hashtbl(bond); 175 176 /* clear slave from tx_hashtbl */ 177 tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; 178 179 /* skip this if we've already freed the tx hash table */ 180 if (tx_hash_table) { 181 index = SLAVE_TLB_INFO(slave).head; 182 while (index != TLB_NULL_INDEX) { 183 u32 next_index = tx_hash_table[index].next; 184 tlb_init_table_entry(&tx_hash_table[index], save_load); 185 index = next_index; 186 } 187 } 188 189 tlb_init_slave(slave); 190 191 _unlock_tx_hashtbl(bond); 192} 193 194/* Must be called before starting the monitor timer */ 195static int tlb_initialize(struct bonding *bond) 196{ 197 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 198 int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info); 199 struct tlb_client_info *new_hashtbl; 200 int i; 201 202 spin_lock_init(&(bond_info->tx_hashtbl_lock)); 203 204 new_hashtbl = kzalloc(size, GFP_KERNEL); 205 if (!new_hashtbl) { 206 pr_err("%s: Error: Failed to allocate TLB hash table\n", 207 bond->dev->name); 208 return -1; 209 } 210 _lock_tx_hashtbl(bond); 211 212 bond_info->tx_hashtbl = new_hashtbl; 213 214 for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) { 215 tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1); 216 } 217 218 _unlock_tx_hashtbl(bond); 219 220 return 0; 221} 222 223/* Must be called only after all slaves have been released */ 224static void tlb_deinitialize(struct bonding *bond) 225{ 226 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 227 228 _lock_tx_hashtbl(bond); 229 230 kfree(bond_info->tx_hashtbl); 231 bond_info->tx_hashtbl = NULL; 232 233 _unlock_tx_hashtbl(bond); 234} 235 236static long long compute_gap(struct slave *slave) 237{ 238 return (s64) (slave->speed << 20) - /* Convert to Megabit per sec */ 239 (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */ 240} 241 242/* Caller must hold bond lock for read */ 243static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) 244{ 245 struct slave *slave, *least_loaded; 246 long long max_gap; 247 int i; 248 249 least_loaded = NULL; 250 max_gap = LLONG_MIN; 251 252 /* Find the slave with the largest gap */ 253 bond_for_each_slave(bond, slave, i) { 254 if (SLAVE_IS_OK(slave)) { 255 long long gap = compute_gap(slave); 256 257 if (max_gap < gap) { 258 least_loaded = slave; 259 max_gap = gap; 260 } 261 } 262 } 263 264 return least_loaded; 265} 266 267/* Caller must hold bond lock for read */ 268static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len) 269{ 270 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 271 struct tlb_client_info *hash_table; 272 struct slave *assigned_slave; 273 274 _lock_tx_hashtbl(bond); 275 276 hash_table = bond_info->tx_hashtbl; 277 assigned_slave = hash_table[hash_index].tx_slave; 278 if (!assigned_slave) { 279 assigned_slave = tlb_get_least_loaded_slave(bond); 280 281 if (assigned_slave) { 282 struct tlb_slave_info *slave_info = 283 &(SLAVE_TLB_INFO(assigned_slave)); 284 u32 next_index = slave_info->head; 285 286 hash_table[hash_index].tx_slave = assigned_slave; 287 hash_table[hash_index].next = next_index; 288 hash_table[hash_index].prev = TLB_NULL_INDEX; 289 290 if (next_index != TLB_NULL_INDEX) { 291 hash_table[next_index].prev = hash_index; 292 } 293 294 slave_info->head = hash_index; 295 slave_info->load += 296 hash_table[hash_index].load_history; 297 } 298 } 299 300 if (assigned_slave) { 301 hash_table[hash_index].tx_bytes += skb_len; 302 } 303 304 _unlock_tx_hashtbl(bond); 305 306 return assigned_slave; 307} 308 309/*********************** rlb specific functions ***************************/ 310static inline void _lock_rx_hashtbl(struct bonding *bond) 311{ 312 spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); 313} 314 315static inline void _unlock_rx_hashtbl(struct bonding *bond) 316{ 317 spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); 318} 319 320/* when an ARP REPLY is received from a client update its info 321 * in the rx_hashtbl 322 */ 323static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) 324{ 325 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 326 struct rlb_client_info *client_info; 327 u32 hash_index; 328 329 _lock_rx_hashtbl(bond); 330 331 hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src)); 332 client_info = &(bond_info->rx_hashtbl[hash_index]); 333 334 if ((client_info->assigned) && 335 (client_info->ip_src == arp->ip_dst) && 336 (client_info->ip_dst == arp->ip_src) && 337 (compare_ether_addr_64bits(client_info->mac_dst, arp->mac_src))) { 338 /* update the clients MAC address */ 339 memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN); 340 client_info->ntt = 1; 341 bond_info->rx_ntt = 1; 342 } 343 344 _unlock_rx_hashtbl(bond); 345} 346 347static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev) 348{ 349 struct bonding *bond; 350 struct arp_pkt *arp = (struct arp_pkt *)skb->data; 351 int res = NET_RX_DROP; 352 353 while (bond_dev->priv_flags & IFF_802_1Q_VLAN) 354 bond_dev = vlan_dev_real_dev(bond_dev); 355 356 if (!(bond_dev->priv_flags & IFF_BONDING) || 357 !(bond_dev->flags & IFF_MASTER)) 358 goto out; 359 360 if (!arp) { 361 pr_debug("Packet has no ARP data\n"); 362 goto out; 363 } 364 365 if (!pskb_may_pull(skb, arp_hdr_len(bond_dev))) 366 goto out; 367 368 if (skb->len < sizeof(struct arp_pkt)) { 369 pr_debug("Packet is too small to be an ARP\n"); 370 goto out; 371 } 372 373 if (arp->op_code == htons(ARPOP_REPLY)) { 374 /* update rx hash table for this ARP */ 375 bond = netdev_priv(bond_dev); 376 rlb_update_entry_from_arp(bond, arp); 377 pr_debug("Server received an ARP Reply from client\n"); 378 } 379 380 res = NET_RX_SUCCESS; 381 382out: 383 dev_kfree_skb(skb); 384 385 return res; 386} 387 388/* Caller must hold bond lock for read */ 389static struct slave *rlb_next_rx_slave(struct bonding *bond) 390{ 391 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 392 struct slave *rx_slave, *slave, *start_at; 393 int i = 0; 394 395 if (bond_info->next_rx_slave) { 396 start_at = bond_info->next_rx_slave; 397 } else { 398 start_at = bond->first_slave; 399 } 400 401 rx_slave = NULL; 402 403 bond_for_each_slave_from(bond, slave, i, start_at) { 404 if (SLAVE_IS_OK(slave)) { 405 if (!rx_slave) { 406 rx_slave = slave; 407 } else if (slave->speed > rx_slave->speed) { 408 rx_slave = slave; 409 } 410 } 411 } 412 413 if (rx_slave) { 414 bond_info->next_rx_slave = rx_slave->next; 415 } 416 417 return rx_slave; 418} 419 420/* teach the switch the mac of a disabled slave 421 * on the primary for fault tolerance 422 * 423 * Caller must hold bond->curr_slave_lock for write or bond lock for write 424 */ 425static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) 426{ 427 if (!bond->curr_active_slave) { 428 return; 429 } 430 431 if (!bond->alb_info.primary_is_promisc) { 432 if (!dev_set_promiscuity(bond->curr_active_slave->dev, 1)) 433 bond->alb_info.primary_is_promisc = 1; 434 else 435 bond->alb_info.primary_is_promisc = 0; 436 } 437 438 bond->alb_info.rlb_promisc_timeout_counter = 0; 439 440 alb_send_learning_packets(bond->curr_active_slave, addr); 441} 442 443/* slave being removed should not be active at this point 444 * 445 * Caller must hold bond lock for read 446 */ 447static void rlb_clear_slave(struct bonding *bond, struct slave *slave) 448{ 449 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 450 struct rlb_client_info *rx_hash_table; 451 u32 index, next_index; 452 453 /* clear slave from rx_hashtbl */ 454 _lock_rx_hashtbl(bond); 455 456 rx_hash_table = bond_info->rx_hashtbl; 457 index = bond_info->rx_hashtbl_head; 458 for (; index != RLB_NULL_INDEX; index = next_index) { 459 next_index = rx_hash_table[index].next; 460 if (rx_hash_table[index].slave == slave) { 461 struct slave *assigned_slave = rlb_next_rx_slave(bond); 462 463 if (assigned_slave) { 464 rx_hash_table[index].slave = assigned_slave; 465 if (compare_ether_addr_64bits(rx_hash_table[index].mac_dst, 466 mac_bcast)) { 467 bond_info->rx_hashtbl[index].ntt = 1; 468 bond_info->rx_ntt = 1; 469 /* A slave has been removed from the 470 * table because it is either disabled 471 * or being released. We must retry the 472 * update to avoid clients from not 473 * being updated & disconnecting when 474 * there is stress 475 */ 476 bond_info->rlb_update_retry_counter = 477 RLB_UPDATE_RETRY; 478 } 479 } else { /* there is no active slave */ 480 rx_hash_table[index].slave = NULL; 481 } 482 } 483 } 484 485 _unlock_rx_hashtbl(bond); 486 487 write_lock_bh(&bond->curr_slave_lock); 488 489 if (slave != bond->curr_active_slave) { 490 rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); 491 } 492 493 write_unlock_bh(&bond->curr_slave_lock); 494} 495 496static void rlb_update_client(struct rlb_client_info *client_info) 497{ 498 int i; 499 500 if (!client_info->slave) { 501 return; 502 } 503 504 for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { 505 struct sk_buff *skb; 506 507 skb = arp_create(ARPOP_REPLY, ETH_P_ARP, 508 client_info->ip_dst, 509 client_info->slave->dev, 510 client_info->ip_src, 511 client_info->mac_dst, 512 client_info->slave->dev->dev_addr, 513 client_info->mac_dst); 514 if (!skb) { 515 pr_err("%s: Error: failed to create an ARP packet\n", 516 client_info->slave->dev->master->name); 517 continue; 518 } 519 520 skb->dev = client_info->slave->dev; 521 522 if (client_info->tag) { 523 skb = vlan_put_tag(skb, client_info->vlan_id); 524 if (!skb) { 525 pr_err("%s: Error: failed to insert VLAN tag\n", 526 client_info->slave->dev->master->name); 527 continue; 528 } 529 } 530 531 arp_xmit(skb); 532 } 533} 534 535/* sends ARP REPLIES that update the clients that need updating */ 536static void rlb_update_rx_clients(struct bonding *bond) 537{ 538 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 539 struct rlb_client_info *client_info; 540 u32 hash_index; 541 542 _lock_rx_hashtbl(bond); 543 544 hash_index = bond_info->rx_hashtbl_head; 545 for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { 546 client_info = &(bond_info->rx_hashtbl[hash_index]); 547 if (client_info->ntt) { 548 rlb_update_client(client_info); 549 if (bond_info->rlb_update_retry_counter == 0) { 550 client_info->ntt = 0; 551 } 552 } 553 } 554 555 /* do not update the entries again until this counter is zero so that 556 * not to confuse the clients. 557 */ 558 bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; 559 560 _unlock_rx_hashtbl(bond); 561} 562 563/* The slave was assigned a new mac address - update the clients */ 564static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave) 565{ 566 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 567 struct rlb_client_info *client_info; 568 int ntt = 0; 569 u32 hash_index; 570 571 _lock_rx_hashtbl(bond); 572 573 hash_index = bond_info->rx_hashtbl_head; 574 for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { 575 client_info = &(bond_info->rx_hashtbl[hash_index]); 576 577 if ((client_info->slave == slave) && 578 compare_ether_addr_64bits(client_info->mac_dst, mac_bcast)) { 579 client_info->ntt = 1; 580 ntt = 1; 581 } 582 } 583 584 // update the team's flag only after the whole iteration 585 if (ntt) { 586 bond_info->rx_ntt = 1; 587 //fasten the change 588 bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; 589 } 590 591 _unlock_rx_hashtbl(bond); 592} 593 594/* mark all clients using src_ip to be updated */ 595static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) 596{ 597 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 598 struct rlb_client_info *client_info; 599 u32 hash_index; 600 601 _lock_rx_hashtbl(bond); 602 603 hash_index = bond_info->rx_hashtbl_head; 604 for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { 605 client_info = &(bond_info->rx_hashtbl[hash_index]); 606 607 if (!client_info->slave) { 608 pr_err("%s: Error: found a client with no channel in the client's hash table\n", 609 bond->dev->name); 610 continue; 611 } 612 /*update all clients using this src_ip, that are not assigned 613 * to the team's address (curr_active_slave) and have a known 614 * unicast mac address. 615 */ 616 if ((client_info->ip_src == src_ip) && 617 compare_ether_addr_64bits(client_info->slave->dev->dev_addr, 618 bond->dev->dev_addr) && 619 compare_ether_addr_64bits(client_info->mac_dst, mac_bcast)) { 620 client_info->ntt = 1; 621 bond_info->rx_ntt = 1; 622 } 623 } 624 625 _unlock_rx_hashtbl(bond); 626} 627 628/* Caller must hold both bond and ptr locks for read */ 629static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) 630{ 631 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 632 struct arp_pkt *arp = arp_pkt(skb); 633 struct slave *assigned_slave; 634 struct rlb_client_info *client_info; 635 u32 hash_index = 0; 636 637 _lock_rx_hashtbl(bond); 638 639 hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src)); 640 client_info = &(bond_info->rx_hashtbl[hash_index]); 641 642 if (client_info->assigned) { 643 if ((client_info->ip_src == arp->ip_src) && 644 (client_info->ip_dst == arp->ip_dst)) { 645 /* the entry is already assigned to this client */ 646 if (compare_ether_addr_64bits(arp->mac_dst, mac_bcast)) { 647 /* update mac address from arp */ 648 memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); 649 } 650 651 assigned_slave = client_info->slave; 652 if (assigned_slave) { 653 _unlock_rx_hashtbl(bond); 654 return assigned_slave; 655 } 656 } else { 657 /* the entry is already assigned to some other client, 658 * move the old client to primary (curr_active_slave) so 659 * that the new client can be assigned to this entry. 660 */ 661 if (bond->curr_active_slave && 662 client_info->slave != bond->curr_active_slave) { 663 client_info->slave = bond->curr_active_slave; 664 rlb_update_client(client_info); 665 } 666 } 667 } 668 /* assign a new slave */ 669 assigned_slave = rlb_next_rx_slave(bond); 670 671 if (assigned_slave) { 672 client_info->ip_src = arp->ip_src; 673 client_info->ip_dst = arp->ip_dst; 674 /* arp->mac_dst is broadcast for arp reqeusts. 675 * will be updated with clients actual unicast mac address 676 * upon receiving an arp reply. 677 */ 678 memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); 679 client_info->slave = assigned_slave; 680 681 if (compare_ether_addr_64bits(client_info->mac_dst, mac_bcast)) { 682 client_info->ntt = 1; 683 bond->alb_info.rx_ntt = 1; 684 } else { 685 client_info->ntt = 0; 686 } 687 688 if (bond->vlgrp) { 689 if (!vlan_get_tag(skb, &client_info->vlan_id)) 690 client_info->tag = 1; 691 } 692 693 if (!client_info->assigned) { 694 u32 prev_tbl_head = bond_info->rx_hashtbl_head; 695 bond_info->rx_hashtbl_head = hash_index; 696 client_info->next = prev_tbl_head; 697 if (prev_tbl_head != RLB_NULL_INDEX) { 698 bond_info->rx_hashtbl[prev_tbl_head].prev = 699 hash_index; 700 } 701 client_info->assigned = 1; 702 } 703 } 704 705 _unlock_rx_hashtbl(bond); 706 707 return assigned_slave; 708} 709 710/* chooses (and returns) transmit channel for arp reply 711 * does not choose channel for other arp types since they are 712 * sent on the curr_active_slave 713 */ 714static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) 715{ 716 struct arp_pkt *arp = arp_pkt(skb); 717 struct slave *tx_slave = NULL; 718 719 if (arp->op_code == htons(ARPOP_REPLY)) { 720 /* the arp must be sent on the selected 721 * rx channel 722 */ 723 tx_slave = rlb_choose_channel(skb, bond); 724 if (tx_slave) { 725 memcpy(arp->mac_src,tx_slave->dev->dev_addr, ETH_ALEN); 726 } 727 pr_debug("Server sent ARP Reply packet\n"); 728 } else if (arp->op_code == htons(ARPOP_REQUEST)) { 729 /* Create an entry in the rx_hashtbl for this client as a 730 * place holder. 731 * When the arp reply is received the entry will be updated 732 * with the correct unicast address of the client. 733 */ 734 rlb_choose_channel(skb, bond); 735 736 /* The ARP relpy packets must be delayed so that 737 * they can cancel out the influence of the ARP request. 738 */ 739 bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY; 740 741 /* arp requests are broadcast and are sent on the primary 742 * the arp request will collapse all clients on the subnet to 743 * the primary slave. We must register these clients to be 744 * updated with their assigned mac. 745 */ 746 rlb_req_update_subnet_clients(bond, arp->ip_src); 747 pr_debug("Server sent ARP Request packet\n"); 748 } 749 750 return tx_slave; 751} 752 753/* Caller must hold bond lock for read */ 754static void rlb_rebalance(struct bonding *bond) 755{ 756 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 757 struct slave *assigned_slave; 758 struct rlb_client_info *client_info; 759 int ntt; 760 u32 hash_index; 761 762 _lock_rx_hashtbl(bond); 763 764 ntt = 0; 765 hash_index = bond_info->rx_hashtbl_head; 766 for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { 767 client_info = &(bond_info->rx_hashtbl[hash_index]); 768 assigned_slave = rlb_next_rx_slave(bond); 769 if (assigned_slave && (client_info->slave != assigned_slave)) { 770 client_info->slave = assigned_slave; 771 client_info->ntt = 1; 772 ntt = 1; 773 } 774 } 775 776 /* update the team's flag only after the whole iteration */ 777 if (ntt) { 778 bond_info->rx_ntt = 1; 779 } 780 _unlock_rx_hashtbl(bond); 781} 782 783/* Caller must hold rx_hashtbl lock */ 784static void rlb_init_table_entry(struct rlb_client_info *entry) 785{ 786 memset(entry, 0, sizeof(struct rlb_client_info)); 787 entry->next = RLB_NULL_INDEX; 788 entry->prev = RLB_NULL_INDEX; 789} 790 791static int rlb_initialize(struct bonding *bond) 792{ 793 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 794 struct packet_type *pk_type = &(BOND_ALB_INFO(bond).rlb_pkt_type); 795 struct rlb_client_info *new_hashtbl; 796 int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info); 797 int i; 798 799 spin_lock_init(&(bond_info->rx_hashtbl_lock)); 800 801 new_hashtbl = kmalloc(size, GFP_KERNEL); 802 if (!new_hashtbl) { 803 pr_err("%s: Error: Failed to allocate RLB hash table\n", 804 bond->dev->name); 805 return -1; 806 } 807 _lock_rx_hashtbl(bond); 808 809 bond_info->rx_hashtbl = new_hashtbl; 810 811 bond_info->rx_hashtbl_head = RLB_NULL_INDEX; 812 813 for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) { 814 rlb_init_table_entry(bond_info->rx_hashtbl + i); 815 } 816 817 _unlock_rx_hashtbl(bond); 818 819 /*initialize packet type*/ 820 pk_type->type = cpu_to_be16(ETH_P_ARP); 821 pk_type->dev = bond->dev; 822 pk_type->func = rlb_arp_recv; 823 824 /* register to receive ARPs */ 825 dev_add_pack(pk_type); 826 827 return 0; 828} 829 830static void rlb_deinitialize(struct bonding *bond) 831{ 832 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 833 834 dev_remove_pack(&(bond_info->rlb_pkt_type)); 835 836 _lock_rx_hashtbl(bond); 837 838 kfree(bond_info->rx_hashtbl); 839 bond_info->rx_hashtbl = NULL; 840 bond_info->rx_hashtbl_head = RLB_NULL_INDEX; 841 842 _unlock_rx_hashtbl(bond); 843} 844 845static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) 846{ 847 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 848 u32 curr_index; 849 850 _lock_rx_hashtbl(bond); 851 852 curr_index = bond_info->rx_hashtbl_head; 853 while (curr_index != RLB_NULL_INDEX) { 854 struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]); 855 u32 next_index = bond_info->rx_hashtbl[curr_index].next; 856 u32 prev_index = bond_info->rx_hashtbl[curr_index].prev; 857 858 if (curr->tag && (curr->vlan_id == vlan_id)) { 859 if (curr_index == bond_info->rx_hashtbl_head) { 860 bond_info->rx_hashtbl_head = next_index; 861 } 862 if (prev_index != RLB_NULL_INDEX) { 863 bond_info->rx_hashtbl[prev_index].next = next_index; 864 } 865 if (next_index != RLB_NULL_INDEX) { 866 bond_info->rx_hashtbl[next_index].prev = prev_index; 867 } 868 869 rlb_init_table_entry(curr); 870 } 871 872 curr_index = next_index; 873 } 874 875 _unlock_rx_hashtbl(bond); 876} 877 878/*********************** tlb/rlb shared functions *********************/ 879 880static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) 881{ 882 struct bonding *bond = bond_get_bond_by_slave(slave); 883 struct learning_pkt pkt; 884 int size = sizeof(struct learning_pkt); 885 int i; 886 887 memset(&pkt, 0, size); 888 memcpy(pkt.mac_dst, mac_addr, ETH_ALEN); 889 memcpy(pkt.mac_src, mac_addr, ETH_ALEN); 890 pkt.type = cpu_to_be16(ETH_P_LOOP); 891 892 for (i = 0; i < MAX_LP_BURST; i++) { 893 struct sk_buff *skb; 894 char *data; 895 896 skb = dev_alloc_skb(size); 897 if (!skb) { 898 return; 899 } 900 901 data = skb_put(skb, size); 902 memcpy(data, &pkt, size); 903 904 skb_reset_mac_header(skb); 905 skb->network_header = skb->mac_header + ETH_HLEN; 906 skb->protocol = pkt.type; 907 skb->priority = TC_PRIO_CONTROL; 908 skb->dev = slave->dev; 909 910 if (bond->vlgrp) { 911 struct vlan_entry *vlan; 912 913 vlan = bond_next_vlan(bond, 914 bond->alb_info.current_alb_vlan); 915 916 bond->alb_info.current_alb_vlan = vlan; 917 if (!vlan) { 918 kfree_skb(skb); 919 continue; 920 } 921 922 skb = vlan_put_tag(skb, vlan->vlan_id); 923 if (!skb) { 924 pr_err("%s: Error: failed to insert VLAN tag\n", 925 bond->dev->name); 926 continue; 927 } 928 } 929 930 dev_queue_xmit(skb); 931 } 932} 933 934/* hw is a boolean parameter that determines whether we should try and 935 * set the hw address of the device as well as the hw address of the 936 * net_device 937 */ 938static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[], int hw) 939{ 940 struct net_device *dev = slave->dev; 941 struct sockaddr s_addr; 942 943 if (!hw) { 944 memcpy(dev->dev_addr, addr, dev->addr_len); 945 return 0; 946 } 947 948 /* for rlb each slave must have a unique hw mac addresses so that */ 949 /* each slave will receive packets destined to a different mac */ 950 memcpy(s_addr.sa_data, addr, dev->addr_len); 951 s_addr.sa_family = dev->type; 952 if (dev_set_mac_address(dev, &s_addr)) { 953 pr_err("%s: Error: dev_set_mac_address of dev %s failed!\n" 954 "ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n", 955 dev->master->name, dev->name); 956 return -EOPNOTSUPP; 957 } 958 return 0; 959} 960 961/* 962 * Swap MAC addresses between two slaves. 963 * 964 * Called with RTNL held, and no other locks. 965 * 966 */ 967 968static void alb_swap_mac_addr(struct bonding *bond, struct slave *slave1, struct slave *slave2) 969{ 970 u8 tmp_mac_addr[ETH_ALEN]; 971 972 memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN); 973 alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr, bond->alb_info.rlb_enabled); 974 alb_set_slave_mac_addr(slave2, tmp_mac_addr, bond->alb_info.rlb_enabled); 975 976} 977 978/* 979 * Send learning packets after MAC address swap. 980 * 981 * Called with RTNL and no other locks 982 */ 983static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, 984 struct slave *slave2) 985{ 986 int slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2)); 987 struct slave *disabled_slave = NULL; 988 989 ASSERT_RTNL(); 990 991 /* fasten the change in the switch */ 992 if (SLAVE_IS_OK(slave1)) { 993 alb_send_learning_packets(slave1, slave1->dev->dev_addr); 994 if (bond->alb_info.rlb_enabled) { 995 /* inform the clients that the mac address 996 * has changed 997 */ 998 rlb_req_update_slave_clients(bond, slave1); 999 } 1000 } else { 1001 disabled_slave = slave1; 1002 } 1003 1004 if (SLAVE_IS_OK(slave2)) { 1005 alb_send_learning_packets(slave2, slave2->dev->dev_addr); 1006 if (bond->alb_info.rlb_enabled) { 1007 /* inform the clients that the mac address 1008 * has changed 1009 */ 1010 rlb_req_update_slave_clients(bond, slave2); 1011 } 1012 } else { 1013 disabled_slave = slave2; 1014 } 1015 1016 if (bond->alb_info.rlb_enabled && slaves_state_differ) { 1017 /* A disabled slave was assigned an active mac addr */ 1018 rlb_teach_disabled_mac_on_primary(bond, 1019 disabled_slave->dev->dev_addr); 1020 } 1021} 1022 1023/** 1024 * alb_change_hw_addr_on_detach 1025 * @bond: bonding we're working on 1026 * @slave: the slave that was just detached 1027 * 1028 * We assume that @slave was already detached from the slave list. 1029 * 1030 * If @slave's permanent hw address is different both from its current 1031 * address and from @bond's address, then somewhere in the bond there's 1032 * a slave that has @slave's permanet address as its current address. 1033 * We'll make sure that that slave no longer uses @slave's permanent address. 1034 * 1035 * Caller must hold RTNL and no other locks 1036 */ 1037static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave) 1038{ 1039 int perm_curr_diff; 1040 int perm_bond_diff; 1041 1042 perm_curr_diff = compare_ether_addr_64bits(slave->perm_hwaddr, 1043 slave->dev->dev_addr); 1044 perm_bond_diff = compare_ether_addr_64bits(slave->perm_hwaddr, 1045 bond->dev->dev_addr); 1046 1047 if (perm_curr_diff && perm_bond_diff) { 1048 struct slave *tmp_slave; 1049 int i, found = 0; 1050 1051 bond_for_each_slave(bond, tmp_slave, i) { 1052 if (!compare_ether_addr_64bits(slave->perm_hwaddr, 1053 tmp_slave->dev->dev_addr)) { 1054 found = 1; 1055 break; 1056 } 1057 } 1058 1059 if (found) { 1060 /* locking: needs RTNL and nothing else */ 1061 alb_swap_mac_addr(bond, slave, tmp_slave); 1062 alb_fasten_mac_swap(bond, slave, tmp_slave); 1063 } 1064 } 1065} 1066 1067/** 1068 * alb_handle_addr_collision_on_attach 1069 * @bond: bonding we're working on 1070 * @slave: the slave that was just attached 1071 * 1072 * checks uniqueness of slave's mac address and handles the case the 1073 * new slave uses the bonds mac address. 1074 * 1075 * If the permanent hw address of @slave is @bond's hw address, we need to 1076 * find a different hw address to give @slave, that isn't in use by any other 1077 * slave in the bond. This address must be, of course, one of the premanent 1078 * addresses of the other slaves. 1079 * 1080 * We go over the slave list, and for each slave there we compare its 1081 * permanent hw address with the current address of all the other slaves. 1082 * If no match was found, then we've found a slave with a permanent address 1083 * that isn't used by any other slave in the bond, so we can assign it to 1084 * @slave. 1085 * 1086 * assumption: this function is called before @slave is attached to the 1087 * bond slave list. 1088 * 1089 * caller must hold the bond lock for write since the mac addresses are compared 1090 * and may be swapped. 1091 */ 1092static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave) 1093{ 1094 struct slave *tmp_slave1, *tmp_slave2, *free_mac_slave; 1095 struct slave *has_bond_addr = bond->curr_active_slave; 1096 int i, j, found = 0; 1097 1098 if (bond->slave_cnt == 0) { 1099 /* this is the first slave */ 1100 return 0; 1101 } 1102 1103 /* if slave's mac address differs from bond's mac address 1104 * check uniqueness of slave's mac address against the other 1105 * slaves in the bond. 1106 */ 1107 if (compare_ether_addr_64bits(slave->perm_hwaddr, bond->dev->dev_addr)) { 1108 bond_for_each_slave(bond, tmp_slave1, i) { 1109 if (!compare_ether_addr_64bits(tmp_slave1->dev->dev_addr, 1110 slave->dev->dev_addr)) { 1111 found = 1; 1112 break; 1113 } 1114 } 1115 1116 if (!found) 1117 return 0; 1118 1119 /* Try setting slave mac to bond address and fall-through 1120 to code handling that situation below... */ 1121 alb_set_slave_mac_addr(slave, bond->dev->dev_addr, 1122 bond->alb_info.rlb_enabled); 1123 } 1124 1125 /* The slave's address is equal to the address of the bond. 1126 * Search for a spare address in the bond for this slave. 1127 */ 1128 free_mac_slave = NULL; 1129 1130 bond_for_each_slave(bond, tmp_slave1, i) { 1131 found = 0; 1132 bond_for_each_slave(bond, tmp_slave2, j) { 1133 if (!compare_ether_addr_64bits(tmp_slave1->perm_hwaddr, 1134 tmp_slave2->dev->dev_addr)) { 1135 found = 1; 1136 break; 1137 } 1138 } 1139 1140 if (!found) { 1141 /* no slave has tmp_slave1's perm addr 1142 * as its curr addr 1143 */ 1144 free_mac_slave = tmp_slave1; 1145 break; 1146 } 1147 1148 if (!has_bond_addr) { 1149 if (!compare_ether_addr_64bits(tmp_slave1->dev->dev_addr, 1150 bond->dev->dev_addr)) { 1151 1152 has_bond_addr = tmp_slave1; 1153 } 1154 } 1155 } 1156 1157 if (free_mac_slave) { 1158 alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr, 1159 bond->alb_info.rlb_enabled); 1160 1161 pr_warning("%s: Warning: the hw address of slave %s is in use by the bond; giving it the hw address of %s\n", 1162 bond->dev->name, slave->dev->name, 1163 free_mac_slave->dev->name); 1164 1165 } else if (has_bond_addr) { 1166 pr_err("%s: Error: the hw address of slave %s is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n", 1167 bond->dev->name, slave->dev->name); 1168 return -EFAULT; 1169 } 1170 1171 return 0; 1172} 1173 1174/** 1175 * alb_set_mac_address 1176 * @bond: 1177 * @addr: 1178 * 1179 * In TLB mode all slaves are configured to the bond's hw address, but set 1180 * their dev_addr field to different addresses (based on their permanent hw 1181 * addresses). 1182 * 1183 * For each slave, this function sets the interface to the new address and then 1184 * changes its dev_addr field to its previous value. 1185 * 1186 * Unwinding assumes bond's mac address has not yet changed. 1187 */ 1188static int alb_set_mac_address(struct bonding *bond, void *addr) 1189{ 1190 struct sockaddr sa; 1191 struct slave *slave, *stop_at; 1192 char tmp_addr[ETH_ALEN]; 1193 int res; 1194 int i; 1195 1196 if (bond->alb_info.rlb_enabled) { 1197 return 0; 1198 } 1199 1200 bond_for_each_slave(bond, slave, i) { 1201 /* save net_device's current hw address */ 1202 memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN); 1203 1204 res = dev_set_mac_address(slave->dev, addr); 1205 1206 /* restore net_device's hw address */ 1207 memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN); 1208 1209 if (res) 1210 goto unwind; 1211 } 1212 1213 return 0; 1214 1215unwind: 1216 memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len); 1217 sa.sa_family = bond->dev->type; 1218 1219 /* unwind from head to the slave that failed */ 1220 stop_at = slave; 1221 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 1222 memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN); 1223 dev_set_mac_address(slave->dev, &sa); 1224 memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN); 1225 } 1226 1227 return res; 1228} 1229 1230/************************ exported alb funcions ************************/ 1231 1232int bond_alb_initialize(struct bonding *bond, int rlb_enabled) 1233{ 1234 int res; 1235 1236 res = tlb_initialize(bond); 1237 if (res) { 1238 return res; 1239 } 1240 1241 if (rlb_enabled) { 1242 bond->alb_info.rlb_enabled = 1; 1243 /* initialize rlb */ 1244 res = rlb_initialize(bond); 1245 if (res) { 1246 tlb_deinitialize(bond); 1247 return res; 1248 } 1249 } else { 1250 bond->alb_info.rlb_enabled = 0; 1251 } 1252 1253 return 0; 1254} 1255 1256void bond_alb_deinitialize(struct bonding *bond) 1257{ 1258 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1259 1260 tlb_deinitialize(bond); 1261 1262 if (bond_info->rlb_enabled) { 1263 rlb_deinitialize(bond); 1264 } 1265} 1266 1267int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) 1268{ 1269 struct bonding *bond = netdev_priv(bond_dev); 1270 struct ethhdr *eth_data; 1271 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1272 struct slave *tx_slave = NULL; 1273 static const __be32 ip_bcast = htonl(0xffffffff); 1274 int hash_size = 0; 1275 int do_tx_balance = 1; 1276 u32 hash_index = 0; 1277 const u8 *hash_start = NULL; 1278 int res = 1; 1279 struct ipv6hdr *ip6hdr; 1280 1281 skb_reset_mac_header(skb); 1282 eth_data = eth_hdr(skb); 1283 1284 /* make sure that the curr_active_slave and the slaves list do 1285 * not change during tx 1286 */ 1287 read_lock(&bond->lock); 1288 read_lock(&bond->curr_slave_lock); 1289 1290 if (!BOND_IS_OK(bond)) { 1291 goto out; 1292 } 1293 1294 switch (ntohs(skb->protocol)) { 1295 case ETH_P_IP: { 1296 const struct iphdr *iph = ip_hdr(skb); 1297 1298 if (!compare_ether_addr_64bits(eth_data->h_dest, mac_bcast) || 1299 (iph->daddr == ip_bcast) || 1300 (iph->protocol == IPPROTO_IGMP)) { 1301 do_tx_balance = 0; 1302 break; 1303 } 1304 hash_start = (char *)&(iph->daddr); 1305 hash_size = sizeof(iph->daddr); 1306 } 1307 break; 1308 case ETH_P_IPV6: 1309 /* IPv6 doesn't really use broadcast mac address, but leave 1310 * that here just in case. 1311 */ 1312 if (!compare_ether_addr_64bits(eth_data->h_dest, mac_bcast)) { 1313 do_tx_balance = 0; 1314 break; 1315 } 1316 1317 /* IPv6 uses all-nodes multicast as an equivalent to 1318 * broadcasts in IPv4. 1319 */ 1320 if (!compare_ether_addr_64bits(eth_data->h_dest, mac_v6_allmcast)) { 1321 do_tx_balance = 0; 1322 break; 1323 } 1324 1325 /* Additianally, DAD probes should not be tx-balanced as that 1326 * will lead to false positives for duplicate addresses and 1327 * prevent address configuration from working. 1328 */ 1329 ip6hdr = ipv6_hdr(skb); 1330 if (ipv6_addr_any(&ip6hdr->saddr)) { 1331 do_tx_balance = 0; 1332 break; 1333 } 1334 1335 hash_start = (char *)&(ipv6_hdr(skb)->daddr); 1336 hash_size = sizeof(ipv6_hdr(skb)->daddr); 1337 break; 1338 case ETH_P_IPX: 1339 if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) { 1340 /* something is wrong with this packet */ 1341 do_tx_balance = 0; 1342 break; 1343 } 1344 1345 if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) { 1346 /* The only protocol worth balancing in 1347 * this family since it has an "ARP" like 1348 * mechanism 1349 */ 1350 do_tx_balance = 0; 1351 break; 1352 } 1353 1354 hash_start = (char*)eth_data->h_dest; 1355 hash_size = ETH_ALEN; 1356 break; 1357 case ETH_P_ARP: 1358 do_tx_balance = 0; 1359 if (bond_info->rlb_enabled) { 1360 tx_slave = rlb_arp_xmit(skb, bond); 1361 } 1362 break; 1363 default: 1364 do_tx_balance = 0; 1365 break; 1366 } 1367 1368 if (do_tx_balance) { 1369 hash_index = _simple_hash(hash_start, hash_size); 1370 tx_slave = tlb_choose_channel(bond, hash_index, skb->len); 1371 } 1372 1373 if (!tx_slave) { 1374 /* unbalanced or unassigned, send through primary */ 1375 tx_slave = bond->curr_active_slave; 1376 bond_info->unbalanced_load += skb->len; 1377 } 1378 1379 if (tx_slave && SLAVE_IS_OK(tx_slave)) { 1380 if (tx_slave != bond->curr_active_slave) { 1381 memcpy(eth_data->h_source, 1382 tx_slave->dev->dev_addr, 1383 ETH_ALEN); 1384 } 1385 1386 res = bond_dev_queue_xmit(bond, skb, tx_slave->dev); 1387 } else { 1388 if (tx_slave) { 1389 tlb_clear_slave(bond, tx_slave, 0); 1390 } 1391 } 1392 1393out: 1394 if (res) { 1395 /* no suitable interface, frame not sent */ 1396 dev_kfree_skb(skb); 1397 } 1398 read_unlock(&bond->curr_slave_lock); 1399 read_unlock(&bond->lock); 1400 return NETDEV_TX_OK; 1401} 1402 1403void bond_alb_monitor(struct work_struct *work) 1404{ 1405 struct bonding *bond = container_of(work, struct bonding, 1406 alb_work.work); 1407 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1408 struct slave *slave; 1409 int i; 1410 1411 read_lock(&bond->lock); 1412 1413 if (bond->kill_timers) { 1414 goto out; 1415 } 1416 1417 if (bond->slave_cnt == 0) { 1418 bond_info->tx_rebalance_counter = 0; 1419 bond_info->lp_counter = 0; 1420 goto re_arm; 1421 } 1422 1423 bond_info->tx_rebalance_counter++; 1424 bond_info->lp_counter++; 1425 1426 /* send learning packets */ 1427 if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) { 1428 /* change of curr_active_slave involves swapping of mac addresses. 1429 * in order to avoid this swapping from happening while 1430 * sending the learning packets, the curr_slave_lock must be held for 1431 * read. 1432 */ 1433 read_lock(&bond->curr_slave_lock); 1434 1435 bond_for_each_slave(bond, slave, i) { 1436 alb_send_learning_packets(slave, slave->dev->dev_addr); 1437 } 1438 1439 read_unlock(&bond->curr_slave_lock); 1440 1441 bond_info->lp_counter = 0; 1442 } 1443 1444 /* rebalance tx traffic */ 1445 if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { 1446 1447 read_lock(&bond->curr_slave_lock); 1448 1449 bond_for_each_slave(bond, slave, i) { 1450 tlb_clear_slave(bond, slave, 1); 1451 if (slave == bond->curr_active_slave) { 1452 SLAVE_TLB_INFO(slave).load = 1453 bond_info->unbalanced_load / 1454 BOND_TLB_REBALANCE_INTERVAL; 1455 bond_info->unbalanced_load = 0; 1456 } 1457 } 1458 1459 read_unlock(&bond->curr_slave_lock); 1460 1461 bond_info->tx_rebalance_counter = 0; 1462 } 1463 1464 /* handle rlb stuff */ 1465 if (bond_info->rlb_enabled) { 1466 if (bond_info->primary_is_promisc && 1467 (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) { 1468 1469 /* 1470 * dev_set_promiscuity requires rtnl and 1471 * nothing else. 1472 */ 1473 read_unlock(&bond->lock); 1474 rtnl_lock(); 1475 1476 bond_info->rlb_promisc_timeout_counter = 0; 1477 1478 /* If the primary was set to promiscuous mode 1479 * because a slave was disabled then 1480 * it can now leave promiscuous mode. 1481 */ 1482 dev_set_promiscuity(bond->curr_active_slave->dev, -1); 1483 bond_info->primary_is_promisc = 0; 1484 1485 rtnl_unlock(); 1486 read_lock(&bond->lock); 1487 } 1488 1489 if (bond_info->rlb_rebalance) { 1490 bond_info->rlb_rebalance = 0; 1491 rlb_rebalance(bond); 1492 } 1493 1494 /* check if clients need updating */ 1495 if (bond_info->rx_ntt) { 1496 if (bond_info->rlb_update_delay_counter) { 1497 --bond_info->rlb_update_delay_counter; 1498 } else { 1499 rlb_update_rx_clients(bond); 1500 if (bond_info->rlb_update_retry_counter) { 1501 --bond_info->rlb_update_retry_counter; 1502 } else { 1503 bond_info->rx_ntt = 0; 1504 } 1505 } 1506 } 1507 } 1508 1509re_arm: 1510 queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); 1511out: 1512 read_unlock(&bond->lock); 1513} 1514 1515/* assumption: called before the slave is attached to the bond 1516 * and not locked by the bond lock 1517 */ 1518int bond_alb_init_slave(struct bonding *bond, struct slave *slave) 1519{ 1520 int res; 1521 1522 res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr, 1523 bond->alb_info.rlb_enabled); 1524 if (res) { 1525 return res; 1526 } 1527 1528 /* caller must hold the bond lock for write since the mac addresses 1529 * are compared and may be swapped. 1530 */ 1531 read_lock(&bond->lock); 1532 1533 res = alb_handle_addr_collision_on_attach(bond, slave); 1534 1535 read_unlock(&bond->lock); 1536 1537 if (res) { 1538 return res; 1539 } 1540 1541 tlb_init_slave(slave); 1542 1543 /* order a rebalance ASAP */ 1544 bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; 1545 1546 if (bond->alb_info.rlb_enabled) { 1547 bond->alb_info.rlb_rebalance = 1; 1548 } 1549 1550 return 0; 1551} 1552 1553/* 1554 * Remove slave from tlb and rlb hash tables, and fix up MAC addresses 1555 * if necessary. 1556 * 1557 * Caller must hold RTNL and no other locks 1558 */ 1559void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave) 1560{ 1561 if (bond->slave_cnt > 1) { 1562 alb_change_hw_addr_on_detach(bond, slave); 1563 } 1564 1565 tlb_clear_slave(bond, slave, 0); 1566 1567 if (bond->alb_info.rlb_enabled) { 1568 bond->alb_info.next_rx_slave = NULL; 1569 rlb_clear_slave(bond, slave); 1570 } 1571} 1572 1573/* Caller must hold bond lock for read */ 1574void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link) 1575{ 1576 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1577 1578 if (link == BOND_LINK_DOWN) { 1579 tlb_clear_slave(bond, slave, 0); 1580 if (bond->alb_info.rlb_enabled) { 1581 rlb_clear_slave(bond, slave); 1582 } 1583 } else if (link == BOND_LINK_UP) { 1584 /* order a rebalance ASAP */ 1585 bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; 1586 if (bond->alb_info.rlb_enabled) { 1587 bond->alb_info.rlb_rebalance = 1; 1588 /* If the updelay module parameter is smaller than the 1589 * forwarding delay of the switch the rebalance will 1590 * not work because the rebalance arp replies will 1591 * not be forwarded to the clients.. 1592 */ 1593 } 1594 } 1595} 1596 1597/** 1598 * bond_alb_handle_active_change - assign new curr_active_slave 1599 * @bond: our bonding struct 1600 * @new_slave: new slave to assign 1601 * 1602 * Set the bond->curr_active_slave to @new_slave and handle 1603 * mac address swapping and promiscuity changes as needed. 1604 * 1605 * If new_slave is NULL, caller must hold curr_slave_lock or 1606 * bond->lock for write. 1607 * 1608 * If new_slave is not NULL, caller must hold RTNL, bond->lock for 1609 * read and curr_slave_lock for write. Processing here may sleep, so 1610 * no other locks may be held. 1611 */ 1612void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave) 1613 __releases(&bond->curr_slave_lock) 1614 __releases(&bond->lock) 1615 __acquires(&bond->lock) 1616 __acquires(&bond->curr_slave_lock) 1617{ 1618 struct slave *swap_slave; 1619 int i; 1620 1621 if (bond->curr_active_slave == new_slave) { 1622 return; 1623 } 1624 1625 if (bond->curr_active_slave && bond->alb_info.primary_is_promisc) { 1626 dev_set_promiscuity(bond->curr_active_slave->dev, -1); 1627 bond->alb_info.primary_is_promisc = 0; 1628 bond->alb_info.rlb_promisc_timeout_counter = 0; 1629 } 1630 1631 swap_slave = bond->curr_active_slave; 1632 bond->curr_active_slave = new_slave; 1633 1634 if (!new_slave || (bond->slave_cnt == 0)) { 1635 return; 1636 } 1637 1638 /* set the new curr_active_slave to the bonds mac address 1639 * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave 1640 */ 1641 if (!swap_slave) { 1642 struct slave *tmp_slave; 1643 /* find slave that is holding the bond's mac address */ 1644 bond_for_each_slave(bond, tmp_slave, i) { 1645 if (!compare_ether_addr_64bits(tmp_slave->dev->dev_addr, 1646 bond->dev->dev_addr)) { 1647 swap_slave = tmp_slave; 1648 break; 1649 } 1650 } 1651 } 1652 1653 /* 1654 * Arrange for swap_slave and new_slave to temporarily be 1655 * ignored so we can mess with their MAC addresses without 1656 * fear of interference from transmit activity. 1657 */ 1658 if (swap_slave) { 1659 tlb_clear_slave(bond, swap_slave, 1); 1660 } 1661 tlb_clear_slave(bond, new_slave, 1); 1662 1663 write_unlock_bh(&bond->curr_slave_lock); 1664 read_unlock(&bond->lock); 1665 1666 ASSERT_RTNL(); 1667 1668 /* curr_active_slave must be set before calling alb_swap_mac_addr */ 1669 if (swap_slave) { 1670 /* swap mac address */ 1671 alb_swap_mac_addr(bond, swap_slave, new_slave); 1672 } else { 1673 /* set the new_slave to the bond mac address */ 1674 alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr, 1675 bond->alb_info.rlb_enabled); 1676 } 1677 1678 if (swap_slave) { 1679 alb_fasten_mac_swap(bond, swap_slave, new_slave); 1680 read_lock(&bond->lock); 1681 } else { 1682 read_lock(&bond->lock); 1683 alb_send_learning_packets(new_slave, bond->dev->dev_addr); 1684 } 1685 1686 write_lock_bh(&bond->curr_slave_lock); 1687} 1688 1689/* 1690 * Called with RTNL 1691 */ 1692int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) 1693 __acquires(&bond->lock) 1694 __releases(&bond->lock) 1695{ 1696 struct bonding *bond = netdev_priv(bond_dev); 1697 struct sockaddr *sa = addr; 1698 struct slave *slave, *swap_slave; 1699 int res; 1700 int i; 1701 1702 if (!is_valid_ether_addr(sa->sa_data)) { 1703 return -EADDRNOTAVAIL; 1704 } 1705 1706 res = alb_set_mac_address(bond, addr); 1707 if (res) { 1708 return res; 1709 } 1710 1711 memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); 1712 1713 /* If there is no curr_active_slave there is nothing else to do. 1714 * Otherwise we'll need to pass the new address to it and handle 1715 * duplications. 1716 */ 1717 if (!bond->curr_active_slave) { 1718 return 0; 1719 } 1720 1721 swap_slave = NULL; 1722 1723 bond_for_each_slave(bond, slave, i) { 1724 if (!compare_ether_addr_64bits(slave->dev->dev_addr, 1725 bond_dev->dev_addr)) { 1726 swap_slave = slave; 1727 break; 1728 } 1729 } 1730 1731 if (swap_slave) { 1732 alb_swap_mac_addr(bond, swap_slave, bond->curr_active_slave); 1733 alb_fasten_mac_swap(bond, swap_slave, bond->curr_active_slave); 1734 } else { 1735 alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr, 1736 bond->alb_info.rlb_enabled); 1737 1738 read_lock(&bond->lock); 1739 alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr); 1740 if (bond->alb_info.rlb_enabled) { 1741 /* inform clients mac address has changed */ 1742 rlb_req_update_slave_clients(bond, bond->curr_active_slave); 1743 } 1744 read_unlock(&bond->lock); 1745 } 1746 1747 return 0; 1748} 1749 1750void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id) 1751{ 1752 if (bond->alb_info.current_alb_vlan && 1753 (bond->alb_info.current_alb_vlan->vlan_id == vlan_id)) { 1754 bond->alb_info.current_alb_vlan = NULL; 1755 } 1756 1757 if (bond->alb_info.rlb_enabled) { 1758 rlb_clear_vlan(bond, vlan_id); 1759 } 1760} 1761