• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-R7000-V1.0.7.12_1.2.5/components/opensource/linux/linux-2.6.36/drivers/net/bonding/
1/*
2 * originally based on the dummy device.
3 *
4 * Copyright 1999, Thomas Davis, tadavis@lbl.gov.
5 * Licensed under the GPL. Based on dummy.c, and eql.c devices.
6 *
7 * bonding.c: an Ethernet Bonding driver
8 *
 * This is useful for talking to Cisco EtherChannel compatible equipment:
10 *	Cisco 5500
11 *	Sun Trunking (Solaris)
12 *	Alteon AceDirector Trunks
13 *	Linux Bonding
14 *	and probably many L2 switches ...
15 *
16 * How it works:
17 *    ifconfig bond0 ipaddress netmask up
18 *      will setup a network device, with an ip address.  No mac address
19 *	will be assigned at this time.  The hw mac address will come from
20 *	the first slave bonded to the channel.  All slaves will then use
21 *	this hw mac address.
22 *
23 *    ifconfig bond0 down
24 *         will release all slaves, marking them as down.
25 *
26 *    ifenslave bond0 eth0
27 *	will attach eth0 to bond0 as a slave.  eth0 hw mac address will either
28 *	a: be used as initial mac address
29 *	b: if a hw mac address already is there, eth0's hw mac address
30 *	   will then be set from bond0.
31 *
32 */
33
34#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35
36#include <linux/kernel.h>
37#include <linux/module.h>
38#include <linux/types.h>
39#include <linux/fcntl.h>
40#include <linux/interrupt.h>
41#include <linux/ptrace.h>
42#include <linux/ioport.h>
43#include <linux/in.h>
44#include <net/ip.h>
45#include <linux/ip.h>
46#include <linux/tcp.h>
47#include <linux/udp.h>
48#include <linux/slab.h>
49#include <linux/string.h>
50#include <linux/init.h>
51#include <linux/timer.h>
52#include <linux/socket.h>
53#include <linux/ctype.h>
54#include <linux/inet.h>
55#include <linux/bitops.h>
56#include <linux/io.h>
57#include <asm/system.h>
58#include <asm/dma.h>
59#include <linux/uaccess.h>
60#include <linux/errno.h>
61#include <linux/netdevice.h>
62#include <linux/netpoll.h>
63#include <linux/inetdevice.h>
64#include <linux/igmp.h>
65#include <linux/etherdevice.h>
66#include <linux/skbuff.h>
67#include <net/sock.h>
68#include <linux/rtnetlink.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
71#include <linux/smp.h>
72#include <linux/if_ether.h>
73#include <net/arp.h>
74#include <linux/mii.h>
75#include <linux/ethtool.h>
76#include <linux/if_vlan.h>
77#include <linux/if_bonding.h>
78#include <linux/jiffies.h>
79#include <net/route.h>
80#include <net/net_namespace.h>
81#include <net/netns/generic.h>
82#include "bonding.h"
83#include "bond_3ad.h"
84#include "bond_alb.h"
85
86/*---------------------------- Module parameters ----------------------------*/
87
88/* monitor all links that often (in milliseconds). <=0 disables monitoring */
89#define BOND_LINK_MON_INTERV	0
90#define BOND_LINK_ARP_INTERV	0
91
92static int max_bonds	= BOND_DEFAULT_MAX_BONDS;
93static int tx_queues	= BOND_DEFAULT_TX_QUEUES;
94static int num_grat_arp = 1;
95static int num_unsol_na = 1;
96static int miimon	= BOND_LINK_MON_INTERV;
97static int updelay;
98static int downdelay;
99static int use_carrier	= 1;
100static char *mode;
101static char *primary;
102static char *primary_reselect;
103static char *lacp_rate;
104static char *ad_select;
105static char *xmit_hash_policy;
106static int arp_interval = BOND_LINK_ARP_INTERV;
107static char *arp_ip_target[BOND_MAX_ARP_TARGETS];
108static char *arp_validate;
109static char *fail_over_mac;
110static int all_slaves_active = 0;
111static struct bond_params bonding_defaults;
112
113module_param(max_bonds, int, 0);
114MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
115module_param(tx_queues, int, 0);
116MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)");
117module_param(num_grat_arp, int, 0644);
118MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event");
119module_param(num_unsol_na, int, 0644);
120MODULE_PARM_DESC(num_unsol_na, "Number of unsolicited IPv6 Neighbor Advertisements packets to send on failover event");
121module_param(miimon, int, 0);
122MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
123module_param(updelay, int, 0);
124MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
125module_param(downdelay, int, 0);
126MODULE_PARM_DESC(downdelay, "Delay before considering link down, "
127			    "in milliseconds");
128module_param(use_carrier, int, 0);
129MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "
130			      "0 for off, 1 for on (default)");
131module_param(mode, charp, 0);
132MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
133		       "1 for active-backup, 2 for balance-xor, "
134		       "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "
135		       "6 for balance-alb");
136module_param(primary, charp, 0);
137MODULE_PARM_DESC(primary, "Primary network device to use");
138module_param(primary_reselect, charp, 0);
139MODULE_PARM_DESC(primary_reselect, "Reselect primary slave "
140				   "once it comes up; "
141				   "0 for always (default), "
142				   "1 for only if speed of primary is "
143				   "better, "
144				   "2 for only on active slave "
145				   "failure");
146module_param(lacp_rate, charp, 0);
147MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
148			    "(slow/fast)");
149module_param(ad_select, charp, 0);
150MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic: stable (0, default), bandwidth (1), count (2)");
151module_param(xmit_hash_policy, charp, 0);
152MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)"
153				   ", 1 for layer 3+4");
154module_param(arp_interval, int, 0);
155MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
156module_param_array(arp_ip_target, charp, NULL, 0);
157MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
158module_param(arp_validate, charp, 0);
159MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
160module_param(fail_over_mac, charp, 0);
161MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC.  none (default), active or follow");
162module_param(all_slaves_active, int, 0);
163MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface"
164				     "by setting active flag for all slaves.  "
165				     "0 for never (default), 1 for always.");
166
167/*----------------------------- Global variables ----------------------------*/
168
169static const char * const version =
170	DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n";
171
172int bond_net_id __read_mostly;
173
174static __be32 arp_target[BOND_MAX_ARP_TARGETS];
175static int arp_ip_count;
176static int bond_mode	= BOND_MODE_ROUNDROBIN;
177static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
178static int lacp_fast;
179#ifdef CONFIG_NET_POLL_CONTROLLER
180static int disable_netpoll = 1;
181#endif
182
183const struct bond_parm_tbl bond_lacp_tbl[] = {
184{	"slow",		AD_LACP_SLOW},
185{	"fast",		AD_LACP_FAST},
186{	NULL,		-1},
187};
188
189const struct bond_parm_tbl bond_mode_tbl[] = {
190{	"balance-rr",		BOND_MODE_ROUNDROBIN},
191{	"active-backup",	BOND_MODE_ACTIVEBACKUP},
192{	"balance-xor",		BOND_MODE_XOR},
193{	"broadcast",		BOND_MODE_BROADCAST},
194{	"802.3ad",		BOND_MODE_8023AD},
195{	"balance-tlb",		BOND_MODE_TLB},
196{	"balance-alb",		BOND_MODE_ALB},
197{	NULL,			-1},
198};
199
200const struct bond_parm_tbl xmit_hashtype_tbl[] = {
201{	"layer2",		BOND_XMIT_POLICY_LAYER2},
202{	"layer3+4",		BOND_XMIT_POLICY_LAYER34},
203{	"layer2+3",		BOND_XMIT_POLICY_LAYER23},
204{	NULL,			-1},
205};
206
207const struct bond_parm_tbl arp_validate_tbl[] = {
208{	"none",			BOND_ARP_VALIDATE_NONE},
209{	"active",		BOND_ARP_VALIDATE_ACTIVE},
210{	"backup",		BOND_ARP_VALIDATE_BACKUP},
211{	"all",			BOND_ARP_VALIDATE_ALL},
212{	NULL,			-1},
213};
214
215const struct bond_parm_tbl fail_over_mac_tbl[] = {
216{	"none",			BOND_FOM_NONE},
217{	"active",		BOND_FOM_ACTIVE},
218{	"follow",		BOND_FOM_FOLLOW},
219{	NULL,			-1},
220};
221
222const struct bond_parm_tbl pri_reselect_tbl[] = {
223{	"always",		BOND_PRI_RESELECT_ALWAYS},
224{	"better",		BOND_PRI_RESELECT_BETTER},
225{	"failure",		BOND_PRI_RESELECT_FAILURE},
226{	NULL,			-1},
227};
228
229struct bond_parm_tbl ad_select_tbl[] = {
230{	"stable",	BOND_AD_STABLE},
231{	"bandwidth",	BOND_AD_BANDWIDTH},
232{	"count",	BOND_AD_COUNT},
233{	NULL,		-1},
234};
235
236/*-------------------------- Forward declarations ---------------------------*/
237
/* Defined later in this file; declared here for earlier users. */
static int bond_init(struct net_device *bond_dev);
static void bond_uninit(struct net_device *bond_dev);
static void bond_send_gratuitous_arp(struct bonding *bond);
241
242/*---------------------------- General routines -----------------------------*/
243
244static const char *bond_mode_name(int mode)
245{
246	static const char *names[] = {
247		[BOND_MODE_ROUNDROBIN] = "load balancing (round-robin)",
248		[BOND_MODE_ACTIVEBACKUP] = "fault-tolerance (active-backup)",
249		[BOND_MODE_XOR] = "load balancing (xor)",
250		[BOND_MODE_BROADCAST] = "fault-tolerance (broadcast)",
251		[BOND_MODE_8023AD] = "IEEE 802.3ad Dynamic link aggregation",
252		[BOND_MODE_TLB] = "transmit load balancing",
253		[BOND_MODE_ALB] = "adaptive load balancing",
254	};
255
256	if (mode < 0 || mode > BOND_MODE_ALB)
257		return "unknown";
258
259	return names[mode];
260}
261
262/*---------------------------------- VLAN -----------------------------------*/
263
264/**
265 * bond_add_vlan - add a new vlan id on bond
266 * @bond: bond that got the notification
267 * @vlan_id: the vlan id to add
268 *
269 * Returns -ENOMEM if allocation failed.
270 */
271static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id)
272{
273	struct vlan_entry *vlan;
274
275	pr_debug("bond: %s, vlan id %d\n",
276		 (bond ? bond->dev->name : "None"), vlan_id);
277
278	vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL);
279	if (!vlan)
280		return -ENOMEM;
281
282	INIT_LIST_HEAD(&vlan->vlan_list);
283	vlan->vlan_id = vlan_id;
284
285	write_lock_bh(&bond->lock);
286
287	list_add_tail(&vlan->vlan_list, &bond->vlan_list);
288
289	write_unlock_bh(&bond->lock);
290
291	pr_debug("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name);
292
293	return 0;
294}
295
296/**
297 * bond_del_vlan - delete a vlan id from bond
298 * @bond: bond that got the notification
299 * @vlan_id: the vlan id to delete
300 *
301 * returns -ENODEV if @vlan_id was not found in @bond.
302 */
303static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
304{
305	struct vlan_entry *vlan;
306	int res = -ENODEV;
307
308	pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
309
310	write_lock_bh(&bond->lock);
311
312	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
313		if (vlan->vlan_id == vlan_id) {
314			list_del(&vlan->vlan_list);
315
316			if (bond_is_lb(bond))
317				bond_alb_clear_vlan(bond, vlan_id);
318
319			pr_debug("removed VLAN ID %d from bond %s\n",
320				 vlan_id, bond->dev->name);
321
322			kfree(vlan);
323
324			if (list_empty(&bond->vlan_list) &&
325			    (bond->slave_cnt == 0)) {
326				/* Last VLAN removed and no slaves, so
327				 * restore block on adding VLANs. This will
328				 * be removed once new slaves that are not
329				 * VLAN challenged will be added.
330				 */
331				bond->dev->features |= NETIF_F_VLAN_CHALLENGED;
332			}
333
334			res = 0;
335			goto out;
336		}
337	}
338
339	pr_debug("couldn't find VLAN ID %d in bond %s\n",
340		 vlan_id, bond->dev->name);
341
342out:
343	write_unlock_bh(&bond->lock);
344	return res;
345}
346
347/**
348 * bond_has_challenged_slaves
349 * @bond: the bond we're working on
350 *
351 * Searches the slave list. Returns 1 if a vlan challenged slave
352 * was found, 0 otherwise.
353 *
354 * Assumes bond->lock is held.
355 */
356static int bond_has_challenged_slaves(struct bonding *bond)
357{
358	struct slave *slave;
359	int i;
360
361	bond_for_each_slave(bond, slave, i) {
362		if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) {
363			pr_debug("found VLAN challenged slave - %s\n",
364				 slave->dev->name);
365			return 1;
366		}
367	}
368
369	pr_debug("no VLAN challenged slaves found\n");
370	return 0;
371}
372
373/**
374 * bond_next_vlan - safely skip to the next item in the vlans list.
375 * @bond: the bond we're working on
376 * @curr: item we're advancing from
377 *
378 * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL,
379 * or @curr->next otherwise (even if it is @curr itself again).
380 *
381 * Caller must hold bond->lock
382 */
383struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
384{
385	struct vlan_entry *next, *last;
386
387	if (list_empty(&bond->vlan_list))
388		return NULL;
389
390	if (!curr) {
391		next = list_entry(bond->vlan_list.next,
392				  struct vlan_entry, vlan_list);
393	} else {
394		last = list_entry(bond->vlan_list.prev,
395				  struct vlan_entry, vlan_list);
396		if (last == curr) {
397			next = list_entry(bond->vlan_list.next,
398					  struct vlan_entry, vlan_list);
399		} else {
400			next = list_entry(curr->vlan_list.next,
401					  struct vlan_entry, vlan_list);
402		}
403	}
404
405	return next;
406}
407
408/**
409 * bond_dev_queue_xmit - Prepare skb for xmit.
410 *
411 * @bond: bond device that got this skb for tx.
412 * @skb: hw accel VLAN tagged skb to transmit
413 * @slave_dev: slave that is supposed to xmit this skbuff
414 *
415 * When the bond gets an skb to transmit that is
416 * already hardware accelerated VLAN tagged, and it
417 * needs to relay this skb to a slave that is not
418 * hw accel capable, the skb needs to be "unaccelerated",
419 * i.e. strip the hwaccel tag and re-insert it as part
420 * of the payload.
421 */
422int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
423			struct net_device *slave_dev)
424{
425	unsigned short uninitialized_var(vlan_id);
426
427	/* Test vlan_list not vlgrp to catch and handle 802.1p tags */
428	if (!list_empty(&bond->vlan_list) &&
429	    !(slave_dev->features & NETIF_F_HW_VLAN_TX) &&
430	    vlan_get_tag(skb, &vlan_id) == 0) {
431		skb->dev = slave_dev;
432		skb = vlan_put_tag(skb, vlan_id);
433		if (!skb) {
434			/* vlan_put_tag() frees the skb in case of error,
435			 * so return success here so the calling functions
436			 * won't attempt to free is again.
437			 */
438			return 0;
439		}
440	} else {
441		skb->dev = slave_dev;
442	}
443
444	skb->priority = 1;
445#ifdef CONFIG_NET_POLL_CONTROLLER
446	if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) {
447		struct netpoll *np = bond->dev->npinfo->netpoll;
448		slave_dev->npinfo = bond->dev->npinfo;
449		np->real_dev = np->dev = skb->dev;
450		slave_dev->priv_flags |= IFF_IN_NETPOLL;
451		netpoll_send_skb(np, skb);
452		slave_dev->priv_flags &= ~IFF_IN_NETPOLL;
453		np->dev = bond->dev;
454	} else
455#endif
456		dev_queue_xmit(skb);
457
458	return 0;
459}
460
461/*
462 * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid
463 * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a
464 * lock because:
465 * a. This operation is performed in IOCTL context,
466 * b. The operation is protected by the RTNL semaphore in the 8021q code,
467 * c. Holding a lock with BH disabled while directly calling a base driver
468 *    entry point is generally a BAD idea.
469 *
470 * The design of synchronization/protection for this operation in the 8021q
471 * module is good for one or more VLAN devices over a single physical device
472 * and cannot be extended for a teaming solution like bonding, so there is a
473 * potential race condition here where a net device from the vlan group might
474 * be referenced (either by a base driver or the 8021q code) while it is being
475 * removed from the system. However, it turns out we're not making matters
476 * worse, and if it works for regular VLAN usage it will work here too.
477*/
478
479/**
480 * bond_vlan_rx_register - Propagates registration to slaves
481 * @bond_dev: bonding net device that got called
482 * @grp: vlan group being registered
483 */
484static void bond_vlan_rx_register(struct net_device *bond_dev,
485				  struct vlan_group *grp)
486{
487	struct bonding *bond = netdev_priv(bond_dev);
488	struct slave *slave;
489	int i;
490
491	write_lock(&bond->lock);
492	bond->vlgrp = grp;
493	write_unlock(&bond->lock);
494
495	bond_for_each_slave(bond, slave, i) {
496		struct net_device *slave_dev = slave->dev;
497		const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
498
499		if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
500		    slave_ops->ndo_vlan_rx_register) {
501			slave_ops->ndo_vlan_rx_register(slave_dev, grp);
502		}
503	}
504}
505
506/**
507 * bond_vlan_rx_add_vid - Propagates adding an id to slaves
508 * @bond_dev: bonding net device that got called
509 * @vid: vlan id being added
510 */
511static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
512{
513	struct bonding *bond = netdev_priv(bond_dev);
514	struct slave *slave;
515	int i, res;
516
517	bond_for_each_slave(bond, slave, i) {
518		struct net_device *slave_dev = slave->dev;
519		const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
520
521		if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
522		    slave_ops->ndo_vlan_rx_add_vid) {
523			slave_ops->ndo_vlan_rx_add_vid(slave_dev, vid);
524		}
525	}
526
527	res = bond_add_vlan(bond, vid);
528	if (res) {
529		pr_err("%s: Error: Failed to add vlan id %d\n",
530		       bond_dev->name, vid);
531	}
532}
533
534/**
535 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves
536 * @bond_dev: bonding net device that got called
537 * @vid: vlan id being removed
538 */
539static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
540{
541	struct bonding *bond = netdev_priv(bond_dev);
542	struct slave *slave;
543	struct net_device *vlan_dev;
544	int i, res;
545
546	bond_for_each_slave(bond, slave, i) {
547		struct net_device *slave_dev = slave->dev;
548		const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
549
550		if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
551		    slave_ops->ndo_vlan_rx_kill_vid) {
552			/* Save and then restore vlan_dev in the grp array,
553			 * since the slave's driver might clear it.
554			 */
555			vlan_dev = vlan_group_get_device(bond->vlgrp, vid);
556			slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vid);
557			vlan_group_set_device(bond->vlgrp, vid, vlan_dev);
558		}
559	}
560
561	res = bond_del_vlan(bond, vid);
562	if (res) {
563		pr_err("%s: Error: Failed to remove vlan id %d\n",
564		       bond_dev->name, vid);
565	}
566}
567
568static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev)
569{
570	struct vlan_entry *vlan;
571	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
572
573	if (!bond->vlgrp)
574		return;
575
576	if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
577	    slave_ops->ndo_vlan_rx_register)
578		slave_ops->ndo_vlan_rx_register(slave_dev, bond->vlgrp);
579
580	if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
581	    !(slave_ops->ndo_vlan_rx_add_vid))
582		return;
583
584	list_for_each_entry(vlan, &bond->vlan_list, vlan_list)
585		slave_ops->ndo_vlan_rx_add_vid(slave_dev, vlan->vlan_id);
586}
587
588static void bond_del_vlans_from_slave(struct bonding *bond,
589				      struct net_device *slave_dev)
590{
591	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
592	struct vlan_entry *vlan;
593	struct net_device *vlan_dev;
594
595	if (!bond->vlgrp)
596		return;
597
598	if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
599	    !(slave_ops->ndo_vlan_rx_kill_vid))
600		goto unreg;
601
602	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
603		if (!vlan->vlan_id)
604			continue;
605		/* Save and then restore vlan_dev in the grp array,
606		 * since the slave's driver might clear it.
607		 */
608		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
609		slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vlan->vlan_id);
610		vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev);
611	}
612
613unreg:
614	if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
615	    slave_ops->ndo_vlan_rx_register)
616		slave_ops->ndo_vlan_rx_register(slave_dev, NULL);
617}
618
619/*------------------------------- Link status -------------------------------*/
620
621/*
622 * Set the carrier state for the master according to the state of its
623 * slaves.  If any slaves are up, the master is up.  In 802.3ad mode,
624 * do special 802.3ad magic.
625 *
626 * Returns zero if carrier state does not change, nonzero if it does.
627 */
628static int bond_set_carrier(struct bonding *bond)
629{
630	struct slave *slave;
631	int i;
632
633	if (bond->slave_cnt == 0)
634		goto down;
635
636	if (bond->params.mode == BOND_MODE_8023AD)
637		return bond_3ad_set_carrier(bond);
638
639	bond_for_each_slave(bond, slave, i) {
640		if (slave->link == BOND_LINK_UP) {
641			if (!netif_carrier_ok(bond->dev)) {
642				netif_carrier_on(bond->dev);
643				return 1;
644			}
645			return 0;
646		}
647	}
648
649down:
650	if (netif_carrier_ok(bond->dev)) {
651		netif_carrier_off(bond->dev);
652		return 1;
653	}
654	return 0;
655}
656
657/*
658 * Get link speed and duplex from the slave's base driver
659 * using ethtool. If for some reason the call fails or the
660 * values are invalid, fake speed and duplex to 100/Full
661 * and return error.
662 */
663static int bond_update_speed_duplex(struct slave *slave)
664{
665	struct net_device *slave_dev = slave->dev;
666	struct ethtool_cmd etool;
667	int res;
668
669	/* Fake speed and duplex */
670	slave->speed = SPEED_100;
671	slave->duplex = DUPLEX_FULL;
672
673	if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings)
674		return -1;
675
676	res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool);
677	if (res < 0)
678		return -1;
679
680	switch (etool.speed) {
681	case SPEED_10:
682	case SPEED_100:
683	case SPEED_1000:
684	case SPEED_10000:
685		break;
686	default:
687		return -1;
688	}
689
690	switch (etool.duplex) {
691	case DUPLEX_FULL:
692	case DUPLEX_HALF:
693		break;
694	default:
695		return -1;
696	}
697
698	slave->speed = etool.speed;
699	slave->duplex = etool.duplex;
700
701	return 0;
702}
703
704/*
705 * if <dev> supports MII link status reporting, check its link status.
706 *
707 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
708 * depending upon the setting of the use_carrier parameter.
709 *
710 * Return either BMSR_LSTATUS, meaning that the link is up (or we
711 * can't tell and just pretend it is), or 0, meaning that the link is
712 * down.
713 *
714 * If reporting is non-zero, instead of faking link up, return -1 if
715 * both ETHTOOL and MII ioctls fail (meaning the device does not
716 * support them).  If use_carrier is set, return whatever it says.
717 * It'd be nice if there was a good way to tell if a driver supports
718 * netif_carrier, but there really isn't.
719 */
720static int bond_check_dev_link(struct bonding *bond,
721			       struct net_device *slave_dev, int reporting)
722{
723	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
724	int (*ioctl)(struct net_device *, struct ifreq *, int);
725	struct ifreq ifr;
726	struct mii_ioctl_data *mii;
727
728	if (!reporting && !netif_running(slave_dev))
729		return 0;
730
731	if (bond->params.use_carrier)
732		return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;
733
734	/* Try to get link status using Ethtool first. */
735	if (slave_dev->ethtool_ops) {
736		if (slave_dev->ethtool_ops->get_link) {
737			u32 link;
738
739			link = slave_dev->ethtool_ops->get_link(slave_dev);
740
741			return link ? BMSR_LSTATUS : 0;
742		}
743	}
744
745	/* Ethtool can't be used, fallback to MII ioctls. */
746	ioctl = slave_ops->ndo_do_ioctl;
747	if (ioctl) {
748		/* TODO: set pointer to correct ioctl on a per team member */
749		/*       bases to make this more efficient. that is, once  */
750		/*       we determine the correct ioctl, we will always    */
751		/*       call it and not the others for that team          */
752		/*       member.                                           */
753
754		/*
755		 * We cannot assume that SIOCGMIIPHY will also read a
756		 * register; not all network drivers (e.g., e100)
757		 * support that.
758		 */
759
760		/* Yes, the mii is overlaid on the ifreq.ifr_ifru */
761		strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
762		mii = if_mii(&ifr);
763		if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
764			mii->reg_num = MII_BMSR;
765			if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0)
766				return mii->val_out & BMSR_LSTATUS;
767		}
768	}
769
770	/*
771	 * If reporting, report that either there's no dev->do_ioctl,
772	 * or both SIOCGMIIREG and get_link failed (meaning that we
773	 * cannot report link status).  If not reporting, pretend
774	 * we're ok.
775	 */
776	return reporting ? -1 : BMSR_LSTATUS;
777}
778
779/*----------------------------- Multicast list ------------------------------*/
780
781/*
782 * Push the promiscuity flag down to appropriate slaves
783 */
784static int bond_set_promiscuity(struct bonding *bond, int inc)
785{
786	int err = 0;
787	if (USES_PRIMARY(bond->params.mode)) {
788		/* write lock already acquired */
789		if (bond->curr_active_slave) {
790			err = dev_set_promiscuity(bond->curr_active_slave->dev,
791						  inc);
792		}
793	} else {
794		struct slave *slave;
795		int i;
796		bond_for_each_slave(bond, slave, i) {
797			err = dev_set_promiscuity(slave->dev, inc);
798			if (err)
799				return err;
800		}
801	}
802	return err;
803}
804
805/*
806 * Push the allmulti flag down to all slaves
807 */
808static int bond_set_allmulti(struct bonding *bond, int inc)
809{
810	int err = 0;
811	if (USES_PRIMARY(bond->params.mode)) {
812		/* write lock already acquired */
813		if (bond->curr_active_slave) {
814			err = dev_set_allmulti(bond->curr_active_slave->dev,
815					       inc);
816		}
817	} else {
818		struct slave *slave;
819		int i;
820		bond_for_each_slave(bond, slave, i) {
821			err = dev_set_allmulti(slave->dev, inc);
822			if (err)
823				return err;
824		}
825	}
826	return err;
827}
828
829/*
830 * Add a Multicast address to slaves
831 * according to mode
832 */
833static void bond_mc_add(struct bonding *bond, void *addr)
834{
835	if (USES_PRIMARY(bond->params.mode)) {
836		/* write lock already acquired */
837		if (bond->curr_active_slave)
838			dev_mc_add(bond->curr_active_slave->dev, addr);
839	} else {
840		struct slave *slave;
841		int i;
842
843		bond_for_each_slave(bond, slave, i)
844			dev_mc_add(slave->dev, addr);
845	}
846}
847
848/*
849 * Remove a multicast address from slave
850 * according to mode
851 */
852static void bond_mc_del(struct bonding *bond, void *addr)
853{
854	if (USES_PRIMARY(bond->params.mode)) {
855		/* write lock already acquired */
856		if (bond->curr_active_slave)
857			dev_mc_del(bond->curr_active_slave->dev, addr);
858	} else {
859		struct slave *slave;
860		int i;
861		bond_for_each_slave(bond, slave, i) {
862			dev_mc_del(slave->dev, addr);
863		}
864	}
865}
866
867
868/*
869 * Retrieve the list of registered multicast addresses for the bonding
870 * device and retransmit an IGMP JOIN request to the current active
871 * slave.
872 */
873static void bond_resend_igmp_join_requests(struct bonding *bond)
874{
875	struct in_device *in_dev;
876	struct ip_mc_list *im;
877
878	rcu_read_lock();
879	in_dev = __in_dev_get_rcu(bond->dev);
880	if (in_dev) {
881		for (im = in_dev->mc_list; im; im = im->next)
882			ip_mc_rejoin_group(im);
883	}
884
885	rcu_read_unlock();
886}
887
888/*
889 * flush all members of flush->mc_list from device dev->mc_list
890 */
891static void bond_mc_list_flush(struct net_device *bond_dev,
892			       struct net_device *slave_dev)
893{
894	struct bonding *bond = netdev_priv(bond_dev);
895	struct netdev_hw_addr *ha;
896
897	netdev_for_each_mc_addr(ha, bond_dev)
898		dev_mc_del(slave_dev, ha->addr);
899
900	if (bond->params.mode == BOND_MODE_8023AD) {
901		/* del lacpdu mc addr from mc list */
902		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
903
904		dev_mc_del(slave_dev, lacpdu_multicast);
905	}
906}
907
908/*--------------------------- Active slave change ---------------------------*/
909
910/*
911 * Update the mc list and multicast-related flags for the new and
912 * old active slaves (if any) according to the multicast mode, and
913 * promiscuous flags unconditionally.
914 */
915static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
916			 struct slave *old_active)
917{
918	struct netdev_hw_addr *ha;
919
920	if (!USES_PRIMARY(bond->params.mode))
921		/* nothing to do -  mc list is already up-to-date on
922		 * all slaves
923		 */
924		return;
925
926	if (old_active) {
927		if (bond->dev->flags & IFF_PROMISC)
928			dev_set_promiscuity(old_active->dev, -1);
929
930		if (bond->dev->flags & IFF_ALLMULTI)
931			dev_set_allmulti(old_active->dev, -1);
932
933		netdev_for_each_mc_addr(ha, bond->dev)
934			dev_mc_del(old_active->dev, ha->addr);
935	}
936
937	if (new_active) {
938		if (bond->dev->flags & IFF_PROMISC)
939			dev_set_promiscuity(new_active->dev, 1);
940
941		if (bond->dev->flags & IFF_ALLMULTI)
942			dev_set_allmulti(new_active->dev, 1);
943
944		netdev_for_each_mc_addr(ha, bond->dev)
945			dev_mc_add(new_active->dev, ha->addr);
946		bond_resend_igmp_join_requests(bond);
947	}
948}
949
950/*
951 * bond_do_fail_over_mac
952 *
953 * Perform special MAC address swapping for fail_over_mac settings
954 *
955 * Called with RTNL, bond->lock for read, curr_slave_lock for write_bh.
956 */
957static void bond_do_fail_over_mac(struct bonding *bond,
958				  struct slave *new_active,
959				  struct slave *old_active)
960	__releases(&bond->curr_slave_lock)
961	__releases(&bond->lock)
962	__acquires(&bond->lock)
963	__acquires(&bond->curr_slave_lock)
964{
965	u8 tmp_mac[ETH_ALEN];
966	struct sockaddr saddr;
967	int rv;
968
969	switch (bond->params.fail_over_mac) {
970	case BOND_FOM_ACTIVE:
971		if (new_active)
972			memcpy(bond->dev->dev_addr,  new_active->dev->dev_addr,
973			       new_active->dev->addr_len);
974		break;
975	case BOND_FOM_FOLLOW:
976		/*
977		 * if new_active && old_active, swap them
978		 * if just old_active, do nothing (going to no active slave)
979		 * if just new_active, set new_active to bond's MAC
980		 */
981		if (!new_active)
982			return;
983
984		write_unlock_bh(&bond->curr_slave_lock);
985		read_unlock(&bond->lock);
986
987		if (old_active) {
988			memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN);
989			memcpy(saddr.sa_data, old_active->dev->dev_addr,
990			       ETH_ALEN);
991			saddr.sa_family = new_active->dev->type;
992		} else {
993			memcpy(saddr.sa_data, bond->dev->dev_addr, ETH_ALEN);
994			saddr.sa_family = bond->dev->type;
995		}
996
997		rv = dev_set_mac_address(new_active->dev, &saddr);
998		if (rv) {
999			pr_err("%s: Error %d setting MAC of slave %s\n",
1000			       bond->dev->name, -rv, new_active->dev->name);
1001			goto out;
1002		}
1003
1004		if (!old_active)
1005			goto out;
1006
1007		memcpy(saddr.sa_data, tmp_mac, ETH_ALEN);
1008		saddr.sa_family = old_active->dev->type;
1009
1010		rv = dev_set_mac_address(old_active->dev, &saddr);
1011		if (rv)
1012			pr_err("%s: Error %d setting MAC of slave %s\n",
1013			       bond->dev->name, -rv, new_active->dev->name);
1014out:
1015		read_lock(&bond->lock);
1016		write_lock_bh(&bond->curr_slave_lock);
1017		break;
1018	default:
1019		pr_err("%s: bond_do_fail_over_mac impossible: bad policy %d\n",
1020		       bond->dev->name, bond->params.fail_over_mac);
1021		break;
1022	}
1023
1024}
1025
1026static bool bond_should_change_active(struct bonding *bond)
1027{
1028	struct slave *prim = bond->primary_slave;
1029	struct slave *curr = bond->curr_active_slave;
1030
1031	if (!prim || !curr || curr->link != BOND_LINK_UP)
1032		return true;
1033	if (bond->force_primary) {
1034		bond->force_primary = false;
1035		return true;
1036	}
1037	if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER &&
1038	    (prim->speed < curr->speed ||
1039	     (prim->speed == curr->speed && prim->duplex <= curr->duplex)))
1040		return false;
1041	if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE)
1042		return false;
1043	return true;
1044}
1045
1046/**
1047 * find_best_interface - select the best available slave to be the active one
1048 * @bond: our bonding struct
1049 *
1050 * Warning: Caller must hold curr_slave_lock for writing.
1051 */
1052static struct slave *bond_find_best_slave(struct bonding *bond)
1053{
1054	struct slave *new_active, *old_active;
1055	struct slave *bestslave = NULL;
1056	int mintime = bond->params.updelay;
1057	int i;
1058
1059	new_active = bond->curr_active_slave;
1060
1061	if (!new_active) { /* there were no active slaves left */
1062		if (bond->slave_cnt > 0)   /* found one slave */
1063			new_active = bond->first_slave;
1064		else
1065			return NULL; /* still no slave, return NULL */
1066	}
1067
1068	if ((bond->primary_slave) &&
1069	    bond->primary_slave->link == BOND_LINK_UP &&
1070	    bond_should_change_active(bond)) {
1071		new_active = bond->primary_slave;
1072	}
1073
1074	/* remember where to stop iterating over the slaves */
1075	old_active = new_active;
1076
1077	bond_for_each_slave_from(bond, new_active, i, old_active) {
1078		if (new_active->link == BOND_LINK_UP) {
1079			return new_active;
1080		} else if (new_active->link == BOND_LINK_BACK &&
1081			   IS_UP(new_active->dev)) {
1082			/* link up, but waiting for stabilization */
1083			if (new_active->delay < mintime) {
1084				mintime = new_active->delay;
1085				bestslave = new_active;
1086			}
1087		}
1088	}
1089
1090	return bestslave;
1091}
1092
1093/**
1094 * change_active_interface - change the active slave into the specified one
1095 * @bond: our bonding struct
1096 * @new: the new slave to make the active one
1097 *
1098 * Set the new slave to the bond's settings and unset them on the old
1099 * curr_active_slave.
1100 * Setting include flags, mc-list, promiscuity, allmulti, etc.
1101 *
1102 * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP,
1103 * because it is apparently the best available slave we have, even though its
1104 * updelay hasn't timed out yet.
1105 *
1106 * If new_active is not NULL, caller must hold bond->lock for read and
1107 * curr_slave_lock for write_bh.
1108 */
1109void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1110{
1111	struct slave *old_active = bond->curr_active_slave;
1112
1113	if (old_active == new_active)
1114		return;
1115
1116	if (new_active) {
1117		new_active->jiffies = jiffies;
1118
1119		if (new_active->link == BOND_LINK_BACK) {
1120			if (USES_PRIMARY(bond->params.mode)) {
1121				pr_info("%s: making interface %s the new active one %d ms earlier.\n",
1122					bond->dev->name, new_active->dev->name,
1123					(bond->params.updelay - new_active->delay) * bond->params.miimon);
1124			}
1125
1126			new_active->delay = 0;
1127			new_active->link = BOND_LINK_UP;
1128
1129			if (bond->params.mode == BOND_MODE_8023AD)
1130				bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
1131
1132			if (bond_is_lb(bond))
1133				bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);
1134		} else {
1135			if (USES_PRIMARY(bond->params.mode)) {
1136				pr_info("%s: making interface %s the new active one.\n",
1137					bond->dev->name, new_active->dev->name);
1138			}
1139		}
1140	}
1141
1142	if (USES_PRIMARY(bond->params.mode))
1143		bond_mc_swap(bond, new_active, old_active);
1144
1145	if (bond_is_lb(bond)) {
1146		bond_alb_handle_active_change(bond, new_active);
1147		if (old_active)
1148			bond_set_slave_inactive_flags(old_active);
1149		if (new_active)
1150			bond_set_slave_active_flags(new_active);
1151	} else {
1152		bond->curr_active_slave = new_active;
1153	}
1154
1155	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
1156		if (old_active)
1157			bond_set_slave_inactive_flags(old_active);
1158
1159		if (new_active) {
1160			bond_set_slave_active_flags(new_active);
1161
1162			if (bond->params.fail_over_mac)
1163				bond_do_fail_over_mac(bond, new_active,
1164						      old_active);
1165
1166			bond->send_grat_arp = bond->params.num_grat_arp;
1167			bond_send_gratuitous_arp(bond);
1168
1169			bond->send_unsol_na = bond->params.num_unsol_na;
1170			bond_send_unsolicited_na(bond);
1171
1172			write_unlock_bh(&bond->curr_slave_lock);
1173			read_unlock(&bond->lock);
1174
1175			netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);
1176
1177			read_lock(&bond->lock);
1178			write_lock_bh(&bond->curr_slave_lock);
1179		}
1180	}
1181
1182	/* resend IGMP joins since all were sent on curr_active_slave */
1183	if (bond->params.mode == BOND_MODE_ROUNDROBIN) {
1184		bond_resend_igmp_join_requests(bond);
1185	}
1186}
1187
1188/**
1189 * bond_select_active_slave - select a new active slave, if needed
1190 * @bond: our bonding struct
1191 *
1192 * This functions should be called when one of the following occurs:
1193 * - The old curr_active_slave has been released or lost its link.
1194 * - The primary_slave has got its link back.
1195 * - A slave has got its link back and there's no old curr_active_slave.
1196 *
1197 * Caller must hold bond->lock for read and curr_slave_lock for write_bh.
1198 */
1199void bond_select_active_slave(struct bonding *bond)
1200{
1201	struct slave *best_slave;
1202	int rv;
1203
1204	best_slave = bond_find_best_slave(bond);
1205	if (best_slave != bond->curr_active_slave) {
1206		bond_change_active_slave(bond, best_slave);
1207		rv = bond_set_carrier(bond);
1208		if (!rv)
1209			return;
1210
1211		if (netif_carrier_ok(bond->dev)) {
1212			pr_info("%s: first active interface up!\n",
1213				bond->dev->name);
1214		} else {
1215			pr_info("%s: now running without any active interface !\n",
1216				bond->dev->name);
1217		}
1218	}
1219}
1220
1221/*--------------------------- slave list handling ---------------------------*/
1222
1223/*
1224 * This function attaches the slave to the end of list.
1225 *
1226 * bond->lock held for writing by caller.
1227 */
1228static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
1229{
1230	if (bond->first_slave == NULL) { /* attaching the first slave */
1231		new_slave->next = new_slave;
1232		new_slave->prev = new_slave;
1233		bond->first_slave = new_slave;
1234	} else {
1235		new_slave->next = bond->first_slave;
1236		new_slave->prev = bond->first_slave->prev;
1237		new_slave->next->prev = new_slave;
1238		new_slave->prev->next = new_slave;
1239	}
1240
1241	bond->slave_cnt++;
1242}
1243
1244/*
1245 * This function detaches the slave from the list.
1246 * WARNING: no check is made to verify if the slave effectively
1247 * belongs to <bond>.
1248 * Nothing is freed on return, structures are just unchained.
1249 * If any slave pointer in bond was pointing to <slave>,
1250 * it should be changed by the calling function.
1251 *
1252 * bond->lock held for writing by caller.
1253 */
1254static void bond_detach_slave(struct bonding *bond, struct slave *slave)
1255{
1256	if (slave->next)
1257		slave->next->prev = slave->prev;
1258
1259	if (slave->prev)
1260		slave->prev->next = slave->next;
1261
1262	if (bond->first_slave == slave) { /* slave is the first slave */
1263		if (bond->slave_cnt > 1) { /* there are more slave */
1264			bond->first_slave = slave->next;
1265		} else {
1266			bond->first_slave = NULL; /* slave was the last one */
1267		}
1268	}
1269
1270	slave->next = NULL;
1271	slave->prev = NULL;
1272	bond->slave_cnt--;
1273}
1274
1275#ifdef CONFIG_NET_POLL_CONTROLLER
1276/*
1277 * You must hold read lock on bond->lock before calling this.
1278 */
1279static bool slaves_support_netpoll(struct net_device *bond_dev)
1280{
1281	struct bonding *bond = netdev_priv(bond_dev);
1282	struct slave *slave;
1283	int i = 0;
1284	bool ret = true;
1285
1286	bond_for_each_slave(bond, slave, i) {
1287		if ((slave->dev->priv_flags & IFF_DISABLE_NETPOLL) ||
1288		    !slave->dev->netdev_ops->ndo_poll_controller)
1289			ret = false;
1290	}
1291	return i != 0 && ret;
1292}
1293
1294static void bond_poll_controller(struct net_device *bond_dev)
1295{
1296	struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
1297	if (dev != bond_dev)
1298		netpoll_poll_dev(dev);
1299}
1300
1301static void bond_netpoll_cleanup(struct net_device *bond_dev)
1302{
1303	struct bonding *bond = netdev_priv(bond_dev);
1304	struct slave *slave;
1305	const struct net_device_ops *ops;
1306	int i;
1307
1308	read_lock(&bond->lock);
1309	bond_dev->npinfo = NULL;
1310	bond_for_each_slave(bond, slave, i) {
1311		if (slave->dev) {
1312			ops = slave->dev->netdev_ops;
1313			if (ops->ndo_netpoll_cleanup)
1314				ops->ndo_netpoll_cleanup(slave->dev);
1315			else
1316				slave->dev->npinfo = NULL;
1317		}
1318	}
1319	read_unlock(&bond->lock);
1320}
1321
1322#else
1323
/* Built without CONFIG_NET_POLL_CONTROLLER: there is nothing to clean up. */
static void bond_netpoll_cleanup(struct net_device *bond_dev)
{
}
1327
1328#endif
1329
1330/*---------------------------------- IOCTL ----------------------------------*/
1331
1332static int bond_sethwaddr(struct net_device *bond_dev,
1333			  struct net_device *slave_dev)
1334{
1335	pr_debug("bond_dev=%p\n", bond_dev);
1336	pr_debug("slave_dev=%p\n", slave_dev);
1337	pr_debug("slave_dev->addr_len=%d\n", slave_dev->addr_len);
1338	memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len);
1339	return 0;
1340}
1341
/*
 * VLAN feature bits of the bond device.  bond_compute_features() keeps the
 * master's current value of these bits instead of deriving them from the
 * slaves.
 */
#define BOND_VLAN_FEATURES		\
	(NETIF_F_VLAN_CHALLENGED |	\
	 NETIF_F_HW_VLAN_RX |		\
	 NETIF_F_HW_VLAN_TX |		\
	 NETIF_F_HW_VLAN_FILTER)
1345
1346/*
1347 * Compute the common dev->feature set available to all slaves.  Some
1348 * feature bits are managed elsewhere, so preserve those feature bits
1349 * on the master device.
1350 */
1351static int bond_compute_features(struct bonding *bond)
1352{
1353	struct slave *slave;
1354	struct net_device *bond_dev = bond->dev;
1355	unsigned long features = bond_dev->features;
1356	unsigned long vlan_features = 0;
1357	unsigned short max_hard_header_len = max((u16)ETH_HLEN,
1358						bond_dev->hard_header_len);
1359	int i;
1360
1361	features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES);
1362	features |=  NETIF_F_GSO_MASK | NETIF_F_NO_CSUM;
1363
1364	if (!bond->first_slave)
1365		goto done;
1366
1367	features &= ~NETIF_F_ONE_FOR_ALL;
1368
1369	vlan_features = bond->first_slave->dev->vlan_features;
1370	bond_for_each_slave(bond, slave, i) {
1371		features = netdev_increment_features(features,
1372						     slave->dev->features,
1373						     NETIF_F_ONE_FOR_ALL);
1374		vlan_features = netdev_increment_features(vlan_features,
1375							slave->dev->vlan_features,
1376							NETIF_F_ONE_FOR_ALL);
1377		if (slave->dev->hard_header_len > max_hard_header_len)
1378			max_hard_header_len = slave->dev->hard_header_len;
1379	}
1380
1381done:
1382	features |= (bond_dev->features & BOND_VLAN_FEATURES);
1383	bond_dev->features = netdev_fix_features(features, NULL);
1384	bond_dev->vlan_features = netdev_fix_features(vlan_features, NULL);
1385	bond_dev->hard_header_len = max_hard_header_len;
1386
1387	return 0;
1388}
1389
1390static void bond_setup_by_slave(struct net_device *bond_dev,
1391				struct net_device *slave_dev)
1392{
1393	struct bonding *bond = netdev_priv(bond_dev);
1394
1395	bond_dev->header_ops	    = slave_dev->header_ops;
1396
1397	bond_dev->type		    = slave_dev->type;
1398	bond_dev->hard_header_len   = slave_dev->hard_header_len;
1399	bond_dev->addr_len	    = slave_dev->addr_len;
1400
1401	memcpy(bond_dev->broadcast, slave_dev->broadcast,
1402		slave_dev->addr_len);
1403	bond->setup_by_slave = 1;
1404}
1405
1406/* enslave device <slave> to bond device <master> */
1407int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1408{
1409	struct bonding *bond = netdev_priv(bond_dev);
1410	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
1411	struct slave *new_slave = NULL;
1412	struct netdev_hw_addr *ha;
1413	struct sockaddr addr;
1414	int link_reporting;
1415	int old_features = bond_dev->features;
1416	int res = 0;
1417
1418	if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL &&
1419		slave_ops->ndo_do_ioctl == NULL) {
1420		pr_warning("%s: Warning: no link monitoring support for %s\n",
1421			   bond_dev->name, slave_dev->name);
1422	}
1423
1424	/* bond must be initialized by bond_open() before enslaving */
1425	if (!(bond_dev->flags & IFF_UP)) {
1426		pr_warning("%s: master_dev is not up in bond_enslave\n",
1427			   bond_dev->name);
1428	}
1429
1430	/* already enslaved */
1431	if (slave_dev->flags & IFF_SLAVE) {
1432		pr_debug("Error, Device was already enslaved\n");
1433		return -EBUSY;
1434	}
1435
1436	/* vlan challenged mutual exclusion */
1437	/* no need to lock since we're protected by rtnl_lock */
1438	if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {
1439		pr_debug("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name);
1440		if (bond->vlgrp) {
1441			pr_err("%s: Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n",
1442			       bond_dev->name, slave_dev->name, bond_dev->name);
1443			return -EPERM;
1444		} else {
1445			pr_warning("%s: Warning: enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n",
1446				   bond_dev->name, slave_dev->name,
1447				   slave_dev->name, bond_dev->name);
1448			bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
1449		}
1450	} else {
1451		pr_debug("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name);
1452		if (bond->slave_cnt == 0) {
1453			/* First slave, and it is not VLAN challenged,
1454			 * so remove the block of adding VLANs over the bond.
1455			 */
1456			bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED;
1457		}
1458	}
1459
1460	/*
1461	 * Old ifenslave binaries are no longer supported.  These can
1462	 * be identified with moderate accuracy by the state of the slave:
1463	 * the current ifenslave will set the interface down prior to
1464	 * enslaving it; the old ifenslave will not.
1465	 */
1466	if ((slave_dev->flags & IFF_UP)) {
1467		pr_err("%s is up. This may be due to an out of date ifenslave.\n",
1468		       slave_dev->name);
1469		res = -EPERM;
1470		goto err_undo_flags;
1471	}
1472
1473	/* set bonding device ether type by slave - bonding netdevices are
1474	 * created with ether_setup, so when the slave type is not ARPHRD_ETHER
1475	 * there is a need to override some of the type dependent attribs/funcs.
1476	 *
1477	 * bond ether type mutual exclusion - don't allow slaves of dissimilar
1478	 * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond
1479	 */
1480	if (bond->slave_cnt == 0) {
1481		if (bond_dev->type != slave_dev->type) {
1482			pr_debug("%s: change device type from %d to %d\n",
1483				 bond_dev->name,
1484				 bond_dev->type, slave_dev->type);
1485
1486			res = netdev_bonding_change(bond_dev,
1487						    NETDEV_PRE_TYPE_CHANGE);
1488			res = notifier_to_errno(res);
1489			if (res) {
1490				pr_err("%s: refused to change device type\n",
1491				       bond_dev->name);
1492				res = -EBUSY;
1493				goto err_undo_flags;
1494			}
1495
1496			/* Flush unicast and multicast addresses */
1497			dev_uc_flush(bond_dev);
1498			dev_mc_flush(bond_dev);
1499
1500			if (slave_dev->type != ARPHRD_ETHER)
1501				bond_setup_by_slave(bond_dev, slave_dev);
1502			else
1503				ether_setup(bond_dev);
1504
1505			netdev_bonding_change(bond_dev,
1506					      NETDEV_POST_TYPE_CHANGE);
1507		}
1508	} else if (bond_dev->type != slave_dev->type) {
1509		pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n",
1510		       slave_dev->name,
1511		       slave_dev->type, bond_dev->type);
1512		res = -EINVAL;
1513		goto err_undo_flags;
1514	}
1515
1516	if (slave_ops->ndo_set_mac_address == NULL) {
1517		if (bond->slave_cnt == 0) {
1518			pr_warning("%s: Warning: The first slave device specified does not support setting the MAC address. Setting fail_over_mac to active.",
1519				   bond_dev->name);
1520			bond->params.fail_over_mac = BOND_FOM_ACTIVE;
1521		} else if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
1522			pr_err("%s: Error: The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active.\n",
1523			       bond_dev->name);
1524			res = -EOPNOTSUPP;
1525			goto err_undo_flags;
1526		}
1527	}
1528
1529	/* If this is the first slave, then we need to set the master's hardware
1530	 * address to be the same as the slave's. */
1531	if (bond->slave_cnt == 0)
1532		memcpy(bond->dev->dev_addr, slave_dev->dev_addr,
1533		       slave_dev->addr_len);
1534
1535
1536	new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL);
1537	if (!new_slave) {
1538		res = -ENOMEM;
1539		goto err_undo_flags;
1540	}
1541
1542	/*
1543	 * Set the new_slave's queue_id to be zero.  Queue ID mapping
1544	 * is set via sysfs or module option if desired.
1545	 */
1546	new_slave->queue_id = 0;
1547
1548	/* Save slave's original mtu and then set it to match the bond */
1549	new_slave->original_mtu = slave_dev->mtu;
1550	res = dev_set_mtu(slave_dev, bond->dev->mtu);
1551	if (res) {
1552		pr_debug("Error %d calling dev_set_mtu\n", res);
1553		goto err_free;
1554	}
1555
1556	/*
1557	 * Save slave's original ("permanent") mac address for modes
1558	 * that need it, and for restoring it upon release, and then
1559	 * set it to the master's address
1560	 */
1561	memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN);
1562
1563	if (!bond->params.fail_over_mac) {
1564		/*
1565		 * Set slave to master's mac address.  The application already
1566		 * set the master's mac address to that of the first slave
1567		 */
1568		memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
1569		addr.sa_family = slave_dev->type;
1570		res = dev_set_mac_address(slave_dev, &addr);
1571		if (res) {
1572			pr_debug("Error %d calling set_mac_address\n", res);
1573			goto err_restore_mtu;
1574		}
1575	}
1576
1577	res = netdev_set_master(slave_dev, bond_dev);
1578	if (res) {
1579		pr_debug("Error %d calling netdev_set_master\n", res);
1580		goto err_restore_mac;
1581	}
1582	/* open the slave since the application closed it */
1583	res = dev_open(slave_dev);
1584	if (res) {
1585		pr_debug("Opening slave %s failed\n", slave_dev->name);
1586		goto err_unset_master;
1587	}
1588
1589	new_slave->dev = slave_dev;
1590	slave_dev->priv_flags |= IFF_BONDING;
1591
1592	if (bond_is_lb(bond)) {
1593		/* bond_alb_init_slave() must be called before all other stages since
1594		 * it might fail and we do not want to have to undo everything
1595		 */
1596		res = bond_alb_init_slave(bond, new_slave);
1597		if (res)
1598			goto err_close;
1599	}
1600
1601	/* If the mode USES_PRIMARY, then the new slave gets the
1602	 * master's promisc (and mc) settings only if it becomes the
1603	 * curr_active_slave, and that is taken care of later when calling
1604	 * bond_change_active()
1605	 */
1606	if (!USES_PRIMARY(bond->params.mode)) {
1607		/* set promiscuity level to new slave */
1608		if (bond_dev->flags & IFF_PROMISC) {
1609			res = dev_set_promiscuity(slave_dev, 1);
1610			if (res)
1611				goto err_close;
1612		}
1613
1614		/* set allmulti level to new slave */
1615		if (bond_dev->flags & IFF_ALLMULTI) {
1616			res = dev_set_allmulti(slave_dev, 1);
1617			if (res)
1618				goto err_close;
1619		}
1620
1621		netif_addr_lock_bh(bond_dev);
1622		/* upload master's mc_list to new slave */
1623		netdev_for_each_mc_addr(ha, bond_dev)
1624			dev_mc_add(slave_dev, ha->addr);
1625		netif_addr_unlock_bh(bond_dev);
1626	}
1627
1628	if (bond->params.mode == BOND_MODE_8023AD) {
1629		/* add lacpdu mc addr to mc list */
1630		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
1631
1632		dev_mc_add(slave_dev, lacpdu_multicast);
1633	}
1634
1635	bond_add_vlans_on_slave(bond, slave_dev);
1636
1637	write_lock_bh(&bond->lock);
1638
1639	bond_attach_slave(bond, new_slave);
1640
1641	new_slave->delay = 0;
1642	new_slave->link_failure_count = 0;
1643
1644	bond_compute_features(bond);
1645
1646	write_unlock_bh(&bond->lock);
1647
1648	read_lock(&bond->lock);
1649
1650	new_slave->last_arp_rx = jiffies;
1651
1652	if (bond->params.miimon && !bond->params.use_carrier) {
1653		link_reporting = bond_check_dev_link(bond, slave_dev, 1);
1654
1655		if ((link_reporting == -1) && !bond->params.arp_interval) {
1656			/*
1657			 * miimon is set but a bonded network driver
1658			 * does not support ETHTOOL/MII and
1659			 * arp_interval is not set.  Note: if
1660			 * use_carrier is enabled, we will never go
1661			 * here (because netif_carrier is always
1662			 * supported); thus, we don't need to change
1663			 * the messages for netif_carrier.
1664			 */
1665			pr_warning("%s: Warning: MII and ETHTOOL support not available for interface %s, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details.\n",
1666			       bond_dev->name, slave_dev->name);
1667		} else if (link_reporting == -1) {
1668			/* unable get link status using mii/ethtool */
1669			pr_warning("%s: Warning: can't get link status from interface %s; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface.\n",
1670				   bond_dev->name, slave_dev->name);
1671		}
1672	}
1673
1674	/* check for initial state */
1675	if (!bond->params.miimon ||
1676	    (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) {
1677		if (bond->params.updelay) {
1678			pr_debug("Initial state of slave_dev is BOND_LINK_BACK\n");
1679			new_slave->link  = BOND_LINK_BACK;
1680			new_slave->delay = bond->params.updelay;
1681		} else {
1682			pr_debug("Initial state of slave_dev is BOND_LINK_UP\n");
1683			new_slave->link  = BOND_LINK_UP;
1684		}
1685		new_slave->jiffies = jiffies;
1686	} else {
1687		pr_debug("Initial state of slave_dev is BOND_LINK_DOWN\n");
1688		new_slave->link  = BOND_LINK_DOWN;
1689	}
1690
1691	if (bond_update_speed_duplex(new_slave) &&
1692	    (new_slave->link != BOND_LINK_DOWN)) {
1693		pr_warning("%s: Warning: failed to get speed and duplex from %s, assumed to be 100Mb/sec and Full.\n",
1694			   bond_dev->name, new_slave->dev->name);
1695
1696		if (bond->params.mode == BOND_MODE_8023AD) {
1697			pr_warning("%s: Warning: Operation of 802.3ad mode requires ETHTOOL support in base driver for proper aggregator selection.\n",
1698				   bond_dev->name);
1699		}
1700	}
1701
1702	if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
1703		/* if there is a primary slave, remember it */
1704		if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
1705			bond->primary_slave = new_slave;
1706			bond->force_primary = true;
1707		}
1708	}
1709
1710	write_lock_bh(&bond->curr_slave_lock);
1711
1712	switch (bond->params.mode) {
1713	case BOND_MODE_ACTIVEBACKUP:
1714		bond_set_slave_inactive_flags(new_slave);
1715		bond_select_active_slave(bond);
1716		break;
1717	case BOND_MODE_8023AD:
1718		/* in 802.3ad mode, the internal mechanism
1719		 * will activate the slaves in the selected
1720		 * aggregator
1721		 */
1722		bond_set_slave_inactive_flags(new_slave);
1723		/* if this is the first slave */
1724		if (bond->slave_cnt == 1) {
1725			SLAVE_AD_INFO(new_slave).id = 1;
1726			/* Initialize AD with the number of times that the AD timer is called in 1 second
1727			 * can be called only after the mac address of the bond is set
1728			 */
1729			bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL,
1730					    bond->params.lacp_fast);
1731		} else {
1732			SLAVE_AD_INFO(new_slave).id =
1733				SLAVE_AD_INFO(new_slave->prev).id + 1;
1734		}
1735
1736		bond_3ad_bind_slave(new_slave);
1737		break;
1738	case BOND_MODE_TLB:
1739	case BOND_MODE_ALB:
1740		new_slave->state = BOND_STATE_ACTIVE;
1741		bond_set_slave_inactive_flags(new_slave);
1742		bond_select_active_slave(bond);
1743		break;
1744	default:
1745		pr_debug("This slave is always active in trunk mode\n");
1746
1747		/* always active in trunk mode */
1748		new_slave->state = BOND_STATE_ACTIVE;
1749
1750		/* In trunking mode there is little meaning to curr_active_slave
1751		 * anyway (it holds no special properties of the bond device),
1752		 * so we can change it without calling change_active_interface()
1753		 */
1754		if (!bond->curr_active_slave)
1755			bond->curr_active_slave = new_slave;
1756
1757		break;
1758	} /* switch(bond_mode) */
1759
1760	write_unlock_bh(&bond->curr_slave_lock);
1761
1762	bond_set_carrier(bond);
1763
1764#ifdef CONFIG_NET_POLL_CONTROLLER
1765	/*
1766	 * Netpoll and bonding is broken, make sure it is not initialized
1767	 * until it is fixed.
1768	 */
1769	if (disable_netpoll) {
1770		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
1771	} else {
1772		if (slaves_support_netpoll(bond_dev)) {
1773			bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
1774			if (bond_dev->npinfo)
1775				slave_dev->npinfo = bond_dev->npinfo;
1776		} else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) {
1777			bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
1778			pr_info("New slave device %s does not support netpoll\n",
1779				slave_dev->name);
1780			pr_info("Disabling netpoll support for %s\n", bond_dev->name);
1781		}
1782	}
1783#endif
1784	read_unlock(&bond->lock);
1785
1786	res = bond_create_slave_symlinks(bond_dev, slave_dev);
1787	if (res)
1788		goto err_close;
1789
1790	pr_info("%s: enslaving %s as a%s interface with a%s link.\n",
1791		bond_dev->name, slave_dev->name,
1792		new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup",
1793		new_slave->link != BOND_LINK_DOWN ? "n up" : " down");
1794
1795	/* enslave is successful */
1796	return 0;
1797
1798/* Undo stages on error */
1799err_close:
1800	dev_close(slave_dev);
1801
1802err_unset_master:
1803	netdev_set_master(slave_dev, NULL);
1804
1805err_restore_mac:
1806	if (!bond->params.fail_over_mac) {
1807		memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
1808		addr.sa_family = slave_dev->type;
1809		dev_set_mac_address(slave_dev, &addr);
1810	}
1811
1812err_restore_mtu:
1813	dev_set_mtu(slave_dev, new_slave->original_mtu);
1814
1815err_free:
1816	kfree(new_slave);
1817
1818err_undo_flags:
1819	bond_dev->features = old_features;
1820
1821	return res;
1822}
1823
1824/*
1825 * Try to release the slave device <slave> from the bond device <master>
1826 * It is legal to access curr_active_slave without a lock because all the function
1827 * is write-locked.
1828 *
1829 * The rules for slave state should be:
1830 *   for Active/Backup:
1831 *     Active stays on all backups go down
1832 *   for Bonded connections:
1833 *     The first up interface should be left on and all others downed.
1834 */
1835int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
1836{
1837	struct bonding *bond = netdev_priv(bond_dev);
1838	struct slave *slave, *oldcurrent;
1839	struct sockaddr addr;
1840
1841	/* slave is not a slave or master is not master of this slave */
1842	if (!(slave_dev->flags & IFF_SLAVE) ||
1843	    (slave_dev->master != bond_dev)) {
1844		pr_err("%s: Error: cannot release %s.\n",
1845		       bond_dev->name, slave_dev->name);
1846		return -EINVAL;
1847	}
1848
1849	netdev_bonding_change(bond_dev, NETDEV_BONDING_DESLAVE);
1850	write_lock_bh(&bond->lock);
1851
1852	slave = bond_get_slave_by_dev(bond, slave_dev);
1853	if (!slave) {
1854		/* not a slave of this bond */
1855		pr_info("%s: %s not enslaved\n",
1856			bond_dev->name, slave_dev->name);
1857		write_unlock_bh(&bond->lock);
1858		return -EINVAL;
1859	}
1860
1861	if (!bond->params.fail_over_mac) {
1862		if (!compare_ether_addr(bond_dev->dev_addr, slave->perm_hwaddr) &&
1863		    bond->slave_cnt > 1)
1864			pr_warning("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s. Set the HWaddr of %s to a different address to avoid conflicts.\n",
1865				   bond_dev->name, slave_dev->name,
1866				   slave->perm_hwaddr,
1867				   bond_dev->name, slave_dev->name);
1868	}
1869
1870	/* Inform AD package of unbinding of slave. */
1871	if (bond->params.mode == BOND_MODE_8023AD) {
1872		/* must be called before the slave is
1873		 * detached from the list
1874		 */
1875		bond_3ad_unbind_slave(slave);
1876	}
1877
1878	pr_info("%s: releasing %s interface %s\n",
1879		bond_dev->name,
1880		(slave->state == BOND_STATE_ACTIVE) ? "active" : "backup",
1881		slave_dev->name);
1882
1883	oldcurrent = bond->curr_active_slave;
1884
1885	bond->current_arp_slave = NULL;
1886
1887	/* release the slave from its bond */
1888	bond_detach_slave(bond, slave);
1889
1890	bond_compute_features(bond);
1891
1892	if (bond->primary_slave == slave)
1893		bond->primary_slave = NULL;
1894
1895	if (oldcurrent == slave)
1896		bond_change_active_slave(bond, NULL);
1897
1898	if (bond_is_lb(bond)) {
1899		/* Must be called only after the slave has been
1900		 * detached from the list and the curr_active_slave
1901		 * has been cleared (if our_slave == old_current),
1902		 * but before a new active slave is selected.
1903		 */
1904		write_unlock_bh(&bond->lock);
1905		bond_alb_deinit_slave(bond, slave);
1906		write_lock_bh(&bond->lock);
1907	}
1908
1909	if (oldcurrent == slave) {
1910		/*
1911		 * Note that we hold RTNL over this sequence, so there
1912		 * is no concern that another slave add/remove event
1913		 * will interfere.
1914		 */
1915		write_unlock_bh(&bond->lock);
1916		read_lock(&bond->lock);
1917		write_lock_bh(&bond->curr_slave_lock);
1918
1919		bond_select_active_slave(bond);
1920
1921		write_unlock_bh(&bond->curr_slave_lock);
1922		read_unlock(&bond->lock);
1923		write_lock_bh(&bond->lock);
1924	}
1925
1926	if (bond->slave_cnt == 0) {
1927		bond_set_carrier(bond);
1928
1929		/* if the last slave was removed, zero the mac address
1930		 * of the master so it will be set by the application
1931		 * to the mac address of the first slave
1932		 */
1933		memset(bond_dev->dev_addr, 0, bond_dev->addr_len);
1934
1935		if (!bond->vlgrp) {
1936			bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
1937		} else {
1938			pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n",
1939				   bond_dev->name, bond_dev->name);
1940			pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n",
1941				   bond_dev->name);
1942		}
1943	} else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) &&
1944		   !bond_has_challenged_slaves(bond)) {
1945		pr_info("%s: last VLAN challenged slave %s left bond %s. VLAN blocking is removed\n",
1946			bond_dev->name, slave_dev->name, bond_dev->name);
1947		bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED;
1948	}
1949
1950	write_unlock_bh(&bond->lock);
1951
1952	/* must do this from outside any spinlocks */
1953	bond_destroy_slave_symlinks(bond_dev, slave_dev);
1954
1955	bond_del_vlans_from_slave(bond, slave_dev);
1956
1957	/* If the mode USES_PRIMARY, then we should only remove its
1958	 * promisc and mc settings if it was the curr_active_slave, but that was
1959	 * already taken care of above when we detached the slave
1960	 */
1961	if (!USES_PRIMARY(bond->params.mode)) {
1962		/* unset promiscuity level from slave */
1963		if (bond_dev->flags & IFF_PROMISC)
1964			dev_set_promiscuity(slave_dev, -1);
1965
1966		/* unset allmulti level from slave */
1967		if (bond_dev->flags & IFF_ALLMULTI)
1968			dev_set_allmulti(slave_dev, -1);
1969
1970		/* flush master's mc_list from slave */
1971		netif_addr_lock_bh(bond_dev);
1972		bond_mc_list_flush(bond_dev, slave_dev);
1973		netif_addr_unlock_bh(bond_dev);
1974	}
1975
1976	netdev_set_master(slave_dev, NULL);
1977
1978#ifdef CONFIG_NET_POLL_CONTROLLER
1979	read_lock_bh(&bond->lock);
1980
1981	 /* Make sure netpoll over stays disabled until fixed. */
1982	if (!disable_netpoll)
1983		if (slaves_support_netpoll(bond_dev))
1984				bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
1985	read_unlock_bh(&bond->lock);
1986	if (slave_dev->netdev_ops->ndo_netpoll_cleanup)
1987		slave_dev->netdev_ops->ndo_netpoll_cleanup(slave_dev);
1988	else
1989		slave_dev->npinfo = NULL;
1990#endif
1991
1992	/* close slave before restoring its mac address */
1993	dev_close(slave_dev);
1994
1995	if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
1996		/* restore original ("permanent") mac address */
1997		memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
1998		addr.sa_family = slave_dev->type;
1999		dev_set_mac_address(slave_dev, &addr);
2000	}
2001
2002	dev_set_mtu(slave_dev, slave->original_mtu);
2003
2004	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
2005				   IFF_SLAVE_INACTIVE | IFF_BONDING |
2006				   IFF_SLAVE_NEEDARP);
2007
2008	kfree(slave);
2009
2010	return 0;  /* deletion OK */
2011}
2012
2013/*
2014* First release a slave and than destroy the bond if no more slaves are left.
2015* Must be under rtnl_lock when this function is called.
2016*/
2017int  bond_release_and_destroy(struct net_device *bond_dev,
2018			      struct net_device *slave_dev)
2019{
2020	struct bonding *bond = netdev_priv(bond_dev);
2021	int ret;
2022
2023	ret = bond_release(bond_dev, slave_dev);
2024	if ((ret == 0) && (bond->slave_cnt == 0)) {
2025		pr_info("%s: destroying bond %s.\n",
2026			bond_dev->name, bond_dev->name);
2027		unregister_netdevice(bond_dev);
2028	}
2029	return ret;
2030}
2031
2032/*
2033 * This function releases all slaves.
2034 */
2035static int bond_release_all(struct net_device *bond_dev)
2036{
2037	struct bonding *bond = netdev_priv(bond_dev);
2038	struct slave *slave;
2039	struct net_device *slave_dev;
2040	struct sockaddr addr;
2041
2042	write_lock_bh(&bond->lock);
2043
2044	netif_carrier_off(bond_dev);
2045
2046	if (bond->slave_cnt == 0)
2047		goto out;
2048
2049	bond->current_arp_slave = NULL;
2050	bond->primary_slave = NULL;
2051	bond_change_active_slave(bond, NULL);
2052
2053	while ((slave = bond->first_slave) != NULL) {
2054		/* Inform AD package of unbinding of slave
2055		 * before slave is detached from the list.
2056		 */
2057		if (bond->params.mode == BOND_MODE_8023AD)
2058			bond_3ad_unbind_slave(slave);
2059
2060		slave_dev = slave->dev;
2061		bond_detach_slave(bond, slave);
2062
2063		/* now that the slave is detached, unlock and perform
2064		 * all the undo steps that should not be called from
2065		 * within a lock.
2066		 */
2067		write_unlock_bh(&bond->lock);
2068
2069		if (bond_is_lb(bond)) {
2070			/* must be called only after the slave
2071			 * has been detached from the list
2072			 */
2073			bond_alb_deinit_slave(bond, slave);
2074		}
2075
2076		bond_compute_features(bond);
2077
2078		bond_destroy_slave_symlinks(bond_dev, slave_dev);
2079		bond_del_vlans_from_slave(bond, slave_dev);
2080
2081		/* If the mode USES_PRIMARY, then we should only remove its
2082		 * promisc and mc settings if it was the curr_active_slave, but that was
2083		 * already taken care of above when we detached the slave
2084		 */
2085		if (!USES_PRIMARY(bond->params.mode)) {
2086			/* unset promiscuity level from slave */
2087			if (bond_dev->flags & IFF_PROMISC)
2088				dev_set_promiscuity(slave_dev, -1);
2089
2090			/* unset allmulti level from slave */
2091			if (bond_dev->flags & IFF_ALLMULTI)
2092				dev_set_allmulti(slave_dev, -1);
2093
2094			/* flush master's mc_list from slave */
2095			netif_addr_lock_bh(bond_dev);
2096			bond_mc_list_flush(bond_dev, slave_dev);
2097			netif_addr_unlock_bh(bond_dev);
2098		}
2099
2100		netdev_set_master(slave_dev, NULL);
2101
2102		/* close slave before restoring its mac address */
2103		dev_close(slave_dev);
2104
2105		if (!bond->params.fail_over_mac) {
2106			/* restore original ("permanent") mac address*/
2107			memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
2108			addr.sa_family = slave_dev->type;
2109			dev_set_mac_address(slave_dev, &addr);
2110		}
2111
2112		slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
2113					   IFF_SLAVE_INACTIVE);
2114
2115		kfree(slave);
2116
2117		/* re-acquire the lock before getting the next slave */
2118		write_lock_bh(&bond->lock);
2119	}
2120
2121	/* zero the mac address of the master so it will be
2122	 * set by the application to the mac address of the
2123	 * first slave
2124	 */
2125	memset(bond_dev->dev_addr, 0, bond_dev->addr_len);
2126
2127	if (!bond->vlgrp) {
2128		bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
2129	} else {
2130		pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n",
2131			   bond_dev->name, bond_dev->name);
2132		pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n",
2133			   bond_dev->name);
2134	}
2135
2136	pr_info("%s: released all slaves\n", bond_dev->name);
2137
2138out:
2139	write_unlock_bh(&bond->lock);
2140
2141	return 0;
2142}
2143
2144/*
2145 * This function changes the active slave to slave <slave_dev>.
2146 * It returns -EINVAL in the following cases.
2147 *  - <slave_dev> is not found in the list.
2148 *  - There is not active slave now.
2149 *  - <slave_dev> is already active.
2150 *  - The link state of <slave_dev> is not BOND_LINK_UP.
2151 *  - <slave_dev> is not running.
2152 * In these cases, this function does nothing.
2153 * In the other cases, current_slave pointer is changed and 0 is returned.
2154 */
2155static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev)
2156{
2157	struct bonding *bond = netdev_priv(bond_dev);
2158	struct slave *old_active = NULL;
2159	struct slave *new_active = NULL;
2160	int res = 0;
2161
2162	if (!USES_PRIMARY(bond->params.mode))
2163		return -EINVAL;
2164
2165	/* Verify that master_dev is indeed the master of slave_dev */
2166	if (!(slave_dev->flags & IFF_SLAVE) || (slave_dev->master != bond_dev))
2167		return -EINVAL;
2168
2169	read_lock(&bond->lock);
2170
2171	read_lock(&bond->curr_slave_lock);
2172	old_active = bond->curr_active_slave;
2173	read_unlock(&bond->curr_slave_lock);
2174
2175	new_active = bond_get_slave_by_dev(bond, slave_dev);
2176
2177	/*
2178	 * Changing to the current active: do nothing; return success.
2179	 */
2180	if (new_active && (new_active == old_active)) {
2181		read_unlock(&bond->lock);
2182		return 0;
2183	}
2184
2185	if ((new_active) &&
2186	    (old_active) &&
2187	    (new_active->link == BOND_LINK_UP) &&
2188	    IS_UP(new_active->dev)) {
2189		write_lock_bh(&bond->curr_slave_lock);
2190		bond_change_active_slave(bond, new_active);
2191		write_unlock_bh(&bond->curr_slave_lock);
2192	} else
2193		res = -EINVAL;
2194
2195	read_unlock(&bond->lock);
2196
2197	return res;
2198}
2199
2200static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)
2201{
2202	struct bonding *bond = netdev_priv(bond_dev);
2203
2204	info->bond_mode = bond->params.mode;
2205	info->miimon = bond->params.miimon;
2206
2207	read_lock(&bond->lock);
2208	info->num_slaves = bond->slave_cnt;
2209	read_unlock(&bond->lock);
2210
2211	return 0;
2212}
2213
2214static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info)
2215{
2216	struct bonding *bond = netdev_priv(bond_dev);
2217	struct slave *slave;
2218	int i, res = -ENODEV;
2219
2220	read_lock(&bond->lock);
2221
2222	bond_for_each_slave(bond, slave, i) {
2223		if (i == (int)info->slave_id) {
2224			res = 0;
2225			strcpy(info->slave_name, slave->dev->name);
2226			info->link = slave->link;
2227			info->state = slave->state;
2228			info->link_failure_count = slave->link_failure_count;
2229			break;
2230		}
2231	}
2232
2233	read_unlock(&bond->lock);
2234
2235	return res;
2236}
2237
2238/*-------------------------------- Monitoring -------------------------------*/
2239
2240
2241static int bond_miimon_inspect(struct bonding *bond)
2242{
2243	struct slave *slave;
2244	int i, link_state, commit = 0;
2245	bool ignore_updelay;
2246
2247	ignore_updelay = !bond->curr_active_slave ? true : false;
2248
2249	bond_for_each_slave(bond, slave, i) {
2250		slave->new_link = BOND_LINK_NOCHANGE;
2251
2252		link_state = bond_check_dev_link(bond, slave->dev, 0);
2253
2254		switch (slave->link) {
2255		case BOND_LINK_UP:
2256			if (link_state)
2257				continue;
2258
2259			slave->link = BOND_LINK_FAIL;
2260			slave->delay = bond->params.downdelay;
2261			if (slave->delay) {
2262				pr_info("%s: link status down for %sinterface %s, disabling it in %d ms.\n",
2263					bond->dev->name,
2264					(bond->params.mode ==
2265					 BOND_MODE_ACTIVEBACKUP) ?
2266					((slave->state == BOND_STATE_ACTIVE) ?
2267					 "active " : "backup ") : "",
2268					slave->dev->name,
2269					bond->params.downdelay * bond->params.miimon);
2270			}
2271			/*FALLTHRU*/
2272		case BOND_LINK_FAIL:
2273			if (link_state) {
2274				/*
2275				 * recovered before downdelay expired
2276				 */
2277				slave->link = BOND_LINK_UP;
2278				slave->jiffies = jiffies;
2279				pr_info("%s: link status up again after %d ms for interface %s.\n",
2280					bond->dev->name,
2281					(bond->params.downdelay - slave->delay) *
2282					bond->params.miimon,
2283					slave->dev->name);
2284				continue;
2285			}
2286
2287			if (slave->delay <= 0) {
2288				slave->new_link = BOND_LINK_DOWN;
2289				commit++;
2290				continue;
2291			}
2292
2293			slave->delay--;
2294			break;
2295
2296		case BOND_LINK_DOWN:
2297			if (!link_state)
2298				continue;
2299
2300			slave->link = BOND_LINK_BACK;
2301			slave->delay = bond->params.updelay;
2302
2303			if (slave->delay) {
2304				pr_info("%s: link status up for interface %s, enabling it in %d ms.\n",
2305					bond->dev->name, slave->dev->name,
2306					ignore_updelay ? 0 :
2307					bond->params.updelay *
2308					bond->params.miimon);
2309			}
2310			/*FALLTHRU*/
2311		case BOND_LINK_BACK:
2312			if (!link_state) {
2313				slave->link = BOND_LINK_DOWN;
2314				pr_info("%s: link status down again after %d ms for interface %s.\n",
2315					bond->dev->name,
2316					(bond->params.updelay - slave->delay) *
2317					bond->params.miimon,
2318					slave->dev->name);
2319
2320				continue;
2321			}
2322
2323			if (ignore_updelay)
2324				slave->delay = 0;
2325
2326			if (slave->delay <= 0) {
2327				slave->new_link = BOND_LINK_UP;
2328				commit++;
2329				ignore_updelay = false;
2330				continue;
2331			}
2332
2333			slave->delay--;
2334			break;
2335		}
2336	}
2337
2338	return commit;
2339}
2340
2341static void bond_miimon_commit(struct bonding *bond)
2342{
2343	struct slave *slave;
2344	int i;
2345
2346	bond_for_each_slave(bond, slave, i) {
2347		switch (slave->new_link) {
2348		case BOND_LINK_NOCHANGE:
2349			continue;
2350
2351		case BOND_LINK_UP:
2352			slave->link = BOND_LINK_UP;
2353			slave->jiffies = jiffies;
2354
2355			if (bond->params.mode == BOND_MODE_8023AD) {
2356				/* prevent it from being the active one */
2357				slave->state = BOND_STATE_BACKUP;
2358			} else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) {
2359				/* make it immediately active */
2360				slave->state = BOND_STATE_ACTIVE;
2361			} else if (slave != bond->primary_slave) {
2362				/* prevent it from being the active one */
2363				slave->state = BOND_STATE_BACKUP;
2364			}
2365
2366			pr_info("%s: link status definitely up for interface %s.\n",
2367				bond->dev->name, slave->dev->name);
2368
2369			/* notify ad that the link status has changed */
2370			if (bond->params.mode == BOND_MODE_8023AD)
2371				bond_3ad_handle_link_change(slave, BOND_LINK_UP);
2372
2373			if (bond_is_lb(bond))
2374				bond_alb_handle_link_change(bond, slave,
2375							    BOND_LINK_UP);
2376
2377			if (!bond->curr_active_slave ||
2378			    (slave == bond->primary_slave))
2379				goto do_failover;
2380
2381			continue;
2382
2383		case BOND_LINK_DOWN:
2384			if (slave->link_failure_count < UINT_MAX)
2385				slave->link_failure_count++;
2386
2387			slave->link = BOND_LINK_DOWN;
2388
2389			if (bond->params.mode == BOND_MODE_ACTIVEBACKUP ||
2390			    bond->params.mode == BOND_MODE_8023AD)
2391				bond_set_slave_inactive_flags(slave);
2392
2393			pr_info("%s: link status definitely down for interface %s, disabling it\n",
2394				bond->dev->name, slave->dev->name);
2395
2396			if (bond->params.mode == BOND_MODE_8023AD)
2397				bond_3ad_handle_link_change(slave,
2398							    BOND_LINK_DOWN);
2399
2400			if (bond_is_lb(bond))
2401				bond_alb_handle_link_change(bond, slave,
2402							    BOND_LINK_DOWN);
2403
2404			if (slave == bond->curr_active_slave)
2405				goto do_failover;
2406
2407			continue;
2408
2409		default:
2410			pr_err("%s: invalid new link %d on slave %s\n",
2411			       bond->dev->name, slave->new_link,
2412			       slave->dev->name);
2413			slave->new_link = BOND_LINK_NOCHANGE;
2414
2415			continue;
2416		}
2417
2418do_failover:
2419		ASSERT_RTNL();
2420		write_lock_bh(&bond->curr_slave_lock);
2421		bond_select_active_slave(bond);
2422		write_unlock_bh(&bond->curr_slave_lock);
2423	}
2424
2425	bond_set_carrier(bond);
2426}
2427
2428/*
2429 * bond_mii_monitor
2430 *
2431 * Really a wrapper that splits the mii monitor into two phases: an
2432 * inspection, then (if inspection indicates something needs to be done)
2433 * an acquisition of appropriate locks followed by a commit phase to
2434 * implement whatever link state changes are indicated.
2435 */
2436void bond_mii_monitor(struct work_struct *work)
2437{
2438	struct bonding *bond = container_of(work, struct bonding,
2439					    mii_work.work);
2440
2441	read_lock(&bond->lock);
2442	if (bond->kill_timers)
2443		goto out;
2444
2445	if (bond->slave_cnt == 0)
2446		goto re_arm;
2447
2448	if (bond->send_grat_arp) {
2449		read_lock(&bond->curr_slave_lock);
2450		bond_send_gratuitous_arp(bond);
2451		read_unlock(&bond->curr_slave_lock);
2452	}
2453
2454	if (bond->send_unsol_na) {
2455		read_lock(&bond->curr_slave_lock);
2456		bond_send_unsolicited_na(bond);
2457		read_unlock(&bond->curr_slave_lock);
2458	}
2459
2460	if (bond_miimon_inspect(bond)) {
2461		read_unlock(&bond->lock);
2462		rtnl_lock();
2463		read_lock(&bond->lock);
2464
2465		bond_miimon_commit(bond);
2466
2467		read_unlock(&bond->lock);
2468		rtnl_unlock();	/* might sleep, hold no other locks */
2469		read_lock(&bond->lock);
2470	}
2471
2472re_arm:
2473	if (bond->params.miimon)
2474		queue_delayed_work(bond->wq, &bond->mii_work,
2475				   msecs_to_jiffies(bond->params.miimon));
2476out:
2477	read_unlock(&bond->lock);
2478}
2479
2480static __be32 bond_glean_dev_ip(struct net_device *dev)
2481{
2482	struct in_device *idev;
2483	struct in_ifaddr *ifa;
2484	__be32 addr = 0;
2485
2486	if (!dev)
2487		return 0;
2488
2489	rcu_read_lock();
2490	idev = __in_dev_get_rcu(dev);
2491	if (!idev)
2492		goto out;
2493
2494	ifa = idev->ifa_list;
2495	if (!ifa)
2496		goto out;
2497
2498	addr = ifa->ifa_local;
2499out:
2500	rcu_read_unlock();
2501	return addr;
2502}
2503
2504static int bond_has_this_ip(struct bonding *bond, __be32 ip)
2505{
2506	struct vlan_entry *vlan;
2507
2508	if (ip == bond->master_ip)
2509		return 1;
2510
2511	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
2512		if (ip == vlan->vlan_ip)
2513			return 1;
2514	}
2515
2516	return 0;
2517}
2518
2519/*
2520 * We go to the (large) trouble of VLAN tagging ARP frames because
2521 * switches in VLAN mode (especially if ports are configured as
2522 * "native" to a VLAN) might not pass non-tagged frames.
2523 */
2524static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id)
2525{
2526	struct sk_buff *skb;
2527
2528	pr_debug("arp %d on slave %s: dst %x src %x vid %d\n", arp_op,
2529		 slave_dev->name, dest_ip, src_ip, vlan_id);
2530
2531	skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip,
2532			 NULL, slave_dev->dev_addr, NULL);
2533
2534	if (!skb) {
2535		pr_err("ARP packet allocation failed\n");
2536		return;
2537	}
2538	if (vlan_id) {
2539		skb = vlan_put_tag(skb, vlan_id);
2540		if (!skb) {
2541			pr_err("failed to insert VLAN tag\n");
2542			return;
2543		}
2544	}
2545	arp_xmit(skb);
2546}
2547
2548
2549static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
2550{
2551	int i, vlan_id, rv;
2552	__be32 *targets = bond->params.arp_targets;
2553	struct vlan_entry *vlan;
2554	struct net_device *vlan_dev;
2555	struct flowi fl;
2556	struct rtable *rt;
2557
2558	for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) {
2559		if (!targets[i])
2560			break;
2561		pr_debug("basa: target %x\n", targets[i]);
2562		if (!bond->vlgrp) {
2563			pr_debug("basa: empty vlan: arp_send\n");
2564			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
2565				      bond->master_ip, 0);
2566			continue;
2567		}
2568
2569		/*
2570		 * If VLANs are configured, we do a route lookup to
2571		 * determine which VLAN interface would be used, so we
2572		 * can tag the ARP with the proper VLAN tag.
2573		 */
2574		memset(&fl, 0, sizeof(fl));
2575		fl.fl4_dst = targets[i];
2576		fl.fl4_tos = RTO_ONLINK;
2577
2578		rv = ip_route_output_key(dev_net(bond->dev), &rt, &fl);
2579		if (rv) {
2580			if (net_ratelimit()) {
2581				pr_warning("%s: no route to arp_ip_target %pI4\n",
2582					   bond->dev->name, &fl.fl4_dst);
2583			}
2584			continue;
2585		}
2586
2587		/*
2588		 * This target is not on a VLAN
2589		 */
2590		if (rt->dst.dev == bond->dev) {
2591			ip_rt_put(rt);
2592			pr_debug("basa: rtdev == bond->dev: arp_send\n");
2593			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
2594				      bond->master_ip, 0);
2595			continue;
2596		}
2597
2598		vlan_id = 0;
2599		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
2600			vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
2601			if (vlan_dev == rt->dst.dev) {
2602				vlan_id = vlan->vlan_id;
2603				pr_debug("basa: vlan match on %s %d\n",
2604				       vlan_dev->name, vlan_id);
2605				break;
2606			}
2607		}
2608
2609		if (vlan_id) {
2610			ip_rt_put(rt);
2611			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
2612				      vlan->vlan_ip, vlan_id);
2613			continue;
2614		}
2615
2616		if (net_ratelimit()) {
2617			pr_warning("%s: no path to arp_ip_target %pI4 via rt.dev %s\n",
2618				   bond->dev->name, &fl.fl4_dst,
2619				   rt->dst.dev ? rt->dst.dev->name : "NULL");
2620		}
2621		ip_rt_put(rt);
2622	}
2623}
2624
2625/*
2626 * Kick out a gratuitous ARP for an IP on the bonding master plus one
2627 * for each VLAN above us.
2628 *
2629 * Caller must hold curr_slave_lock for read or better
2630 */
2631static void bond_send_gratuitous_arp(struct bonding *bond)
2632{
2633	struct slave *slave = bond->curr_active_slave;
2634	struct vlan_entry *vlan;
2635	struct net_device *vlan_dev;
2636
2637	pr_debug("bond_send_grat_arp: bond %s slave %s\n",
2638		 bond->dev->name, slave ? slave->dev->name : "NULL");
2639
2640	if (!slave || !bond->send_grat_arp ||
2641	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
2642		return;
2643
2644	bond->send_grat_arp--;
2645
2646	if (bond->master_ip) {
2647		bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip,
2648				bond->master_ip, 0);
2649	}
2650
2651	if (!bond->vlgrp)
2652		return;
2653
2654	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
2655		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
2656		if (vlan->vlan_ip) {
2657			bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip,
2658				      vlan->vlan_ip, vlan->vlan_id);
2659		}
2660	}
2661}
2662
2663static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip)
2664{
2665	int i;
2666	__be32 *targets = bond->params.arp_targets;
2667
2668	for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) {
2669		pr_debug("bva: sip %pI4 tip %pI4 t[%d] %pI4 bhti(tip) %d\n",
2670			 &sip, &tip, i, &targets[i],
2671			 bond_has_this_ip(bond, tip));
2672		if (sip == targets[i]) {
2673			if (bond_has_this_ip(bond, tip))
2674				slave->last_arp_rx = jiffies;
2675			return;
2676		}
2677	}
2678}
2679
2680static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
2681{
2682	struct arphdr *arp;
2683	struct slave *slave;
2684	struct bonding *bond;
2685	unsigned char *arp_ptr;
2686	__be32 sip, tip;
2687
2688	if (dev->priv_flags & IFF_802_1Q_VLAN) {
2689		/*
2690		 * When using VLANS and bonding, dev and oriv_dev may be
2691		 * incorrect if the physical interface supports VLAN
2692		 * acceleration.  With this change ARP validation now
2693		 * works for hosts only reachable on the VLAN interface.
2694		 */
2695		dev = vlan_dev_real_dev(dev);
2696		orig_dev = dev_get_by_index_rcu(dev_net(skb->dev),skb->skb_iif);
2697	}
2698
2699	if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER))
2700		goto out;
2701
2702	bond = netdev_priv(dev);
2703	read_lock(&bond->lock);
2704
2705	pr_debug("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
2706		 bond->dev->name, skb->dev ? skb->dev->name : "NULL",
2707		 orig_dev ? orig_dev->name : "NULL");
2708
2709	slave = bond_get_slave_by_dev(bond, orig_dev);
2710	if (!slave || !slave_do_arp_validate(bond, slave))
2711		goto out_unlock;
2712
2713	if (!pskb_may_pull(skb, arp_hdr_len(dev)))
2714		goto out_unlock;
2715
2716	arp = arp_hdr(skb);
2717	if (arp->ar_hln != dev->addr_len ||
2718	    skb->pkt_type == PACKET_OTHERHOST ||
2719	    skb->pkt_type == PACKET_LOOPBACK ||
2720	    arp->ar_hrd != htons(ARPHRD_ETHER) ||
2721	    arp->ar_pro != htons(ETH_P_IP) ||
2722	    arp->ar_pln != 4)
2723		goto out_unlock;
2724
2725	arp_ptr = (unsigned char *)(arp + 1);
2726	arp_ptr += dev->addr_len;
2727	memcpy(&sip, arp_ptr, 4);
2728	arp_ptr += 4 + dev->addr_len;
2729	memcpy(&tip, arp_ptr, 4);
2730
2731	pr_debug("bond_arp_rcv: %s %s/%d av %d sv %d sip %pI4 tip %pI4\n",
2732		 bond->dev->name, slave->dev->name, slave->state,
2733		 bond->params.arp_validate, slave_do_arp_validate(bond, slave),
2734		 &sip, &tip);
2735
2736	/*
2737	 * Backup slaves won't see the ARP reply, but do come through
2738	 * here for each ARP probe (so we swap the sip/tip to validate
2739	 * the probe).  In a "redundant switch, common router" type of
2740	 * configuration, the ARP probe will (hopefully) travel from
2741	 * the active, through one switch, the router, then the other
2742	 * switch before reaching the backup.
2743	 */
2744	if (slave->state == BOND_STATE_ACTIVE)
2745		bond_validate_arp(bond, slave, sip, tip);
2746	else
2747		bond_validate_arp(bond, slave, tip, sip);
2748
2749out_unlock:
2750	read_unlock(&bond->lock);
2751out:
2752	dev_kfree_skb(skb);
2753	return NET_RX_SUCCESS;
2754}
2755
2756/*
2757 * this function is called regularly to monitor each slave's link
2758 * ensuring that traffic is being sent and received when arp monitoring
2759 * is used in load-balancing mode. if the adapter has been dormant, then an
2760 * arp is transmitted to generate traffic. see activebackup_arp_monitor for
2761 * arp monitoring in active backup mode.
2762 */
2763void bond_loadbalance_arp_mon(struct work_struct *work)
2764{
2765	struct bonding *bond = container_of(work, struct bonding,
2766					    arp_work.work);
2767	struct slave *slave, *oldcurrent;
2768	int do_failover = 0;
2769	int delta_in_ticks;
2770	int i;
2771
2772	read_lock(&bond->lock);
2773
2774	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
2775
2776	if (bond->kill_timers)
2777		goto out;
2778
2779	if (bond->slave_cnt == 0)
2780		goto re_arm;
2781
2782	read_lock(&bond->curr_slave_lock);
2783	oldcurrent = bond->curr_active_slave;
2784	read_unlock(&bond->curr_slave_lock);
2785
2786	/* see if any of the previous devices are up now (i.e. they have
2787	 * xmt and rcv traffic). the curr_active_slave does not come into
2788	 * the picture unless it is null. also, slave->jiffies is not needed
2789	 * here because we send an arp on each slave and give a slave as
2790	 * long as it needs to get the tx/rx within the delta.
2791	 * TODO: what about up/down delay in arp mode? it wasn't here before
2792	 *       so it can wait
2793	 */
2794	bond_for_each_slave(bond, slave, i) {
2795		unsigned long trans_start = dev_trans_start(slave->dev);
2796
2797		if (slave->link != BOND_LINK_UP) {
2798			if (time_in_range(jiffies,
2799				trans_start - delta_in_ticks,
2800				trans_start + delta_in_ticks) &&
2801			    time_in_range(jiffies,
2802				slave->dev->last_rx - delta_in_ticks,
2803				slave->dev->last_rx + delta_in_ticks)) {
2804
2805				slave->link  = BOND_LINK_UP;
2806				slave->state = BOND_STATE_ACTIVE;
2807
2808				/* primary_slave has no meaning in round-robin
2809				 * mode. the window of a slave being up and
2810				 * curr_active_slave being null after enslaving
2811				 * is closed.
2812				 */
2813				if (!oldcurrent) {
2814					pr_info("%s: link status definitely up for interface %s, ",
2815						bond->dev->name,
2816						slave->dev->name);
2817					do_failover = 1;
2818				} else {
2819					pr_info("%s: interface %s is now up\n",
2820						bond->dev->name,
2821						slave->dev->name);
2822				}
2823			}
2824		} else {
2825			/* slave->link == BOND_LINK_UP */
2826
2827			/* not all switches will respond to an arp request
2828			 * when the source ip is 0, so don't take the link down
2829			 * if we don't know our ip yet
2830			 */
2831			if (!time_in_range(jiffies,
2832				trans_start - delta_in_ticks,
2833				trans_start + 2 * delta_in_ticks) ||
2834			    !time_in_range(jiffies,
2835				slave->dev->last_rx - delta_in_ticks,
2836				slave->dev->last_rx + 2 * delta_in_ticks)) {
2837
2838				slave->link  = BOND_LINK_DOWN;
2839				slave->state = BOND_STATE_BACKUP;
2840
2841				if (slave->link_failure_count < UINT_MAX)
2842					slave->link_failure_count++;
2843
2844				pr_info("%s: interface %s is now down.\n",
2845					bond->dev->name,
2846					slave->dev->name);
2847
2848				if (slave == oldcurrent)
2849					do_failover = 1;
2850			}
2851		}
2852
2853		/* note: if switch is in round-robin mode, all links
2854		 * must tx arp to ensure all links rx an arp - otherwise
2855		 * links may oscillate or not come up at all; if switch is
2856		 * in something like xor mode, there is nothing we can
2857		 * do - all replies will be rx'ed on same link causing slaves
2858		 * to be unstable during low/no traffic periods
2859		 */
2860		if (IS_UP(slave->dev))
2861			bond_arp_send_all(bond, slave);
2862	}
2863
2864	if (do_failover) {
2865		write_lock_bh(&bond->curr_slave_lock);
2866
2867		bond_select_active_slave(bond);
2868
2869		write_unlock_bh(&bond->curr_slave_lock);
2870	}
2871
2872re_arm:
2873	if (bond->params.arp_interval)
2874		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
2875out:
2876	read_unlock(&bond->lock);
2877}
2878
2879/*
2880 * Called to inspect slaves for active-backup mode ARP monitor link state
2881 * changes.  Sets new_link in slaves to specify what action should take
2882 * place for the slave.  Returns 0 if no changes are found, >0 if changes
2883 * to link states must be committed.
2884 *
2885 * Called with bond->lock held for read.
2886 */
2887static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
2888{
2889	struct slave *slave;
2890	int i, commit = 0;
2891	unsigned long trans_start;
2892
2893	bond_for_each_slave(bond, slave, i) {
2894		slave->new_link = BOND_LINK_NOCHANGE;
2895
2896		if (slave->link != BOND_LINK_UP) {
2897			if (time_in_range(jiffies,
2898				slave_last_rx(bond, slave) - delta_in_ticks,
2899				slave_last_rx(bond, slave) + delta_in_ticks)) {
2900
2901				slave->new_link = BOND_LINK_UP;
2902				commit++;
2903			}
2904
2905			continue;
2906		}
2907
2908		/*
2909		 * Give slaves 2*delta after being enslaved or made
2910		 * active.  This avoids bouncing, as the last receive
2911		 * times need a full ARP monitor cycle to be updated.
2912		 */
2913		if (time_in_range(jiffies,
2914				  slave->jiffies - delta_in_ticks,
2915				  slave->jiffies + 2 * delta_in_ticks))
2916			continue;
2917
2918		/*
2919		 * Backup slave is down if:
2920		 * - No current_arp_slave AND
2921		 * - more than 3*delta since last receive AND
2922		 * - the bond has an IP address
2923		 *
2924		 * Note: a non-null current_arp_slave indicates
2925		 * the curr_active_slave went down and we are
2926		 * searching for a new one; under this condition
2927		 * we only take the curr_active_slave down - this
2928		 * gives each slave a chance to tx/rx traffic
2929		 * before being taken out
2930		 */
2931		if (slave->state == BOND_STATE_BACKUP &&
2932		    !bond->current_arp_slave &&
2933		    !time_in_range(jiffies,
2934			slave_last_rx(bond, slave) - delta_in_ticks,
2935			slave_last_rx(bond, slave) + 3 * delta_in_ticks)) {
2936
2937			slave->new_link = BOND_LINK_DOWN;
2938			commit++;
2939		}
2940
2941		/*
2942		 * Active slave is down if:
2943		 * - more than 2*delta since transmitting OR
2944		 * - (more than 2*delta since receive AND
2945		 *    the bond has an IP address)
2946		 */
2947		trans_start = dev_trans_start(slave->dev);
2948		if ((slave->state == BOND_STATE_ACTIVE) &&
2949		    (!time_in_range(jiffies,
2950			trans_start - delta_in_ticks,
2951			trans_start + 2 * delta_in_ticks) ||
2952		     !time_in_range(jiffies,
2953			slave_last_rx(bond, slave) - delta_in_ticks,
2954			slave_last_rx(bond, slave) + 2 * delta_in_ticks))) {
2955
2956			slave->new_link = BOND_LINK_DOWN;
2957			commit++;
2958		}
2959	}
2960
2961	return commit;
2962}
2963
2964/*
2965 * Called to commit link state changes noted by inspection step of
2966 * active-backup mode ARP monitor.
2967 *
2968 * Called with RTNL and bond->lock for read.
2969 */
2970static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
2971{
2972	struct slave *slave;
2973	int i;
2974	unsigned long trans_start;
2975
2976	bond_for_each_slave(bond, slave, i) {
2977		switch (slave->new_link) {
2978		case BOND_LINK_NOCHANGE:
2979			continue;
2980
2981		case BOND_LINK_UP:
2982			trans_start = dev_trans_start(slave->dev);
2983			if ((!bond->curr_active_slave &&
2984			     time_in_range(jiffies,
2985					   trans_start - delta_in_ticks,
2986					   trans_start + delta_in_ticks)) ||
2987			    bond->curr_active_slave != slave) {
2988				slave->link = BOND_LINK_UP;
2989				bond->current_arp_slave = NULL;
2990
2991				pr_info("%s: link status definitely up for interface %s.\n",
2992					bond->dev->name, slave->dev->name);
2993
2994				if (!bond->curr_active_slave ||
2995				    (slave == bond->primary_slave))
2996					goto do_failover;
2997
2998			}
2999
3000			continue;
3001
3002		case BOND_LINK_DOWN:
3003			if (slave->link_failure_count < UINT_MAX)
3004				slave->link_failure_count++;
3005
3006			slave->link = BOND_LINK_DOWN;
3007			bond_set_slave_inactive_flags(slave);
3008
3009			pr_info("%s: link status definitely down for interface %s, disabling it\n",
3010				bond->dev->name, slave->dev->name);
3011
3012			if (slave == bond->curr_active_slave) {
3013				bond->current_arp_slave = NULL;
3014				goto do_failover;
3015			}
3016
3017			continue;
3018
3019		default:
3020			pr_err("%s: impossible: new_link %d on slave %s\n",
3021			       bond->dev->name, slave->new_link,
3022			       slave->dev->name);
3023			continue;
3024		}
3025
3026do_failover:
3027		ASSERT_RTNL();
3028		write_lock_bh(&bond->curr_slave_lock);
3029		bond_select_active_slave(bond);
3030		write_unlock_bh(&bond->curr_slave_lock);
3031	}
3032
3033	bond_set_carrier(bond);
3034}
3035
3036/*
3037 * Send ARP probes for active-backup mode ARP monitor.
3038 *
3039 * Called with bond->lock held for read.
3040 */
3041static void bond_ab_arp_probe(struct bonding *bond)
3042{
3043	struct slave *slave;
3044	int i;
3045
3046	read_lock(&bond->curr_slave_lock);
3047
3048	if (bond->current_arp_slave && bond->curr_active_slave)
3049		pr_info("PROBE: c_arp %s && cas %s BAD\n",
3050			bond->current_arp_slave->dev->name,
3051			bond->curr_active_slave->dev->name);
3052
3053	if (bond->curr_active_slave) {
3054		bond_arp_send_all(bond, bond->curr_active_slave);
3055		read_unlock(&bond->curr_slave_lock);
3056		return;
3057	}
3058
3059	read_unlock(&bond->curr_slave_lock);
3060
3061	/* if we don't have a curr_active_slave, search for the next available
3062	 * backup slave from the current_arp_slave and make it the candidate
3063	 * for becoming the curr_active_slave
3064	 */
3065
3066	if (!bond->current_arp_slave) {
3067		bond->current_arp_slave = bond->first_slave;
3068		if (!bond->current_arp_slave)
3069			return;
3070	}
3071
3072	bond_set_slave_inactive_flags(bond->current_arp_slave);
3073
3074	/* search for next candidate */
3075	bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) {
3076		if (IS_UP(slave->dev)) {
3077			slave->link = BOND_LINK_BACK;
3078			bond_set_slave_active_flags(slave);
3079			bond_arp_send_all(bond, slave);
3080			slave->jiffies = jiffies;
3081			bond->current_arp_slave = slave;
3082			break;
3083		}
3084
3085		/* if the link state is up at this point, we
3086		 * mark it down - this can happen if we have
3087		 * simultaneous link failures and
3088		 * reselect_active_interface doesn't make this
3089		 * one the current slave so it is still marked
3090		 * up when it is actually down
3091		 */
3092		if (slave->link == BOND_LINK_UP) {
3093			slave->link = BOND_LINK_DOWN;
3094			if (slave->link_failure_count < UINT_MAX)
3095				slave->link_failure_count++;
3096
3097			bond_set_slave_inactive_flags(slave);
3098
3099			pr_info("%s: backup interface %s is now down.\n",
3100				bond->dev->name, slave->dev->name);
3101		}
3102	}
3103}
3104
3105void bond_activebackup_arp_mon(struct work_struct *work)
3106{
3107	struct bonding *bond = container_of(work, struct bonding,
3108					    arp_work.work);
3109	int delta_in_ticks;
3110
3111	read_lock(&bond->lock);
3112
3113	if (bond->kill_timers)
3114		goto out;
3115
3116	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
3117
3118	if (bond->slave_cnt == 0)
3119		goto re_arm;
3120
3121	if (bond->send_grat_arp) {
3122		read_lock(&bond->curr_slave_lock);
3123		bond_send_gratuitous_arp(bond);
3124		read_unlock(&bond->curr_slave_lock);
3125	}
3126
3127	if (bond->send_unsol_na) {
3128		read_lock(&bond->curr_slave_lock);
3129		bond_send_unsolicited_na(bond);
3130		read_unlock(&bond->curr_slave_lock);
3131	}
3132
3133	if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
3134		read_unlock(&bond->lock);
3135		rtnl_lock();
3136		read_lock(&bond->lock);
3137
3138		bond_ab_arp_commit(bond, delta_in_ticks);
3139
3140		read_unlock(&bond->lock);
3141		rtnl_unlock();
3142		read_lock(&bond->lock);
3143	}
3144
3145	bond_ab_arp_probe(bond);
3146
3147re_arm:
3148	if (bond->params.arp_interval)
3149		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
3150out:
3151	read_unlock(&bond->lock);
3152}
3153
3154/*------------------------------ proc/seq_file-------------------------------*/
3155
3156#ifdef CONFIG_PROC_FS
3157
3158static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
3159	__acquires(&dev_base_lock)
3160	__acquires(&bond->lock)
3161{
3162	struct bonding *bond = seq->private;
3163	loff_t off = 0;
3164	struct slave *slave;
3165	int i;
3166
3167	/* make sure the bond won't be taken away */
3168	read_lock(&dev_base_lock);
3169	read_lock(&bond->lock);
3170
3171	if (*pos == 0)
3172		return SEQ_START_TOKEN;
3173
3174	bond_for_each_slave(bond, slave, i) {
3175		if (++off == *pos)
3176			return slave;
3177	}
3178
3179	return NULL;
3180}
3181
3182static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3183{
3184	struct bonding *bond = seq->private;
3185	struct slave *slave = v;
3186
3187	++*pos;
3188	if (v == SEQ_START_TOKEN)
3189		return bond->first_slave;
3190
3191	slave = slave->next;
3192
3193	return (slave == bond->first_slave) ? NULL : slave;
3194}
3195
3196static void bond_info_seq_stop(struct seq_file *seq, void *v)
3197	__releases(&bond->lock)
3198	__releases(&dev_base_lock)
3199{
3200	struct bonding *bond = seq->private;
3201
3202	read_unlock(&bond->lock);
3203	read_unlock(&dev_base_lock);
3204}
3205
3206static void bond_info_show_master(struct seq_file *seq)
3207{
3208	struct bonding *bond = seq->private;
3209	struct slave *curr;
3210	int i;
3211
3212	read_lock(&bond->curr_slave_lock);
3213	curr = bond->curr_active_slave;
3214	read_unlock(&bond->curr_slave_lock);
3215
3216	seq_printf(seq, "Bonding Mode: %s",
3217		   bond_mode_name(bond->params.mode));
3218
3219	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP &&
3220	    bond->params.fail_over_mac)
3221		seq_printf(seq, " (fail_over_mac %s)",
3222		   fail_over_mac_tbl[bond->params.fail_over_mac].modename);
3223
3224	seq_printf(seq, "\n");
3225
3226	if (bond->params.mode == BOND_MODE_XOR ||
3227		bond->params.mode == BOND_MODE_8023AD) {
3228		seq_printf(seq, "Transmit Hash Policy: %s (%d)\n",
3229			xmit_hashtype_tbl[bond->params.xmit_policy].modename,
3230			bond->params.xmit_policy);
3231	}
3232
3233	if (USES_PRIMARY(bond->params.mode)) {
3234		seq_printf(seq, "Primary Slave: %s",
3235			   (bond->primary_slave) ?
3236			   bond->primary_slave->dev->name : "None");
3237		if (bond->primary_slave)
3238			seq_printf(seq, " (primary_reselect %s)",
3239		   pri_reselect_tbl[bond->params.primary_reselect].modename);
3240
3241		seq_printf(seq, "\nCurrently Active Slave: %s\n",
3242			   (curr) ? curr->dev->name : "None");
3243	}
3244
3245	seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ?
3246		   "up" : "down");
3247	seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon);
3248	seq_printf(seq, "Up Delay (ms): %d\n",
3249		   bond->params.updelay * bond->params.miimon);
3250	seq_printf(seq, "Down Delay (ms): %d\n",
3251		   bond->params.downdelay * bond->params.miimon);
3252
3253
3254	/* ARP information */
3255	if (bond->params.arp_interval > 0) {
3256		int printed = 0;
3257		seq_printf(seq, "ARP Polling Interval (ms): %d\n",
3258				bond->params.arp_interval);
3259
3260		seq_printf(seq, "ARP IP target/s (n.n.n.n form):");
3261
3262		for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) {
3263			if (!bond->params.arp_targets[i])
3264				break;
3265			if (printed)
3266				seq_printf(seq, ",");
3267			seq_printf(seq, " %pI4", &bond->params.arp_targets[i]);
3268			printed = 1;
3269		}
3270		seq_printf(seq, "\n");
3271	}
3272
3273	if (bond->params.mode == BOND_MODE_8023AD) {
3274		struct ad_info ad_info;
3275
3276		seq_puts(seq, "\n802.3ad info\n");
3277		seq_printf(seq, "LACP rate: %s\n",
3278			   (bond->params.lacp_fast) ? "fast" : "slow");
3279		seq_printf(seq, "Aggregator selection policy (ad_select): %s\n",
3280			   ad_select_tbl[bond->params.ad_select].modename);
3281
3282		if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
3283			seq_printf(seq, "bond %s has no active aggregator\n",
3284				   bond->dev->name);
3285		} else {
3286			seq_printf(seq, "Active Aggregator Info:\n");
3287
3288			seq_printf(seq, "\tAggregator ID: %d\n",
3289				   ad_info.aggregator_id);
3290			seq_printf(seq, "\tNumber of ports: %d\n",
3291				   ad_info.ports);
3292			seq_printf(seq, "\tActor Key: %d\n",
3293				   ad_info.actor_key);
3294			seq_printf(seq, "\tPartner Key: %d\n",
3295				   ad_info.partner_key);
3296			seq_printf(seq, "\tPartner Mac Address: %pM\n",
3297				   ad_info.partner_system);
3298		}
3299	}
3300}
3301
3302static void bond_info_show_slave(struct seq_file *seq,
3303				 const struct slave *slave)
3304{
3305	struct bonding *bond = seq->private;
3306
3307	seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name);
3308	seq_printf(seq, "MII Status: %s\n",
3309		   (slave->link == BOND_LINK_UP) ?  "up" : "down");
3310	seq_printf(seq, "Link Failure Count: %u\n",
3311		   slave->link_failure_count);
3312
3313	seq_printf(seq, "Permanent HW addr: %pM\n", slave->perm_hwaddr);
3314
3315	if (bond->params.mode == BOND_MODE_8023AD) {
3316		const struct aggregator *agg
3317			= SLAVE_AD_INFO(slave).port.aggregator;
3318
3319		if (agg)
3320			seq_printf(seq, "Aggregator ID: %d\n",
3321				   agg->aggregator_identifier);
3322		else
3323			seq_puts(seq, "Aggregator ID: N/A\n");
3324	}
3325	seq_printf(seq, "Slave queue ID: %d\n", slave->queue_id);
3326}
3327
3328static int bond_info_seq_show(struct seq_file *seq, void *v)
3329{
3330	if (v == SEQ_START_TOKEN) {
3331		seq_printf(seq, "%s\n", version);
3332		bond_info_show_master(seq);
3333	} else
3334		bond_info_show_slave(seq, v);
3335
3336	return 0;
3337}
3338
3339static const struct seq_operations bond_info_seq_ops = {
3340	.start = bond_info_seq_start,
3341	.next  = bond_info_seq_next,
3342	.stop  = bond_info_seq_stop,
3343	.show  = bond_info_seq_show,
3344};
3345
3346static int bond_info_open(struct inode *inode, struct file *file)
3347{
3348	struct seq_file *seq;
3349	struct proc_dir_entry *proc;
3350	int res;
3351
3352	res = seq_open(file, &bond_info_seq_ops);
3353	if (!res) {
3354		/* recover the pointer buried in proc_dir_entry data */
3355		seq = file->private_data;
3356		proc = PDE(inode);
3357		seq->private = proc->data;
3358	}
3359
3360	return res;
3361}
3362
3363static const struct file_operations bond_info_fops = {
3364	.owner   = THIS_MODULE,
3365	.open    = bond_info_open,
3366	.read    = seq_read,
3367	.llseek  = seq_lseek,
3368	.release = seq_release,
3369};
3370
3371static void bond_create_proc_entry(struct bonding *bond)
3372{
3373	struct net_device *bond_dev = bond->dev;
3374	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id);
3375
3376	if (bn->proc_dir) {
3377		bond->proc_entry = proc_create_data(bond_dev->name,
3378						    S_IRUGO, bn->proc_dir,
3379						    &bond_info_fops, bond);
3380		if (bond->proc_entry == NULL)
3381			pr_warning("Warning: Cannot create /proc/net/%s/%s\n",
3382				   DRV_NAME, bond_dev->name);
3383		else
3384			memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ);
3385	}
3386}
3387
3388static void bond_remove_proc_entry(struct bonding *bond)
3389{
3390	struct net_device *bond_dev = bond->dev;
3391	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id);
3392
3393	if (bn->proc_dir && bond->proc_entry) {
3394		remove_proc_entry(bond->proc_file_name, bn->proc_dir);
3395		memset(bond->proc_file_name, 0, IFNAMSIZ);
3396		bond->proc_entry = NULL;
3397	}
3398}
3399
3400/* Create the bonding directory under /proc/net, if doesn't exist yet.
3401 * Caller must hold rtnl_lock.
3402 */
3403static void __net_init bond_create_proc_dir(struct bond_net *bn)
3404{
3405	if (!bn->proc_dir) {
3406		bn->proc_dir = proc_mkdir(DRV_NAME, bn->net->proc_net);
3407		if (!bn->proc_dir)
3408			pr_warning("Warning: cannot create /proc/net/%s\n",
3409				   DRV_NAME);
3410	}
3411}
3412
3413/* Destroy the bonding directory under /proc/net, if empty.
3414 * Caller must hold rtnl_lock.
3415 */
3416static void __net_exit bond_destroy_proc_dir(struct bond_net *bn)
3417{
3418	if (bn->proc_dir) {
3419		remove_proc_entry(DRV_NAME, bn->net->proc_net);
3420		bn->proc_dir = NULL;
3421	}
3422}
3423
3424#else /* !CONFIG_PROC_FS */
3425
/* Without CONFIG_PROC_FS the proc helpers below are empty stubs. */

/* stub: no proc file is created */
static void bond_create_proc_entry(struct bonding *bond)
{
}

/* stub: there is no proc file to remove */
static void bond_remove_proc_entry(struct bonding *bond)
{
}

/* stub: no proc directory is created */
static inline void bond_create_proc_dir(struct bond_net *bn)
{
}

/* stub: there is no proc directory to remove */
static inline void bond_destroy_proc_dir(struct bond_net *bn)
{
}
3441
3442#endif /* CONFIG_PROC_FS */
3443
3444
3445/*-------------------------- netdev event handling --------------------------*/
3446
3447/*
3448 * Change device name
3449 */
3450static int bond_event_changename(struct bonding *bond)
3451{
3452	bond_remove_proc_entry(bond);
3453	bond_create_proc_entry(bond);
3454
3455	return NOTIFY_DONE;
3456}
3457
3458static int bond_master_netdev_event(unsigned long event,
3459				    struct net_device *bond_dev)
3460{
3461	struct bonding *event_bond = netdev_priv(bond_dev);
3462
3463	switch (event) {
3464	case NETDEV_CHANGENAME:
3465		return bond_event_changename(event_bond);
3466	default:
3467		break;
3468	}
3469
3470	return NOTIFY_DONE;
3471}
3472
3473static int bond_slave_netdev_event(unsigned long event,
3474				   struct net_device *slave_dev)
3475{
3476	struct net_device *bond_dev = slave_dev->master;
3477	struct bonding *bond = netdev_priv(bond_dev);
3478
3479	switch (event) {
3480	case NETDEV_UNREGISTER:
3481		if (bond_dev) {
3482			if (bond->setup_by_slave)
3483				bond_release_and_destroy(bond_dev, slave_dev);
3484			else
3485				bond_release(bond_dev, slave_dev);
3486		}
3487		break;
3488	case NETDEV_CHANGE:
3489		if (bond->params.mode == BOND_MODE_8023AD || bond_is_lb(bond)) {
3490			struct slave *slave;
3491
3492			slave = bond_get_slave_by_dev(bond, slave_dev);
3493			if (slave) {
3494				u16 old_speed = slave->speed;
3495				u16 old_duplex = slave->duplex;
3496
3497				bond_update_speed_duplex(slave);
3498
3499				if (bond_is_lb(bond))
3500					break;
3501
3502				if (old_speed != slave->speed)
3503					bond_3ad_adapter_speed_changed(slave);
3504				if (old_duplex != slave->duplex)
3505					bond_3ad_adapter_duplex_changed(slave);
3506			}
3507		}
3508
3509		break;
3510	case NETDEV_DOWN:
3511		/*
3512		 * ... Or is it this?
3513		 */
3514		break;
3515	case NETDEV_CHANGEMTU:
3516		/*
3517		 * TODO: Should slaves be allowed to
3518		 * independently alter their MTU?  For
3519		 * an active-backup bond, slaves need
3520		 * not be the same type of device, so
3521		 * MTUs may vary.  For other modes,
3522		 * slaves arguably should have the
3523		 * same MTUs. To do this, we'd need to
3524		 * take over the slave's change_mtu
3525		 * function for the duration of their
3526		 * servitude.
3527		 */
3528		break;
3529	case NETDEV_CHANGENAME:
3530		/*
3531		 * TODO: handle changing the primary's name
3532		 */
3533		break;
3534	case NETDEV_FEAT_CHANGE:
3535		bond_compute_features(bond);
3536		break;
3537	default:
3538		break;
3539	}
3540
3541	return NOTIFY_DONE;
3542}
3543
3544/*
3545 * bond_netdev_event: handle netdev notifier chain events.
3546 *
3547 * This function receives events for the netdev chain.  The caller (an
3548 * ioctl handler calling blocking_notifier_call_chain) holds the necessary
3549 * locks for us to safely manipulate the slave devices (RTNL lock,
3550 * dev_probe_lock).
3551 */
3552static int bond_netdev_event(struct notifier_block *this,
3553			     unsigned long event, void *ptr)
3554{
3555	struct net_device *event_dev = (struct net_device *)ptr;
3556
3557	pr_debug("event_dev: %s, event: %lx\n",
3558		 event_dev ? event_dev->name : "None",
3559		 event);
3560
3561	if (!(event_dev->priv_flags & IFF_BONDING))
3562		return NOTIFY_DONE;
3563
3564	if (event_dev->flags & IFF_MASTER) {
3565		pr_debug("IFF_MASTER\n");
3566		return bond_master_netdev_event(event, event_dev);
3567	}
3568
3569	if (event_dev->flags & IFF_SLAVE) {
3570		pr_debug("IFF_SLAVE\n");
3571		return bond_slave_netdev_event(event, event_dev);
3572	}
3573
3574	return NOTIFY_DONE;
3575}
3576
3577/*
3578 * bond_inetaddr_event: handle inetaddr notifier chain events.
3579 *
3580 * We keep track of device IPs primarily to use as source addresses in
3581 * ARP monitor probes (rather than spewing out broadcasts all the time).
3582 *
3583 * We track one IP for the main device (if it has one), plus one per VLAN.
3584 */
3585static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
3586{
3587	struct in_ifaddr *ifa = ptr;
3588	struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev;
3589	struct bond_net *bn = net_generic(dev_net(event_dev), bond_net_id);
3590	struct bonding *bond;
3591	struct vlan_entry *vlan;
3592
3593	list_for_each_entry(bond, &bn->dev_list, bond_list) {
3594		if (bond->dev == event_dev) {
3595			switch (event) {
3596			case NETDEV_UP:
3597				bond->master_ip = ifa->ifa_local;
3598				return NOTIFY_OK;
3599			case NETDEV_DOWN:
3600				bond->master_ip = bond_glean_dev_ip(bond->dev);
3601				return NOTIFY_OK;
3602			default:
3603				return NOTIFY_DONE;
3604			}
3605		}
3606
3607		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
3608			if (!bond->vlgrp)
3609				continue;
3610			vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
3611			if (vlan_dev == event_dev) {
3612				switch (event) {
3613				case NETDEV_UP:
3614					vlan->vlan_ip = ifa->ifa_local;
3615					return NOTIFY_OK;
3616				case NETDEV_DOWN:
3617					vlan->vlan_ip =
3618						bond_glean_dev_ip(vlan_dev);
3619					return NOTIFY_OK;
3620				default:
3621					return NOTIFY_DONE;
3622				}
3623			}
3624		}
3625	}
3626	return NOTIFY_DONE;
3627}
3628
3629static struct notifier_block bond_netdev_notifier = {
3630	.notifier_call = bond_netdev_event,
3631};
3632
3633static struct notifier_block bond_inetaddr_notifier = {
3634	.notifier_call = bond_inetaddr_event,
3635};
3636
3637/*-------------------------- Packet type handling ---------------------------*/
3638
3639/* register to receive lacpdus on a bond */
3640static void bond_register_lacpdu(struct bonding *bond)
3641{
3642	struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type);
3643
3644	/* initialize packet type */
3645	pk_type->type = PKT_TYPE_LACPDU;
3646	pk_type->dev = bond->dev;
3647	pk_type->func = bond_3ad_lacpdu_recv;
3648
3649	dev_add_pack(pk_type);
3650}
3651
3652/* unregister to receive lacpdus on a bond */
3653static void bond_unregister_lacpdu(struct bonding *bond)
3654{
3655	dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type));
3656}
3657
3658void bond_register_arp(struct bonding *bond)
3659{
3660	struct packet_type *pt = &bond->arp_mon_pt;
3661
3662	if (pt->type)
3663		return;
3664
3665	pt->type = htons(ETH_P_ARP);
3666	pt->dev = bond->dev;
3667	pt->func = bond_arp_rcv;
3668	dev_add_pack(pt);
3669}
3670
3671void bond_unregister_arp(struct bonding *bond)
3672{
3673	struct packet_type *pt = &bond->arp_mon_pt;
3674
3675	dev_remove_pack(pt);
3676	pt->type = 0;
3677}
3678
3679/*---------------------------- Hashing Policies -----------------------------*/
3680
3681/*
3682 * Hash for the output device based upon layer 2 and layer 3 data. If
3683 * the packet is not IP mimic bond_xmit_hash_policy_l2()
3684 */
3685static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
3686{
3687	struct ethhdr *data = (struct ethhdr *)skb->data;
3688	struct iphdr *iph = ip_hdr(skb);
3689
3690	if (skb->protocol == htons(ETH_P_IP)) {
3691		return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
3692			(data->h_dest[5] ^ data->h_source[5])) % count;
3693	}
3694
3695	return (data->h_dest[5] ^ data->h_source[5]) % count;
3696}
3697
3698/*
3699 * Hash for the output device based upon layer 3 and layer 4 data. If
3700 * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is
3701 * altogether not IP, mimic bond_xmit_hash_policy_l2()
3702 */
3703static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
3704{
3705	struct ethhdr *data = (struct ethhdr *)skb->data;
3706	struct iphdr *iph = ip_hdr(skb);
3707	__be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
3708	int layer4_xor = 0;
3709
3710	if (skb->protocol == htons(ETH_P_IP)) {
3711		if (!(iph->frag_off & htons(IP_MF|IP_OFFSET)) &&
3712		    (iph->protocol == IPPROTO_TCP ||
3713		     iph->protocol == IPPROTO_UDP)) {
3714			layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1)));
3715		}
3716		return (layer4_xor ^
3717			((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
3718
3719	}
3720
3721	return (data->h_dest[5] ^ data->h_source[5]) % count;
3722}
3723
3724/*
3725 * Hash for the output device based upon layer 2 data
3726 */
3727static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
3728{
3729	struct ethhdr *data = (struct ethhdr *)skb->data;
3730
3731	return (data->h_dest[5] ^ data->h_source[5]) % count;
3732}
3733
3734/*-------------------------- Device entry points ----------------------------*/
3735
3736static int bond_open(struct net_device *bond_dev)
3737{
3738	struct bonding *bond = netdev_priv(bond_dev);
3739
3740	bond->kill_timers = 0;
3741
3742	if (bond_is_lb(bond)) {
3743		/* bond_alb_initialize must be called before the timer
3744		 * is started.
3745		 */
3746		if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) {
3747			/* something went wrong - fail the open operation */
3748			return -ENOMEM;
3749		}
3750
3751		INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
3752		queue_delayed_work(bond->wq, &bond->alb_work, 0);
3753	}
3754
3755	if (bond->params.miimon) {  /* link check interval, in milliseconds. */
3756		INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
3757		queue_delayed_work(bond->wq, &bond->mii_work, 0);
3758	}
3759
3760	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
3761		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
3762			INIT_DELAYED_WORK(&bond->arp_work,
3763					  bond_activebackup_arp_mon);
3764		else
3765			INIT_DELAYED_WORK(&bond->arp_work,
3766					  bond_loadbalance_arp_mon);
3767
3768		queue_delayed_work(bond->wq, &bond->arp_work, 0);
3769		if (bond->params.arp_validate)
3770			bond_register_arp(bond);
3771	}
3772
3773	if (bond->params.mode == BOND_MODE_8023AD) {
3774		INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
3775		queue_delayed_work(bond->wq, &bond->ad_work, 0);
3776		/* register to receive LACPDUs */
3777		bond_register_lacpdu(bond);
3778		bond_3ad_initiate_agg_selection(bond, 1);
3779	}
3780
3781	return 0;
3782}
3783
3784static int bond_close(struct net_device *bond_dev)
3785{
3786	struct bonding *bond = netdev_priv(bond_dev);
3787
3788	if (bond->params.mode == BOND_MODE_8023AD) {
3789		/* Unregister the receive of LACPDUs */
3790		bond_unregister_lacpdu(bond);
3791	}
3792
3793	if (bond->params.arp_validate)
3794		bond_unregister_arp(bond);
3795
3796	write_lock_bh(&bond->lock);
3797
3798	bond->send_grat_arp = 0;
3799	bond->send_unsol_na = 0;
3800
3801	/* signal timers not to re-arm */
3802	bond->kill_timers = 1;
3803
3804	write_unlock_bh(&bond->lock);
3805
3806	if (bond->params.miimon) {  /* link check interval, in milliseconds. */
3807		cancel_delayed_work(&bond->mii_work);
3808	}
3809
3810	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
3811		cancel_delayed_work(&bond->arp_work);
3812	}
3813
3814	switch (bond->params.mode) {
3815	case BOND_MODE_8023AD:
3816		cancel_delayed_work(&bond->ad_work);
3817		break;
3818	case BOND_MODE_TLB:
3819	case BOND_MODE_ALB:
3820		cancel_delayed_work(&bond->alb_work);
3821		break;
3822	default:
3823		break;
3824	}
3825
3826
3827	if (bond_is_lb(bond)) {
3828		/* Must be called only after all
3829		 * slaves have been released
3830		 */
3831		bond_alb_deinitialize(bond);
3832	}
3833
3834	return 0;
3835}
3836
3837static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
3838						struct rtnl_link_stats64 *stats)
3839{
3840	struct bonding *bond = netdev_priv(bond_dev);
3841	struct rtnl_link_stats64 temp;
3842	struct slave *slave;
3843	int i;
3844
3845	memset(stats, 0, sizeof(*stats));
3846
3847	read_lock_bh(&bond->lock);
3848
3849	bond_for_each_slave(bond, slave, i) {
3850		const struct rtnl_link_stats64 *sstats =
3851			dev_get_stats(slave->dev, &temp);
3852
3853		stats->rx_packets += sstats->rx_packets;
3854		stats->rx_bytes += sstats->rx_bytes;
3855		stats->rx_errors += sstats->rx_errors;
3856		stats->rx_dropped += sstats->rx_dropped;
3857
3858		stats->tx_packets += sstats->tx_packets;
3859		stats->tx_bytes += sstats->tx_bytes;
3860		stats->tx_errors += sstats->tx_errors;
3861		stats->tx_dropped += sstats->tx_dropped;
3862
3863		stats->multicast += sstats->multicast;
3864		stats->collisions += sstats->collisions;
3865
3866		stats->rx_length_errors += sstats->rx_length_errors;
3867		stats->rx_over_errors += sstats->rx_over_errors;
3868		stats->rx_crc_errors += sstats->rx_crc_errors;
3869		stats->rx_frame_errors += sstats->rx_frame_errors;
3870		stats->rx_fifo_errors += sstats->rx_fifo_errors;
3871		stats->rx_missed_errors += sstats->rx_missed_errors;
3872
3873		stats->tx_aborted_errors += sstats->tx_aborted_errors;
3874		stats->tx_carrier_errors += sstats->tx_carrier_errors;
3875		stats->tx_fifo_errors += sstats->tx_fifo_errors;
3876		stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors;
3877		stats->tx_window_errors += sstats->tx_window_errors;
3878	}
3879
3880	read_unlock_bh(&bond->lock);
3881
3882	return stats;
3883}
3884
3885static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
3886{
3887	struct net_device *slave_dev = NULL;
3888	struct ifbond k_binfo;
3889	struct ifbond __user *u_binfo = NULL;
3890	struct ifslave k_sinfo;
3891	struct ifslave __user *u_sinfo = NULL;
3892	struct mii_ioctl_data *mii = NULL;
3893	int res = 0;
3894
3895	pr_debug("bond_ioctl: master=%s, cmd=%d\n", bond_dev->name, cmd);
3896
3897	switch (cmd) {
3898	case SIOCGMIIPHY:
3899		mii = if_mii(ifr);
3900		if (!mii)
3901			return -EINVAL;
3902
3903		mii->phy_id = 0;
3904		/* Fall Through */
3905	case SIOCGMIIREG:
3906		/*
3907		 * We do this again just in case we were called by SIOCGMIIREG
3908		 * instead of SIOCGMIIPHY.
3909		 */
3910		mii = if_mii(ifr);
3911		if (!mii)
3912			return -EINVAL;
3913
3914
3915		if (mii->reg_num == 1) {
3916			struct bonding *bond = netdev_priv(bond_dev);
3917			mii->val_out = 0;
3918			read_lock(&bond->lock);
3919			read_lock(&bond->curr_slave_lock);
3920			if (netif_carrier_ok(bond->dev))
3921				mii->val_out = BMSR_LSTATUS;
3922
3923			read_unlock(&bond->curr_slave_lock);
3924			read_unlock(&bond->lock);
3925		}
3926
3927		return 0;
3928	case BOND_INFO_QUERY_OLD:
3929	case SIOCBONDINFOQUERY:
3930		u_binfo = (struct ifbond __user *)ifr->ifr_data;
3931
3932		if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond)))
3933			return -EFAULT;
3934
3935		res = bond_info_query(bond_dev, &k_binfo);
3936		if (res == 0 &&
3937		    copy_to_user(u_binfo, &k_binfo, sizeof(ifbond)))
3938			return -EFAULT;
3939
3940		return res;
3941	case BOND_SLAVE_INFO_QUERY_OLD:
3942	case SIOCBONDSLAVEINFOQUERY:
3943		u_sinfo = (struct ifslave __user *)ifr->ifr_data;
3944
3945		if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave)))
3946			return -EFAULT;
3947
3948		res = bond_slave_info_query(bond_dev, &k_sinfo);
3949		if (res == 0 &&
3950		    copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave)))
3951			return -EFAULT;
3952
3953		return res;
3954	default:
3955		/* Go on */
3956		break;
3957	}
3958
3959	if (!capable(CAP_NET_ADMIN))
3960		return -EPERM;
3961
3962	slave_dev = dev_get_by_name(dev_net(bond_dev), ifr->ifr_slave);
3963
3964	pr_debug("slave_dev=%p:\n", slave_dev);
3965
3966	if (!slave_dev)
3967		res = -ENODEV;
3968	else {
3969		pr_debug("slave_dev->name=%s:\n", slave_dev->name);
3970		switch (cmd) {
3971		case BOND_ENSLAVE_OLD:
3972		case SIOCBONDENSLAVE:
3973			res = bond_enslave(bond_dev, slave_dev);
3974			break;
3975		case BOND_RELEASE_OLD:
3976		case SIOCBONDRELEASE:
3977			res = bond_release(bond_dev, slave_dev);
3978			break;
3979		case BOND_SETHWADDR_OLD:
3980		case SIOCBONDSETHWADDR:
3981			res = bond_sethwaddr(bond_dev, slave_dev);
3982			break;
3983		case BOND_CHANGE_ACTIVE_OLD:
3984		case SIOCBONDCHANGEACTIVE:
3985			res = bond_ioctl_change_active(bond_dev, slave_dev);
3986			break;
3987		default:
3988			res = -EOPNOTSUPP;
3989		}
3990
3991		dev_put(slave_dev);
3992	}
3993
3994	return res;
3995}
3996
3997static bool bond_addr_in_mc_list(unsigned char *addr,
3998				 struct netdev_hw_addr_list *list,
3999				 int addrlen)
4000{
4001	struct netdev_hw_addr *ha;
4002
4003	netdev_hw_addr_list_for_each(ha, list)
4004		if (!memcmp(ha->addr, addr, addrlen))
4005			return true;
4006
4007	return false;
4008}
4009
4010static void bond_set_multicast_list(struct net_device *bond_dev)
4011{
4012	struct bonding *bond = netdev_priv(bond_dev);
4013	struct netdev_hw_addr *ha;
4014	bool found;
4015
4016	/*
4017	 * Do promisc before checking multicast_mode
4018	 */
4019	if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC))
4020		bond_set_promiscuity(bond, 1);
4021
4022
4023	if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC))
4024		bond_set_promiscuity(bond, -1);
4025
4026
4027	/* set allmulti flag to slaves */
4028	if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI))
4029		bond_set_allmulti(bond, 1);
4030
4031
4032	if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI))
4033		bond_set_allmulti(bond, -1);
4034
4035
4036	read_lock(&bond->lock);
4037
4038	bond->flags = bond_dev->flags;
4039
4040	/* looking for addresses to add to slaves' mc list */
4041	netdev_for_each_mc_addr(ha, bond_dev) {
4042		found = bond_addr_in_mc_list(ha->addr, &bond->mc_list,
4043					     bond_dev->addr_len);
4044		if (!found)
4045			bond_mc_add(bond, ha->addr);
4046	}
4047
4048	/* looking for addresses to delete from slaves' list */
4049	netdev_hw_addr_list_for_each(ha, &bond->mc_list) {
4050		found = bond_addr_in_mc_list(ha->addr, &bond_dev->mc,
4051					     bond_dev->addr_len);
4052		if (!found)
4053			bond_mc_del(bond, ha->addr);
4054	}
4055
4056	/* save master's multicast list */
4057	__hw_addr_flush(&bond->mc_list);
4058	__hw_addr_add_multiple(&bond->mc_list, &bond_dev->mc,
4059			       bond_dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST);
4060
4061	read_unlock(&bond->lock);
4062}
4063
4064static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms)
4065{
4066	struct bonding *bond = netdev_priv(dev);
4067	struct slave *slave = bond->first_slave;
4068
4069	if (slave) {
4070		const struct net_device_ops *slave_ops
4071			= slave->dev->netdev_ops;
4072		if (slave_ops->ndo_neigh_setup)
4073			return slave_ops->ndo_neigh_setup(slave->dev, parms);
4074	}
4075	return 0;
4076}
4077
4078/*
4079 * Change the MTU of all of a master's slaves to match the master
4080 */
4081static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
4082{
4083	struct bonding *bond = netdev_priv(bond_dev);
4084	struct slave *slave, *stop_at;
4085	int res = 0;
4086	int i;
4087
4088	pr_debug("bond=%p, name=%s, new_mtu=%d\n", bond,
4089		 (bond_dev ? bond_dev->name : "None"), new_mtu);
4090
4091	/* Can't hold bond->lock with bh disabled here since
4092	 * some base drivers panic. On the other hand we can't
4093	 * hold bond->lock without bh disabled because we'll
4094	 * deadlock. The only solution is to rely on the fact
4095	 * that we're under rtnl_lock here, and the slaves
4096	 * list won't change. This doesn't solve the problem
4097	 * of setting the slave's MTU while it is
4098	 * transmitting, but the assumption is that the base
4099	 * driver can handle that.
4100	 *
4101	 * TODO: figure out a way to safely iterate the slaves
4102	 * list, but without holding a lock around the actual
4103	 * call to the base driver.
4104	 */
4105
4106	bond_for_each_slave(bond, slave, i) {
4107		pr_debug("s %p s->p %p c_m %p\n",
4108			 slave,
4109			 slave->prev,
4110			 slave->dev->netdev_ops->ndo_change_mtu);
4111
4112		res = dev_set_mtu(slave->dev, new_mtu);
4113
4114		if (res) {
4115			/* If we failed to set the slave's mtu to the new value
4116			 * we must abort the operation even in ACTIVE_BACKUP
4117			 * mode, because if we allow the backup slaves to have
4118			 * different mtu values than the active slave we'll
4119			 * need to change their mtu when doing a failover. That
4120			 * means changing their mtu from timer context, which
4121			 * is probably not a good idea.
4122			 */
4123			pr_debug("err %d %s\n", res, slave->dev->name);
4124			goto unwind;
4125		}
4126	}
4127
4128	bond_dev->mtu = new_mtu;
4129
4130	return 0;
4131
4132unwind:
4133	/* unwind from head to the slave that failed */
4134	stop_at = slave;
4135	bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) {
4136		int tmp_res;
4137
4138		tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu);
4139		if (tmp_res) {
4140			pr_debug("unwind err %d dev %s\n",
4141				 tmp_res, slave->dev->name);
4142		}
4143	}
4144
4145	return res;
4146}
4147
4148/*
4149 * Change HW address
4150 *
4151 * Note that many devices must be down to change the HW address, and
4152 * downing the master releases all slaves.  We can make bonds full of
4153 * bonding devices to test this, however.
4154 */
4155static int bond_set_mac_address(struct net_device *bond_dev, void *addr)
4156{
4157	struct bonding *bond = netdev_priv(bond_dev);
4158	struct sockaddr *sa = addr, tmp_sa;
4159	struct slave *slave, *stop_at;
4160	int res = 0;
4161	int i;
4162
4163	if (bond->params.mode == BOND_MODE_ALB)
4164		return bond_alb_set_mac_address(bond_dev, addr);
4165
4166
4167	pr_debug("bond=%p, name=%s\n",
4168		 bond, bond_dev ? bond_dev->name : "None");
4169
4170	/*
4171	 * If fail_over_mac is set to active, do nothing and return
4172	 * success.  Returning an error causes ifenslave to fail.
4173	 */
4174	if (bond->params.fail_over_mac == BOND_FOM_ACTIVE)
4175		return 0;
4176
4177	if (!is_valid_ether_addr(sa->sa_data))
4178		return -EADDRNOTAVAIL;
4179
4180	/* Can't hold bond->lock with bh disabled here since
4181	 * some base drivers panic. On the other hand we can't
4182	 * hold bond->lock without bh disabled because we'll
4183	 * deadlock. The only solution is to rely on the fact
4184	 * that we're under rtnl_lock here, and the slaves
4185	 * list won't change. This doesn't solve the problem
4186	 * of setting the slave's hw address while it is
4187	 * transmitting, but the assumption is that the base
4188	 * driver can handle that.
4189	 *
4190	 * TODO: figure out a way to safely iterate the slaves
4191	 * list, but without holding a lock around the actual
4192	 * call to the base driver.
4193	 */
4194
4195	bond_for_each_slave(bond, slave, i) {
4196		const struct net_device_ops *slave_ops = slave->dev->netdev_ops;
4197		pr_debug("slave %p %s\n", slave, slave->dev->name);
4198
4199		if (slave_ops->ndo_set_mac_address == NULL) {
4200			res = -EOPNOTSUPP;
4201			pr_debug("EOPNOTSUPP %s\n", slave->dev->name);
4202			goto unwind;
4203		}
4204
4205		res = dev_set_mac_address(slave->dev, addr);
4206		if (res) {
4207			/* TODO: consider downing the slave
4208			 * and retry ?
4209			 * User should expect communications
4210			 * breakage anyway until ARP finish
4211			 * updating, so...
4212			 */
4213			pr_debug("err %d %s\n", res, slave->dev->name);
4214			goto unwind;
4215		}
4216	}
4217
4218	/* success */
4219	memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len);
4220	return 0;
4221
4222unwind:
4223	memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
4224	tmp_sa.sa_family = bond_dev->type;
4225
4226	/* unwind from head to the slave that failed */
4227	stop_at = slave;
4228	bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) {
4229		int tmp_res;
4230
4231		tmp_res = dev_set_mac_address(slave->dev, &tmp_sa);
4232		if (tmp_res) {
4233			pr_debug("unwind err %d dev %s\n",
4234				 tmp_res, slave->dev->name);
4235		}
4236	}
4237
4238	return res;
4239}
4240
4241static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev)
4242{
4243	struct bonding *bond = netdev_priv(bond_dev);
4244	struct slave *slave, *start_at;
4245	int i, slave_no, res = 1;
4246	struct iphdr *iph = ip_hdr(skb);
4247
4248	read_lock(&bond->lock);
4249
4250	if (!BOND_IS_OK(bond))
4251		goto out;
4252	/*
4253	 * Start with the curr_active_slave that joined the bond as the
4254	 * default for sending IGMP traffic.  For failover purposes one
4255	 * needs to maintain some consistency for the interface that will
4256	 * send the join/membership reports.  The curr_active_slave found
4257	 * will send all of this type of traffic.
4258	 */
4259	if ((iph->protocol == IPPROTO_IGMP) &&
4260	    (skb->protocol == htons(ETH_P_IP))) {
4261
4262		read_lock(&bond->curr_slave_lock);
4263		slave = bond->curr_active_slave;
4264		read_unlock(&bond->curr_slave_lock);
4265
4266		if (!slave)
4267			goto out;
4268	} else {
4269		/*
4270		 * Concurrent TX may collide on rr_tx_counter; we accept
4271		 * that as being rare enough not to justify using an
4272		 * atomic op here.
4273		 */
4274		slave_no = bond->rr_tx_counter++ % bond->slave_cnt;
4275
4276		bond_for_each_slave(bond, slave, i) {
4277			slave_no--;
4278			if (slave_no < 0)
4279				break;
4280		}
4281	}
4282
4283	start_at = slave;
4284	bond_for_each_slave_from(bond, slave, i, start_at) {
4285		if (IS_UP(slave->dev) &&
4286		    (slave->link == BOND_LINK_UP) &&
4287		    (slave->state == BOND_STATE_ACTIVE)) {
4288			res = bond_dev_queue_xmit(bond, skb, slave->dev);
4289			break;
4290		}
4291	}
4292
4293out:
4294	if (res) {
4295		/* no suitable interface, frame not sent */
4296		dev_kfree_skb(skb);
4297	}
4298	read_unlock(&bond->lock);
4299	return NETDEV_TX_OK;
4300}
4301
4302
4303/*
4304 * in active-backup mode, we know that bond->curr_active_slave is always valid if
4305 * the bond has a usable interface.
4306 */
4307static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev)
4308{
4309	struct bonding *bond = netdev_priv(bond_dev);
4310	int res = 1;
4311
4312	read_lock(&bond->lock);
4313	read_lock(&bond->curr_slave_lock);
4314
4315	if (!BOND_IS_OK(bond))
4316		goto out;
4317
4318	if (!bond->curr_active_slave)
4319		goto out;
4320
4321	res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev);
4322
4323out:
4324	if (res)
4325		/* no suitable interface, frame not sent */
4326		dev_kfree_skb(skb);
4327
4328	read_unlock(&bond->curr_slave_lock);
4329	read_unlock(&bond->lock);
4330	return NETDEV_TX_OK;
4331}
4332
4333/*
4334 * In bond_xmit_xor() , we determine the output device by using a pre-
4335 * determined xmit_hash_policy(), If the selected device is not enabled,
4336 * find the next active slave.
4337 */
4338static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
4339{
4340	struct bonding *bond = netdev_priv(bond_dev);
4341	struct slave *slave, *start_at;
4342	int slave_no;
4343	int i;
4344	int res = 1;
4345
4346	read_lock(&bond->lock);
4347
4348	if (!BOND_IS_OK(bond))
4349		goto out;
4350
4351	slave_no = bond->xmit_hash_policy(skb, bond->slave_cnt);
4352
4353	bond_for_each_slave(bond, slave, i) {
4354		slave_no--;
4355		if (slave_no < 0)
4356			break;
4357	}
4358
4359	start_at = slave;
4360
4361	bond_for_each_slave_from(bond, slave, i, start_at) {
4362		if (IS_UP(slave->dev) &&
4363		    (slave->link == BOND_LINK_UP) &&
4364		    (slave->state == BOND_STATE_ACTIVE)) {
4365			res = bond_dev_queue_xmit(bond, skb, slave->dev);
4366			break;
4367		}
4368	}
4369
4370out:
4371	if (res) {
4372		/* no suitable interface, frame not sent */
4373		dev_kfree_skb(skb);
4374	}
4375	read_unlock(&bond->lock);
4376	return NETDEV_TX_OK;
4377}
4378
4379/*
4380 * in broadcast mode, we send everything to all usable interfaces.
4381 */
4382static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
4383{
4384	struct bonding *bond = netdev_priv(bond_dev);
4385	struct slave *slave, *start_at;
4386	struct net_device *tx_dev = NULL;
4387	int i;
4388	int res = 1;
4389
4390	read_lock(&bond->lock);
4391
4392	if (!BOND_IS_OK(bond))
4393		goto out;
4394
4395	read_lock(&bond->curr_slave_lock);
4396	start_at = bond->curr_active_slave;
4397	read_unlock(&bond->curr_slave_lock);
4398
4399	if (!start_at)
4400		goto out;
4401
4402	bond_for_each_slave_from(bond, slave, i, start_at) {
4403		if (IS_UP(slave->dev) &&
4404		    (slave->link == BOND_LINK_UP) &&
4405		    (slave->state == BOND_STATE_ACTIVE)) {
4406			if (tx_dev) {
4407				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
4408				if (!skb2) {
4409					pr_err("%s: Error: bond_xmit_broadcast(): skb_clone() failed\n",
4410					       bond_dev->name);
4411					continue;
4412				}
4413
4414				res = bond_dev_queue_xmit(bond, skb2, tx_dev);
4415				if (res) {
4416					dev_kfree_skb(skb2);
4417					continue;
4418				}
4419			}
4420			tx_dev = slave->dev;
4421		}
4422	}
4423
4424	if (tx_dev)
4425		res = bond_dev_queue_xmit(bond, skb, tx_dev);
4426
4427out:
4428	if (res)
4429		/* no suitable interface, frame not sent */
4430		dev_kfree_skb(skb);
4431
4432	/* frame sent to all suitable interfaces */
4433	read_unlock(&bond->lock);
4434	return NETDEV_TX_OK;
4435}
4436
4437/*------------------------- Device initialization ---------------------------*/
4438
4439static void bond_set_xmit_hash_policy(struct bonding *bond)
4440{
4441	switch (bond->params.xmit_policy) {
4442	case BOND_XMIT_POLICY_LAYER23:
4443		bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
4444		break;
4445	case BOND_XMIT_POLICY_LAYER34:
4446		bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
4447		break;
4448	case BOND_XMIT_POLICY_LAYER2:
4449	default:
4450		bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
4451		break;
4452	}
4453}
4454
4455/*
4456 * Lookup the slave that corresponds to a qid
4457 */
4458static inline int bond_slave_override(struct bonding *bond,
4459				      struct sk_buff *skb)
4460{
4461	int i, res = 1;
4462	struct slave *slave = NULL;
4463	struct slave *check_slave;
4464
4465	read_lock(&bond->lock);
4466
4467	if (!BOND_IS_OK(bond) || !skb->queue_mapping)
4468		goto out;
4469
4470	/* Find out if any slaves have the same mapping as this skb. */
4471	bond_for_each_slave(bond, check_slave, i) {
4472		if (check_slave->queue_id == skb->queue_mapping) {
4473			slave = check_slave;
4474			break;
4475		}
4476	}
4477
4478	/* If the slave isn't UP, use default transmit policy. */
4479	if (slave && slave->queue_id && IS_UP(slave->dev) &&
4480	    (slave->link == BOND_LINK_UP)) {
4481		res = bond_dev_queue_xmit(bond, skb, slave->dev);
4482	}
4483
4484out:
4485	read_unlock(&bond->lock);
4486	return res;
4487}
4488
4489static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb)
4490{
4491	/*
4492	 * This helper function exists to help dev_pick_tx get the correct
4493	 * destination queue.  Using a helper function skips the a call to
4494	 * skb_tx_hash and will put the skbs in the queue we expect on their
4495	 * way down to the bonding driver.
4496	 */
4497	return skb->queue_mapping;
4498}
4499
4500static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
4501{
4502	struct bonding *bond = netdev_priv(dev);
4503
4504	if (TX_QUEUE_OVERRIDE(bond->params.mode)) {
4505		if (!bond_slave_override(bond, skb))
4506			return NETDEV_TX_OK;
4507	}
4508
4509	switch (bond->params.mode) {
4510	case BOND_MODE_ROUNDROBIN:
4511		return bond_xmit_roundrobin(skb, dev);
4512	case BOND_MODE_ACTIVEBACKUP:
4513		return bond_xmit_activebackup(skb, dev);
4514	case BOND_MODE_XOR:
4515		return bond_xmit_xor(skb, dev);
4516	case BOND_MODE_BROADCAST:
4517		return bond_xmit_broadcast(skb, dev);
4518	case BOND_MODE_8023AD:
4519		return bond_3ad_xmit_xor(skb, dev);
4520	case BOND_MODE_ALB:
4521	case BOND_MODE_TLB:
4522		return bond_alb_xmit(skb, dev);
4523	default:
4524		/* Should never happen, mode already checked */
4525		pr_err("%s: Error: Unknown bonding mode %d\n",
4526		       dev->name, bond->params.mode);
4527		WARN_ON_ONCE(1);
4528		dev_kfree_skb(skb);
4529		return NETDEV_TX_OK;
4530	}
4531}
4532
4533
4534/*
4535 * set bond mode specific net device operations
4536 */
4537void bond_set_mode_ops(struct bonding *bond, int mode)
4538{
4539	struct net_device *bond_dev = bond->dev;
4540
4541	switch (mode) {
4542	case BOND_MODE_ROUNDROBIN:
4543		break;
4544	case BOND_MODE_ACTIVEBACKUP:
4545		break;
4546	case BOND_MODE_XOR:
4547		bond_set_xmit_hash_policy(bond);
4548		break;
4549	case BOND_MODE_BROADCAST:
4550		break;
4551	case BOND_MODE_8023AD:
4552		bond_set_master_3ad_flags(bond);
4553		bond_set_xmit_hash_policy(bond);
4554		break;
4555	case BOND_MODE_ALB:
4556		bond_set_master_alb_flags(bond);
4557		/* FALLTHRU */
4558	case BOND_MODE_TLB:
4559		break;
4560	default:
4561		/* Should never happen, mode already checked */
4562		pr_err("%s: Error: Unknown bonding mode %d\n",
4563		       bond_dev->name, mode);
4564		break;
4565	}
4566}
4567
4568static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
4569				    struct ethtool_drvinfo *drvinfo)
4570{
4571	strncpy(drvinfo->driver, DRV_NAME, 32);
4572	strncpy(drvinfo->version, DRV_VERSION, 32);
4573	snprintf(drvinfo->fw_version, 32, "%d", BOND_ABI_VERSION);
4574}
4575
4576static const struct ethtool_ops bond_ethtool_ops = {
4577	.get_drvinfo		= bond_ethtool_get_drvinfo,
4578	.get_link		= ethtool_op_get_link,
4579	.get_tx_csum		= ethtool_op_get_tx_csum,
4580	.get_sg			= ethtool_op_get_sg,
4581	.get_tso		= ethtool_op_get_tso,
4582	.get_ufo		= ethtool_op_get_ufo,
4583	.get_flags		= ethtool_op_get_flags,
4584};
4585
4586static const struct net_device_ops bond_netdev_ops = {
4587	.ndo_init		= bond_init,
4588	.ndo_uninit		= bond_uninit,
4589	.ndo_open		= bond_open,
4590	.ndo_stop		= bond_close,
4591	.ndo_start_xmit		= bond_start_xmit,
4592	.ndo_select_queue	= bond_select_queue,
4593	.ndo_get_stats64	= bond_get_stats,
4594	.ndo_do_ioctl		= bond_do_ioctl,
4595	.ndo_set_multicast_list	= bond_set_multicast_list,
4596	.ndo_change_mtu		= bond_change_mtu,
4597	.ndo_set_mac_address 	= bond_set_mac_address,
4598	.ndo_neigh_setup	= bond_neigh_setup,
4599	.ndo_vlan_rx_register	= bond_vlan_rx_register,
4600	.ndo_vlan_rx_add_vid 	= bond_vlan_rx_add_vid,
4601	.ndo_vlan_rx_kill_vid	= bond_vlan_rx_kill_vid,
4602#ifdef CONFIG_NET_POLL_CONTROLLER
4603	.ndo_netpoll_cleanup	= bond_netpoll_cleanup,
4604	.ndo_poll_controller	= bond_poll_controller,
4605#endif
4606};
4607
4608static void bond_destructor(struct net_device *bond_dev)
4609{
4610	struct bonding *bond = netdev_priv(bond_dev);
4611	if (bond->wq)
4612		destroy_workqueue(bond->wq);
4613	free_netdev(bond_dev);
4614}
4615
4616static void bond_setup(struct net_device *bond_dev)
4617{
4618	struct bonding *bond = netdev_priv(bond_dev);
4619
4620	/* initialize rwlocks */
4621	rwlock_init(&bond->lock);
4622	rwlock_init(&bond->curr_slave_lock);
4623
4624	bond->params = bonding_defaults;
4625
4626	/* Initialize pointers */
4627	bond->dev = bond_dev;
4628	INIT_LIST_HEAD(&bond->vlan_list);
4629
4630	/* Initialize the device entry points */
4631	ether_setup(bond_dev);
4632	bond_dev->netdev_ops = &bond_netdev_ops;
4633	bond_dev->ethtool_ops = &bond_ethtool_ops;
4634	bond_set_mode_ops(bond, bond->params.mode);
4635
4636	bond_dev->destructor = bond_destructor;
4637
4638	/* Initialize the device options */
4639	bond_dev->tx_queue_len = 0;
4640	bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
4641	bond_dev->priv_flags |= IFF_BONDING;
4642	bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
4643
4644	if (bond->params.arp_interval)
4645		bond_dev->priv_flags |= IFF_MASTER_ARPMON;
4646
4647	/* At first, we block adding VLANs. That's the only way to
4648	 * prevent problems that occur when adding VLANs over an
4649	 * empty bond. The block will be removed once non-challenged
4650	 * slaves are enslaved.
4651	 */
4652	bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
4653
4654	/* don't acquire bond device's netif_tx_lock when
4655	 * transmitting */
4656	bond_dev->features |= NETIF_F_LLTX;
4657
4658	/* By default, we declare the bond to be fully
4659	 * VLAN hardware accelerated capable. Special
4660	 * care is taken in the various xmit functions
4661	 * when there are slaves that are not hw accel
4662	 * capable
4663	 */
4664	bond_dev->features |= (NETIF_F_HW_VLAN_TX |
4665			       NETIF_F_HW_VLAN_RX |
4666			       NETIF_F_HW_VLAN_FILTER);
4667
4668}
4669
4670static void bond_work_cancel_all(struct bonding *bond)
4671{
4672	write_lock_bh(&bond->lock);
4673	bond->kill_timers = 1;
4674	write_unlock_bh(&bond->lock);
4675
4676	if (bond->params.miimon && delayed_work_pending(&bond->mii_work))
4677		cancel_delayed_work(&bond->mii_work);
4678
4679	if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work))
4680		cancel_delayed_work(&bond->arp_work);
4681
4682	if (bond->params.mode == BOND_MODE_ALB &&
4683	    delayed_work_pending(&bond->alb_work))
4684		cancel_delayed_work(&bond->alb_work);
4685
4686	if (bond->params.mode == BOND_MODE_8023AD &&
4687	    delayed_work_pending(&bond->ad_work))
4688		cancel_delayed_work(&bond->ad_work);
4689}
4690
4691/*
4692* Destroy a bonding device.
4693* Must be under rtnl_lock when this function is called.
4694*/
4695static void bond_uninit(struct net_device *bond_dev)
4696{
4697	struct bonding *bond = netdev_priv(bond_dev);
4698	struct vlan_entry *vlan, *tmp;
4699
4700	bond_netpoll_cleanup(bond_dev);
4701
4702	/* Release the bonded slaves */
4703	bond_release_all(bond_dev);
4704
4705	list_del(&bond->bond_list);
4706
4707	bond_work_cancel_all(bond);
4708
4709	bond_remove_proc_entry(bond);
4710
4711	__hw_addr_flush(&bond->mc_list);
4712
4713	list_for_each_entry_safe(vlan, tmp, &bond->vlan_list, vlan_list) {
4714		list_del(&vlan->vlan_list);
4715		kfree(vlan);
4716	}
4717}
4718
4719/*------------------------- Module initialization ---------------------------*/
4720
4721/*
4722 * Convert string input module parms.  Accept either the
4723 * number of the mode or its string name.  A bit complicated because
4724 * some mode names are substrings of other names, and calls from sysfs
4725 * may have whitespace in the name (trailing newlines, for example).
4726 */
4727int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl)
4728{
4729	int modeint = -1, i, rv;
4730	char *p, modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, };
4731
4732	for (p = (char *)buf; *p; p++)
4733		if (!(isdigit(*p) || isspace(*p)))
4734			break;
4735
4736	if (*p)
4737		rv = sscanf(buf, "%20s", modestr);
4738	else
4739		rv = sscanf(buf, "%d", &modeint);
4740
4741	if (!rv)
4742		return -1;
4743
4744	for (i = 0; tbl[i].modename; i++) {
4745		if (modeint == tbl[i].mode)
4746			return tbl[i].mode;
4747		if (strcmp(modestr, tbl[i].modename) == 0)
4748			return tbl[i].mode;
4749	}
4750
4751	return -1;
4752}
4753
4754static int bond_check_params(struct bond_params *params)
4755{
4756	int arp_validate_value, fail_over_mac_value, primary_reselect_value;
4757
4758	/*
4759	 * Convert string parameters.
4760	 */
4761	if (mode) {
4762		bond_mode = bond_parse_parm(mode, bond_mode_tbl);
4763		if (bond_mode == -1) {
4764			pr_err("Error: Invalid bonding mode \"%s\"\n",
4765			       mode == NULL ? "NULL" : mode);
4766			return -EINVAL;
4767		}
4768	}
4769
4770	if (xmit_hash_policy) {
4771		if ((bond_mode != BOND_MODE_XOR) &&
4772		    (bond_mode != BOND_MODE_8023AD)) {
4773			pr_info("xmit_hash_policy param is irrelevant in mode %s\n",
4774			       bond_mode_name(bond_mode));
4775		} else {
4776			xmit_hashtype = bond_parse_parm(xmit_hash_policy,
4777							xmit_hashtype_tbl);
4778			if (xmit_hashtype == -1) {
4779				pr_err("Error: Invalid xmit_hash_policy \"%s\"\n",
4780				       xmit_hash_policy == NULL ? "NULL" :
4781				       xmit_hash_policy);
4782				return -EINVAL;
4783			}
4784		}
4785	}
4786
4787	if (lacp_rate) {
4788		if (bond_mode != BOND_MODE_8023AD) {
4789			pr_info("lacp_rate param is irrelevant in mode %s\n",
4790				bond_mode_name(bond_mode));
4791		} else {
4792			lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl);
4793			if (lacp_fast == -1) {
4794				pr_err("Error: Invalid lacp rate \"%s\"\n",
4795				       lacp_rate == NULL ? "NULL" : lacp_rate);
4796				return -EINVAL;
4797			}
4798		}
4799	}
4800
4801	if (ad_select) {
4802		params->ad_select = bond_parse_parm(ad_select, ad_select_tbl);
4803		if (params->ad_select == -1) {
4804			pr_err("Error: Invalid ad_select \"%s\"\n",
4805			       ad_select == NULL ? "NULL" : ad_select);
4806			return -EINVAL;
4807		}
4808
4809		if (bond_mode != BOND_MODE_8023AD) {
4810			pr_warning("ad_select param only affects 802.3ad mode\n");
4811		}
4812	} else {
4813		params->ad_select = BOND_AD_STABLE;
4814	}
4815
4816	if (max_bonds < 0) {
4817		pr_warning("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n",
4818			   max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS);
4819		max_bonds = BOND_DEFAULT_MAX_BONDS;
4820	}
4821
4822	if (miimon < 0) {
4823		pr_warning("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to %d\n",
4824			   miimon, INT_MAX, BOND_LINK_MON_INTERV);
4825		miimon = BOND_LINK_MON_INTERV;
4826	}
4827
4828	if (updelay < 0) {
4829		pr_warning("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n",
4830			   updelay, INT_MAX);
4831		updelay = 0;
4832	}
4833
4834	if (downdelay < 0) {
4835		pr_warning("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n",
4836			   downdelay, INT_MAX);
4837		downdelay = 0;
4838	}
4839
4840	if ((use_carrier != 0) && (use_carrier != 1)) {
4841		pr_warning("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n",
4842			   use_carrier);
4843		use_carrier = 1;
4844	}
4845
4846	if (num_grat_arp < 0 || num_grat_arp > 255) {
4847		pr_warning("Warning: num_grat_arp (%d) not in range 0-255 so it was reset to 1\n",
4848			   num_grat_arp);
4849		num_grat_arp = 1;
4850	}
4851
4852	if (num_unsol_na < 0 || num_unsol_na > 255) {
4853		pr_warning("Warning: num_unsol_na (%d) not in range 0-255 so it was reset to 1\n",
4854			   num_unsol_na);
4855		num_unsol_na = 1;
4856	}
4857
4858	/* reset values for 802.3ad */
4859	if (bond_mode == BOND_MODE_8023AD) {
4860		if (!miimon) {
4861			pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n");
4862			pr_warning("Forcing miimon to 100msec\n");
4863			miimon = 100;
4864		}
4865	}
4866
4867	if (tx_queues < 1 || tx_queues > 255) {
4868		pr_warning("Warning: tx_queues (%d) should be between "
4869			   "1 and 255, resetting to %d\n",
4870			   tx_queues, BOND_DEFAULT_TX_QUEUES);
4871		tx_queues = BOND_DEFAULT_TX_QUEUES;
4872	}
4873
4874	if ((all_slaves_active != 0) && (all_slaves_active != 1)) {
4875		pr_warning("Warning: all_slaves_active module parameter (%d), "
4876			   "not of valid value (0/1), so it was set to "
4877			   "0\n", all_slaves_active);
4878		all_slaves_active = 0;
4879	}
4880
4881	/* reset values for TLB/ALB */
4882	if ((bond_mode == BOND_MODE_TLB) ||
4883	    (bond_mode == BOND_MODE_ALB)) {
4884		if (!miimon) {
4885			pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure and link speed which are essential for TLB/ALB load balancing\n");
4886			pr_warning("Forcing miimon to 100msec\n");
4887			miimon = 100;
4888		}
4889	}
4890
4891	if (bond_mode == BOND_MODE_ALB) {
4892		pr_notice("In ALB mode you might experience client disconnections upon reconnection of a link if the bonding module updelay parameter (%d msec) is incompatible with the forwarding delay time of the switch\n",
4893			  updelay);
4894	}
4895
4896	if (!miimon) {
4897		if (updelay || downdelay) {
4898			/* just warn the user the up/down delay will have
4899			 * no effect since miimon is zero...
4900			 */
4901			pr_warning("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n",
4902				   updelay, downdelay);
4903		}
4904	} else {
4905		/* don't allow arp monitoring */
4906		if (arp_interval) {
4907			pr_warning("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n",
4908				   miimon, arp_interval);
4909			arp_interval = 0;
4910		}
4911
4912		if ((updelay % miimon) != 0) {
4913			pr_warning("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n",
4914				   updelay, miimon,
4915				   (updelay / miimon) * miimon);
4916		}
4917
4918		updelay /= miimon;
4919
4920		if ((downdelay % miimon) != 0) {
4921			pr_warning("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n",
4922				   downdelay, miimon,
4923				   (downdelay / miimon) * miimon);
4924		}
4925
4926		downdelay /= miimon;
4927	}
4928
4929	if (arp_interval < 0) {
4930		pr_warning("Warning: arp_interval module parameter (%d) , not in range 0-%d, so it was reset to %d\n",
4931			   arp_interval, INT_MAX, BOND_LINK_ARP_INTERV);
4932		arp_interval = BOND_LINK_ARP_INTERV;
4933	}
4934
4935	for (arp_ip_count = 0;
4936	     (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count];
4937	     arp_ip_count++) {
4938		/* not complete check, but should be good enough to
4939		   catch mistakes */
4940		if (!isdigit(arp_ip_target[arp_ip_count][0])) {
4941			pr_warning("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n",
4942				   arp_ip_target[arp_ip_count]);
4943			arp_interval = 0;
4944		} else {
4945			__be32 ip = in_aton(arp_ip_target[arp_ip_count]);
4946			arp_target[arp_ip_count] = ip;
4947		}
4948	}
4949
4950	if (arp_interval && !arp_ip_count) {
4951		/* don't allow arping if no arp_ip_target given... */
4952		pr_warning("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n",
4953			   arp_interval);
4954		arp_interval = 0;
4955	}
4956
4957	if (arp_validate) {
4958		if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
4959			pr_err("arp_validate only supported in active-backup mode\n");
4960			return -EINVAL;
4961		}
4962		if (!arp_interval) {
4963			pr_err("arp_validate requires arp_interval\n");
4964			return -EINVAL;
4965		}
4966
4967		arp_validate_value = bond_parse_parm(arp_validate,
4968						     arp_validate_tbl);
4969		if (arp_validate_value == -1) {
4970			pr_err("Error: invalid arp_validate \"%s\"\n",
4971			       arp_validate == NULL ? "NULL" : arp_validate);
4972			return -EINVAL;
4973		}
4974	} else
4975		arp_validate_value = 0;
4976
4977	if (miimon) {
4978		pr_info("MII link monitoring set to %d ms\n", miimon);
4979	} else if (arp_interval) {
4980		int i;
4981
4982		pr_info("ARP monitoring set to %d ms, validate %s, with %d target(s):",
4983			arp_interval,
4984			arp_validate_tbl[arp_validate_value].modename,
4985			arp_ip_count);
4986
4987		for (i = 0; i < arp_ip_count; i++)
4988			pr_info(" %s", arp_ip_target[i]);
4989
4990		pr_info("\n");
4991
4992	} else if (max_bonds) {
4993		/* miimon and arp_interval not set, we need one so things
4994		 * work as expected, see bonding.txt for details
4995		 */
4996		pr_warning("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n");
4997	}
4998
4999	if (primary && !USES_PRIMARY(bond_mode)) {
5000		/* currently, using a primary only makes sense
5001		 * in active backup, TLB or ALB modes
5002		 */
5003		pr_warning("Warning: %s primary device specified but has no effect in %s mode\n",
5004			   primary, bond_mode_name(bond_mode));
5005		primary = NULL;
5006	}
5007
5008	if (primary && primary_reselect) {
5009		primary_reselect_value = bond_parse_parm(primary_reselect,
5010							 pri_reselect_tbl);
5011		if (primary_reselect_value == -1) {
5012			pr_err("Error: Invalid primary_reselect \"%s\"\n",
5013			       primary_reselect ==
5014					NULL ? "NULL" : primary_reselect);
5015			return -EINVAL;
5016		}
5017	} else {
5018		primary_reselect_value = BOND_PRI_RESELECT_ALWAYS;
5019	}
5020
5021	if (fail_over_mac) {
5022		fail_over_mac_value = bond_parse_parm(fail_over_mac,
5023						      fail_over_mac_tbl);
5024		if (fail_over_mac_value == -1) {
5025			pr_err("Error: invalid fail_over_mac \"%s\"\n",
5026			       arp_validate == NULL ? "NULL" : arp_validate);
5027			return -EINVAL;
5028		}
5029
5030		if (bond_mode != BOND_MODE_ACTIVEBACKUP)
5031			pr_warning("Warning: fail_over_mac only affects active-backup mode.\n");
5032	} else {
5033		fail_over_mac_value = BOND_FOM_NONE;
5034	}
5035
5036	/* fill params struct with the proper values */
5037	params->mode = bond_mode;
5038	params->xmit_policy = xmit_hashtype;
5039	params->miimon = miimon;
5040	params->num_grat_arp = num_grat_arp;
5041	params->num_unsol_na = num_unsol_na;
5042	params->arp_interval = arp_interval;
5043	params->arp_validate = arp_validate_value;
5044	params->updelay = updelay;
5045	params->downdelay = downdelay;
5046	params->use_carrier = use_carrier;
5047	params->lacp_fast = lacp_fast;
5048	params->primary[0] = 0;
5049	params->primary_reselect = primary_reselect_value;
5050	params->fail_over_mac = fail_over_mac_value;
5051	params->tx_queues = tx_queues;
5052	params->all_slaves_active = all_slaves_active;
5053
5054	if (primary) {
5055		strncpy(params->primary, primary, IFNAMSIZ);
5056		params->primary[IFNAMSIZ - 1] = 0;
5057	}
5058
5059	memcpy(params->arp_targets, arp_target, sizeof(arp_target));
5060
5061	return 0;
5062}
5063
5064static struct lock_class_key bonding_netdev_xmit_lock_key;
5065static struct lock_class_key bonding_netdev_addr_lock_key;
5066
5067static void bond_set_lockdep_class_one(struct net_device *dev,
5068				       struct netdev_queue *txq,
5069				       void *_unused)
5070{
5071	lockdep_set_class(&txq->_xmit_lock,
5072			  &bonding_netdev_xmit_lock_key);
5073}
5074
5075static void bond_set_lockdep_class(struct net_device *dev)
5076{
5077	lockdep_set_class(&dev->addr_list_lock,
5078			  &bonding_netdev_addr_lock_key);
5079	netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL);
5080}
5081
5082/*
5083 * Called from registration process
5084 */
5085static int bond_init(struct net_device *bond_dev)
5086{
5087	struct bonding *bond = netdev_priv(bond_dev);
5088	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id);
5089
5090	pr_debug("Begin bond_init for %s\n", bond_dev->name);
5091
5092	bond->wq = create_singlethread_workqueue(bond_dev->name);
5093	if (!bond->wq)
5094		return -ENOMEM;
5095
5096	bond_set_lockdep_class(bond_dev);
5097
5098	netif_carrier_off(bond_dev);
5099
5100	bond_create_proc_entry(bond);
5101	list_add_tail(&bond->bond_list, &bn->dev_list);
5102
5103	bond_prepare_sysfs_group(bond);
5104
5105	__hw_addr_init(&bond->mc_list);
5106	return 0;
5107}
5108
5109static int bond_validate(struct nlattr *tb[], struct nlattr *data[])
5110{
5111	if (tb[IFLA_ADDRESS]) {
5112		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
5113			return -EINVAL;
5114		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
5115			return -EADDRNOTAVAIL;
5116	}
5117	return 0;
5118}
5119
5120static struct rtnl_link_ops bond_link_ops __read_mostly = {
5121	.kind		= "bond",
5122	.priv_size	= sizeof(struct bonding),
5123	.setup		= bond_setup,
5124	.validate	= bond_validate,
5125};
5126
5127/* Create a new bond based on the specified name and bonding parameters.
5128 * If name is NULL, obtain a suitable "bond%d" name for us.
5129 * Caller must NOT hold rtnl_lock; we need to release it here before we
5130 * set up our sysfs entries.
5131 */
5132int bond_create(struct net *net, const char *name)
5133{
5134	struct net_device *bond_dev;
5135	int res;
5136
5137	rtnl_lock();
5138
5139	bond_dev = alloc_netdev_mq(sizeof(struct bonding), name ? name : "",
5140				bond_setup, tx_queues);
5141	if (!bond_dev) {
5142		pr_err("%s: eek! can't alloc netdev!\n", name);
5143		rtnl_unlock();
5144		return -ENOMEM;
5145	}
5146
5147	dev_net_set(bond_dev, net);
5148	bond_dev->rtnl_link_ops = &bond_link_ops;
5149
5150	if (!name) {
5151		res = dev_alloc_name(bond_dev, "bond%d");
5152		if (res < 0)
5153			goto out;
5154	} else {
5155		/*
5156		 * If we're given a name to register
5157		 * we need to ensure that its not already
5158		 * registered
5159		 */
5160		res = -EEXIST;
5161		if (__dev_get_by_name(net, name) != NULL)
5162			goto out;
5163	}
5164
5165	res = register_netdevice(bond_dev);
5166
5167out:
5168	rtnl_unlock();
5169	if (res < 0)
5170		bond_destructor(bond_dev);
5171	return res;
5172}
5173
5174static int __net_init bond_net_init(struct net *net)
5175{
5176	struct bond_net *bn = net_generic(net, bond_net_id);
5177
5178	bn->net = net;
5179	INIT_LIST_HEAD(&bn->dev_list);
5180
5181	bond_create_proc_dir(bn);
5182
5183	return 0;
5184}
5185
5186static void __net_exit bond_net_exit(struct net *net)
5187{
5188	struct bond_net *bn = net_generic(net, bond_net_id);
5189
5190	bond_destroy_proc_dir(bn);
5191}
5192
5193static struct pernet_operations bond_net_ops = {
5194	.init = bond_net_init,
5195	.exit = bond_net_exit,
5196	.id   = &bond_net_id,
5197	.size = sizeof(struct bond_net),
5198};
5199
5200static int __init bonding_init(void)
5201{
5202	int i;
5203	int res;
5204
5205	pr_info("%s", version);
5206
5207	res = bond_check_params(&bonding_defaults);
5208	if (res)
5209		goto out;
5210
5211	res = register_pernet_subsys(&bond_net_ops);
5212	if (res)
5213		goto out;
5214
5215	res = rtnl_link_register(&bond_link_ops);
5216	if (res)
5217		goto err_link;
5218
5219	for (i = 0; i < max_bonds; i++) {
5220		res = bond_create(&init_net, NULL);
5221		if (res)
5222			goto err;
5223	}
5224
5225	res = bond_create_sysfs();
5226	if (res)
5227		goto err;
5228
5229	register_netdevice_notifier(&bond_netdev_notifier);
5230	register_inetaddr_notifier(&bond_inetaddr_notifier);
5231	bond_register_ipv6_notifier();
5232out:
5233	return res;
5234err:
5235	rtnl_link_unregister(&bond_link_ops);
5236err_link:
5237	unregister_pernet_subsys(&bond_net_ops);
5238	goto out;
5239
5240}
5241
5242static void __exit bonding_exit(void)
5243{
5244	unregister_netdevice_notifier(&bond_netdev_notifier);
5245	unregister_inetaddr_notifier(&bond_inetaddr_notifier);
5246	bond_unregister_ipv6_notifier();
5247
5248	bond_destroy_sysfs();
5249
5250	rtnl_link_unregister(&bond_link_ops);
5251	unregister_pernet_subsys(&bond_net_ops);
5252}
5253
5254module_init(bonding_init);
5255module_exit(bonding_exit);
5256MODULE_LICENSE("GPL");
5257MODULE_VERSION(DRV_VERSION);
5258MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);
5259MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");
5260MODULE_ALIAS_RTNL_LINK("bond");
5261