/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *		- Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <net/pkt_sched.h>
#include <net/dst.h>

#include <typedefs.h>
#include <bcmdefs.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */

static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	skb_dst_force(skb);
	q->gso_skb = skb;
	q->qstats.requeues++;
	q->q.qlen++;	/* it's still part of the queue */
	__netif_schedule(q);

	return 0;
}

static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
	struct sk_buff *skb = q->gso_skb;

	if (unlikely(skb)) {
		struct net_device *dev = qdisc_dev(q);
		struct netdev_queue *txq;

		/* check the reason of requeuing without tx lock first */
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
		if (!netif_tx_queue_stopped(txq) &&
		    !netif_tx_queue_frozen(txq)) {
			q->gso_skb = NULL;
			q->q.qlen--;
		} else
			skb = NULL;
	} else {
		skb = q->dequeue(q);
	}

	return skb;
}

static inline int handle_dev_cpu_collision(struct sk_buff *skb,
					   struct netdev_queue *dev_queue,
					   struct Qdisc *q)
{
	int ret;

	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
		kfree_skb(skb);
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
			       "fix it urgently!\n", dev_queue->dev->name);
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__this_cpu_inc(softnet_data.cpu_collision);
		ret = dev_requeue_skb(skb, q);
	}

	return ret;
}

/*
 * Transmit one skb, and handle the return status as required. Holding the
 * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this
 * function.
 *
 * Returns to the caller:
 *	0  - queue is empty or throttled.
 *	>0 - queue is not empty.
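 *
 *	sch_direct_xmit() is entered with the qdisc root lock held; it drops
 *	that lock around the actual hardware transmit (the driver's TX lock
 *	is held instead) and re-acquires it before returning.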
 */
int BCMFASTPATH_HOST sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
		    struct net_device *dev, struct netdev_queue *txq,
		    spinlock_t *root_lock)
{
	int ret = NETDEV_TX_BUSY;

	/* And release qdisc */
	spin_unlock(root_lock);

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
		ret = dev_hard_start_xmit(skb, dev, txq);

	HARD_TX_UNLOCK(dev, txq);

	spin_lock(root_lock);

	if (dev_xmit_complete(ret)) {
		/* Driver sent out skb successfully or skb was consumed */
		ret = qdisc_qlen(q);
	} else if (ret == NETDEV_TX_LOCKED) {
		/* Driver try lock failed */
		ret = handle_dev_cpu_collision(skb, txq, q);
	} else {
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

		ret = dev_requeue_skb(skb, q);
	}

	if (ret && (netif_tx_queue_stopped(txq) ||
		    netif_tx_queue_frozen(txq)))
		ret = 0;

	return ret;
}

/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 * netif_tx_lock serializes accesses to device driver.
 *
 * qdisc_lock(q) and netif_tx_lock are mutually exclusive:
 * if one is grabbed, the other must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *	0  - queue is empty or throttled.
 *	>0 - queue is not empty.
 *
 */
static inline int qdisc_restart(struct Qdisc *q)
{
	struct netdev_queue *txq;
	struct net_device *dev;
	spinlock_t *root_lock;
	struct sk_buff *skb;

	/* Dequeue packet */
	skb = dequeue_skb(q);
	if (unlikely(!skb))
		return 0;
	WARN_ON_ONCE(skb_dst_is_noref(skb));
	root_lock = qdisc_lock(q);
	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	return sch_direct_xmit(skb, q, dev, txq, root_lock);
}

void BCMFASTPATH __qdisc_run(struct Qdisc *q)
{
	unsigned long start_time = jiffies;

	while (qdisc_restart(q)) {
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
			__netif_schedule(q);
			break;
		}
	}

	qdisc_run_end(q);
}

unsigned long dev_trans_start(struct net_device *dev)
{
	unsigned long val, res = dev->trans_start;
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		val = netdev_get_tx_queue(dev, i)->trans_start;
		if (val && time_after(val, res))
			res = val;
	}
	dev->trans_start = res;
	return res;
}
EXPORT_SYMBOL(dev_trans_start);

static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_timedout = 0;
			unsigned int i;
			unsigned long trans_start;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				/*
				 * old device drivers set dev->trans_start
				 */
				trans_start = txq->trans_start ? : dev->trans_start;
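				/*
				 * Consider a queue hung if it is stopped
				 * and its last transmit is older than
				 * watchdog_timeo.
				 */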
				if (netif_tx_queue_stopped(txq) &&
				    time_after(jiffies, (trans_start +
							 dev->watchdog_timeo))) {
					some_queue_timedout = 1;
					break;
				}
			}

			if (some_queue_timedout) {
				char drivername[64];
				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
					  dev->name, netdev_drivername(dev, drivername, 64), i);
				dev->netdev_ops->ndo_tx_timeout(dev);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->netdev_ops->ndo_tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 *	Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 *	Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		linkwatch_fire_event(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_off);

/**
 *	netif_notify_peers - notify network peers about existence of @dev
 *	@dev: network device
 *
 * Generate traffic such that interested network peers are aware of
 * @dev, such as by generating a gratuitous ARP. This may be used when
 * a device wants to inform the rest of the network about some sort of
 * reconfiguration such as a failover event or virtual machine
 * migration.
 */
void netif_notify_peers(struct net_device *dev)
{
	rtnl_lock();
	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(netif_notify_peers);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
{
	return NULL;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		= "noop",
	.priv_size	= 0,
	.enqueue	= noop_enqueue,
	.dequeue	= noop_dequeue,
	.peek		= noop_dequeue,
	.owner		= THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	.qdisc		= &noop_qdisc,
	.qdisc_sleeping	= &noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	= noop_enqueue,
	.dequeue	= noop_dequeue,
	.flags		= TCQ_F_BUILTIN,
	.ops		= &noop_qdisc_ops,
	.list		= LIST_HEAD_INIT(noop_qdisc.list),
	.q.lock		= __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	= &noop_netdev_queue,
};
EXPORT_SYMBOL(noop_qdisc);

static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		= "noqueue",
	.priv_size	= 0,
	.enqueue	= noop_enqueue,
	.dequeue	= noop_dequeue,
	.peek		= noop_dequeue,
	.owner		= THIS_MODULE,
};

static struct Qdisc noqueue_qdisc;
static struct netdev_queue noqueue_netdev_queue = {
	.qdisc		= &noqueue_qdisc,
	.qdisc_sleeping	= &noqueue_qdisc,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	= NULL,
	.dequeue	= noop_dequeue,
	.flags		= TCQ_F_BUILTIN,
	.ops		= &noqueue_qdisc_ops,
	.list		= LIST_HEAD_INIT(noqueue_qdisc.list),
	.q.lock		= __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.dev_queue	= &noqueue_netdev_queue,
};


static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

/*
 * Private data for a pfifo_fast scheduler containing:
 *	- queues for the three bands
 *	- bitmap indicating which of the bands contain skbs
 */
struct pfifo_fast_priv {
	u32 bitmap;
	struct sk_buff_head q[PFIFO_FAST_BANDS];
};

/*
 * Convert a bitmap to the first band number where an skb is queued, where:
 *	bitmap=0 means there are no skbs on any band.
 *	bitmap=1 means there is an skb on band 0.
 *	bitmap=7 means there are skbs on all 3 bands, etc.
 */
static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};

static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
					     int band)
{
	return priv->q + band;
}

static int BCMFASTPATH pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
		int band = prio2band[skb->priority & TC_PRIO_MAX];
		struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
		struct sk_buff_head *list = band2list(priv, band);

		priv->bitmap |= (1 << band);
		qdisc->q.qlen++;
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}

	return qdisc_drop(skb, qdisc);
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
{
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	int band = bitmap2band[priv->bitmap];

	if (likely(band >= 0)) {
		struct sk_buff_head *list = band2list(priv, band);
		struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);

		qdisc->q.qlen--;
		if (skb_queue_empty(list))
			priv->bitmap &= ~(1 << band);

		return skb;
	}

	return NULL;
}

static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
{
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	int band = bitmap2band[priv->bitmap];

	if (band >= 0) {
		struct sk_buff_head *list = band2list(priv, band);

		return skb_peek(list);
	}

	return NULL;
}

static void pfifo_fast_reset(struct Qdisc* qdisc)
{
	int prio;
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, band2list(priv, prio));

	priv->bitmap = 0;
	qdisc->qstats.backlog = 0;
	qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(band2list(priv, prio));

	return 0;
}

struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		= "pfifo_fast",
	.priv_size	= sizeof(struct pfifo_fast_priv),
	.enqueue	= pfifo_fast_enqueue,
	.dequeue	= pfifo_fast_dequeue,
	.peek		= pfifo_fast_peek,
	.init		= pfifo_fast_init,
	.reset		= pfifo_fast_reset,
	.dump		= pfifo_fast_dump,
	.owner		= THIS_MODULE,
};

struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 64-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	spin_lock_init(&sch->busylock);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(err);
}

struct Qdisc * qdisc_create_dflt(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 struct Qdisc_ops *ops,
				 unsigned int parentid)
{
	struct Qdisc *sch;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under qdisc_lock(qdisc) and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);

	if (qdisc->gso_skb) {
		kfree_skb(qdisc->gso_skb);
		qdisc->gso_skb = NULL;
		qdisc->q.qlen = 0;
	}
}
EXPORT_SYMBOL(qdisc_reset);

static void qdisc_rcu_free(struct rcu_head *head)
{
	struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);

	kfree((char *) qdisc - qdisc->padded);
}

void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

#ifdef CONFIG_NET_SCHED
	qdisc_list_del(qdisc);

	qdisc_put_stab(qdisc->stab);
#endif
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	kfree_skb(qdisc->gso_skb);
	/*
	 * gen_estimator est_timer() might access qdisc->q.lock,
	 * wait a RCU grace period before freeing qdisc.
	 */
	call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
}
EXPORT_SYMBOL(qdisc_destroy);

/* Attach toplevel qdisc to device queue. */
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
			      struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
	spinlock_t *root_lock;

	root_lock = qdisc_lock(oqdisc);
	spin_lock_bh(root_lock);

	/* Prune old scheduler */
	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
		qdisc_reset(oqdisc);

	/* ... and graft new one */
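	/*
	 * The active dev_queue->qdisc is parked on noop_qdisc here;
	 * dev_activate() later installs qdisc_sleeping as the active qdisc.
	 */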
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);

	spin_unlock_bh(root_lock);

	return oqdisc;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
					  &pfifo_fast_ops, TC_H_ROOT);
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}

		/* Can by-pass the queue discipline for default qdisc */
		qdisc->flags |= TCQ_F_CAN_BYPASS;
	} else {
		qdisc = &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

static void attach_default_qdiscs(struct net_device *dev)
{
	struct netdev_queue *txq;
	struct Qdisc *qdisc;

	txq = netdev_get_tx_queue(dev, 0);

	if (!netif_is_multiqueue(dev) || dev->tx_queue_len == 0) {
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
		dev->qdisc = txq->qdisc_sleeping;
		atomic_inc(&dev->qdisc->refcnt);
	} else {
		qdisc = qdisc_create_dflt(dev, txq, &mq_qdisc_ops, TC_H_ROOT);
		if (qdisc) {
			qdisc->ops->attach(qdisc);
			dev->qdisc = qdisc;
		}
	}
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (need_watchdog_p && new_qdisc != &noqueue_qdisc) {
		dev_queue->trans_start = 0;
		*need_watchdog_p = 1;
	}
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to device;
	   create default one i.e. pfifo_fast for devices,
	   which need queueing and noqueue_qdisc for
	   virtual interfaces
	 */

	if (dev->qdisc == &noop_qdisc)
		attach_default_qdiscs(dev);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
	transition_one_qdisc(dev, &dev->rx_queue, NULL);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;

	qdisc = dev_queue->qdisc;
	if (qdisc) {
		spin_lock_bh(qdisc_lock(qdisc));

		if (!(qdisc->flags & TCQ_F_BUILTIN))
			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		qdisc_reset(qdisc);

		spin_unlock_bh(qdisc_lock(qdisc));
	}
}

static bool some_qdisc_is_busy(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc_sleeping;
		root_lock = qdisc_lock(q);

		spin_lock_bh(root_lock);

		val = (qdisc_is_running(q) ||
		       test_bit(__QDISC_STATE_SCHED, &q->state));

		spin_unlock_bh(root_lock);

		if (val)
			return true;
	}
	return false;
}

void dev_deactivate(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);

	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	while (some_qdisc_is_busy(dev))
		yield();
}

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

void dev_init_scheduler(struct net_device *dev)
{
	dev->qdisc = &noop_qdisc;
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_destroy(qdisc);
	}
}

void dev_shutdown(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
	qdisc_destroy(dev->qdisc);
	dev->qdisc = &noop_qdisc;

	WARN_ON(timer_pending(&dev->watchdog_timer));
}