/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

#ifdef CONFIG_RT_GROUP_SCHED

#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_SCHED_DEBUG
	WARN_ON_ONCE(!rt_entity_is_task(rt_se));
#endif
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return rt_rq->rq;
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	return rt_se->rt_rq;
}

#else /* CONFIG_RT_GROUP_SCHED */

#define rt_entity_is_task(rt_se) (1)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return container_of(rt_rq, struct rq, rt);
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	struct task_struct *p = rt_task_of(rt_se);
	struct rq *rq = task_rq(p);

	return &rq->rt;
}

#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP

static inline int rt_overloaded(struct rq *rq)
{
	return atomic_read(&rq->rd->rto_count);
}

static inline void rt_set_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
	/*
	 * Make sure the mask is visible before we set
	 * the overload count. That is checked to determine
	 * if we should look at the mask. It would be a shame
	 * if we looked at the mask, but the mask was not
	 * updated yet.
	 */
	wmb();
	atomic_inc(&rq->rd->rto_count);
}

static inline void rt_clear_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	/* the order here really doesn't matter */
	atomic_dec(&rq->rd->rto_count);
	cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}

static void update_rt_migration(struct rt_rq *rt_rq)
{
	if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
		if (!rt_rq->overloaded) {
			rt_set_overload(rq_of_rt_rq(rt_rq));
			rt_rq->overloaded = 1;
		}
	} else if (rt_rq->overloaded) {
		rt_clear_overload(rq_of_rt_rq(rt_rq));
		rt_rq->overloaded = 0;
	}
}

static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (!rt_entity_is_task(rt_se))
		return;

	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total++;
	if (rt_se->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory++;

	update_rt_migration(rt_rq);
}

static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (!rt_entity_is_task(rt_se))
		return;

	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total--;
	if (rt_se->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory--;

	update_rt_migration(rt_rq);
}

static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
	plist_node_init(&p->pushable_tasks, p->prio);
	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
}

static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
}

static inline int has_pushable_tasks(struct rq *rq)
{
	return !plist_head_empty(&rq->rt.pushable_tasks);
}

#else

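/*
 * Without CONFIG_SMP there are no other runqueues to push to or pull
 * from, so the pushable-task and migration bookkeeping become no-ops.
 */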
static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline
void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline
void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

#endif /* CONFIG_SMP */

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
	return !list_empty(&rt_se->run_list);
}

#ifdef CONFIG_RT_GROUP_SCHED

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	if (!rt_rq->tg)
		return RUNTIME_INF;

	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}

#define for_each_leaf_rt_rq(rt_rq, rq) \
	list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = rt_se->parent)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return rt_se->my_q;
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);

static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	int this_cpu = smp_processor_id();
	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
	struct sched_rt_entity *rt_se;

	rt_se = rt_rq->tg->rt_se[this_cpu];

	if (rt_rq->rt_nr_running) {
		if (rt_se && !on_rt_rq(rt_se))
			enqueue_rt_entity(rt_se, false);
		if (rt_rq->highest_prio.curr < curr->prio)
			resched_task(curr);
	}
}

static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	int this_cpu = smp_processor_id();
	struct sched_rt_entity *rt_se;

	rt_se = rt_rq->tg->rt_se[this_cpu];

	if (rt_se && on_rt_rq(rt_se))
		dequeue_rt_entity(rt_se);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
}

static int rt_se_boosted(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = group_rt_rq(rt_se);
	struct task_struct *p;

	if (rt_rq)
		return !!rt_rq->rt_nr_boosted;

	p = rt_task_of(rt_se);
	return p->prio != p->normal_prio;
}

#ifdef CONFIG_SMP
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_rq(smp_processor_id())->rd->span;
}
#else
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}
#endif

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &rt_rq->tg->rt_bandwidth;
}

#else /* !CONFIG_RT_GROUP_SCHED */

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(def_rt_bandwidth.rt_period);
}

#define for_each_leaf_rt_rq(rt_rq, rq) \
	for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = NULL)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return NULL;
}

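/*
 * Without group scheduling there is no parent entity to enqueue;
 * waking up an rt_rq just means rescheduling its runqueue's current
 * task if anything is runnable.
 */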
static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	if (rt_rq->rt_nr_running)
		resched_task(rq_of_rt_rq(rt_rq)->curr);
}

static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled;
}

static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return &cpu_rq(cpu)->rt;
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &def_rt_bandwidth;
}

#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP
/*
 * We ran out of runtime, see if we can borrow some from our neighbours.
 */
static int do_balance_runtime(struct rt_rq *rt_rq)
{
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
	int i, weight, more = 0;
	u64 rt_period;

	weight = cpumask_weight(rd->span);

	raw_spin_lock(&rt_b->rt_runtime_lock);
	rt_period = ktime_to_ns(rt_b->rt_period);
	for_each_cpu(i, rd->span) {
		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
		s64 diff;

		if (iter == rt_rq)
			continue;

		raw_spin_lock(&iter->rt_runtime_lock);
		/*
		 * Either all rqs have inf runtime and there's nothing to steal
		 * or __disable_runtime() below sets a specific rq to inf to
		 * indicate it's been disabled and disallow stealing.
		 */
		if (iter->rt_runtime == RUNTIME_INF)
			goto next;

		/*
		 * From runqueues with spare time, take 1/n part of their
		 * spare time, but no more than our period.
		 */
		diff = iter->rt_runtime - iter->rt_time;
		if (diff > 0) {
			diff = div_u64((u64)diff, weight);
			if (rt_rq->rt_runtime + diff > rt_period)
				diff = rt_period - rt_rq->rt_runtime;
			iter->rt_runtime -= diff;
			rt_rq->rt_runtime += diff;
			more = 1;
			if (rt_rq->rt_runtime == rt_period) {
				raw_spin_unlock(&iter->rt_runtime_lock);
				break;
			}
		}
next:
		raw_spin_unlock(&iter->rt_runtime_lock);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);

	return more;
}

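/*
 * Worked example of the arithmetic above: in a root domain spanning
 * 4 CPUs (weight == 4), a throttled rt_rq visits each neighbour and
 * takes (iter->rt_runtime - iter->rt_time) / 4 of its spare time,
 * stopping early once its own rt_runtime has grown to the full period.
 */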
/*
 * Ensure this RQ takes back all the runtime it lent to its neighbours.
 */
static void __disable_runtime(struct rq *rq)
{
	struct root_domain *rd = rq->rd;
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	for_each_leaf_rt_rq(rt_rq, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
		s64 want;
		int i;

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * Either we're all inf and nobody needs to borrow, or we're
		 * already disabled and thus have nothing to do, or we have
		 * exactly the right amount of runtime to take out.
		 */
		if (rt_rq->rt_runtime == RUNTIME_INF ||
				rt_rq->rt_runtime == rt_b->rt_runtime)
			goto balanced;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);

		/*
		 * Calculate the difference between what we started out with
		 * and what we currently have, that's the amount of runtime
		 * we lent and now have to reclaim.
		 */
		want = rt_b->rt_runtime - rt_rq->rt_runtime;

		/*
		 * Greedy reclaim, take back as much as we can.
		 */
		for_each_cpu(i, rd->span) {
			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
			s64 diff;

			/*
			 * Can't reclaim from ourselves or disabled runqueues.
			 */
			if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
				continue;

			raw_spin_lock(&iter->rt_runtime_lock);
			if (want > 0) {
				diff = min_t(s64, iter->rt_runtime, want);
				iter->rt_runtime -= diff;
				want -= diff;
			} else {
				iter->rt_runtime -= want;
				want -= want;
			}
			raw_spin_unlock(&iter->rt_runtime_lock);

			if (!want)
				break;
		}

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * We cannot be left wanting - that would mean some runtime
		 * leaked out of the system.
		 */
		BUG_ON(want);
balanced:
		/*
		 * Disable all the borrow logic by pretending we have inf
		 * runtime - in which case borrowing doesn't make sense.
		 */
		rt_rq->rt_runtime = RUNTIME_INF;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);
	}
}

static void disable_runtime(struct rq *rq)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&rq->lock, flags);
	__disable_runtime(rq);
	raw_spin_unlock_irqrestore(&rq->lock, flags);
}

static void __enable_runtime(struct rq *rq)
{
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	/*
	 * Reset each runqueue's bandwidth settings
	 */
	for_each_leaf_rt_rq(rt_rq, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = rt_b->rt_runtime;
		rt_rq->rt_time = 0;
		rt_rq->rt_throttled = 0;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);
	}
}

static void enable_runtime(struct rq *rq)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&rq->lock, flags);
	__enable_runtime(rq);
	raw_spin_unlock_irqrestore(&rq->lock, flags);
}

static int balance_runtime(struct rt_rq *rt_rq)
{
	int more = 0;

	if (rt_rq->rt_time > rt_rq->rt_runtime) {
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		more = do_balance_runtime(rt_rq);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
	}

	return more;
}
#else /* !CONFIG_SMP */
static inline int balance_runtime(struct rt_rq *rt_rq)
{
	return 0;
}
#endif /* CONFIG_SMP */

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
	int i, idle = 1;
	const struct cpumask *span;

	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
		return 1;

	span = sched_rt_period_mask();
	for_each_cpu(i, span) {
		int enqueue = 0;
		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
		struct rq *rq = rq_of_rt_rq(rt_rq);

		raw_spin_lock(&rq->lock);
		if (rt_rq->rt_time) {
			u64 runtime;

			raw_spin_lock(&rt_rq->rt_runtime_lock);
			if (rt_rq->rt_throttled)
				balance_runtime(rt_rq);
			runtime = rt_rq->rt_runtime;
			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
				rt_rq->rt_throttled = 0;
				enqueue = 1;
			}
			if (rt_rq->rt_time || rt_rq->rt_nr_running)
				idle = 0;
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		} else if (rt_rq->rt_nr_running)
			idle = 0;

		if (enqueue)
			sched_rt_rq_enqueue(rt_rq);
		raw_spin_unlock(&rq->lock);
	}

	return idle;
}

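/*
 * Resolve an entity's effective priority: a group entity inherits the
 * priority of the highest-priority task queued below it.
 */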
static inline int rt_se_prio(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_RT_GROUP_SCHED
	struct rt_rq *rt_rq = group_rt_rq(rt_se);

	if (rt_rq)
		return rt_rq->highest_prio.curr;
#endif

	return rt_task_of(rt_se)->prio;
}

static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
	u64 runtime = sched_rt_runtime(rt_rq);

	if (rt_rq->rt_throttled)
		return rt_rq_throttled(rt_rq);

	if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
		return 0;

	balance_runtime(rt_rq);
	runtime = sched_rt_runtime(rt_rq);
	if (runtime == RUNTIME_INF)
		return 0;

	if (rt_rq->rt_time > runtime) {
		rt_rq->rt_throttled = 1;
		if (rt_rq_throttled(rt_rq)) {
			sched_rt_rq_dequeue(rt_rq);
			return 1;
		}
	}

	return 0;
}

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	struct sched_rt_entity *rt_se = &curr->rt;
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	u64 delta_exec;

	if (!task_has_rt_policy(curr))
		return;

	delta_exec = rq->clock - curr->se.exec_start;
	if (unlikely((s64)delta_exec < 0))
		delta_exec = 0;

	schedstat_set(curr->se.statistics.exec_max,
		      max(curr->se.statistics.exec_max, delta_exec));

	curr->se.sum_exec_runtime += delta_exec;
	account_group_exec_runtime(curr, delta_exec);

	curr->se.exec_start = rq->clock;
	cpuacct_charge(curr, delta_exec);

	sched_rt_avg_update(rq, delta_exec);

	if (!rt_bandwidth_enabled())
		return;

	for_each_sched_rt_entity(rt_se) {
		rt_rq = rt_rq_of_se(rt_se);

		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
			raw_spin_lock(&rt_rq->rt_runtime_lock);
			rt_rq->rt_time += delta_exec;
			if (sched_rt_runtime_exceeded(rt_rq))
				resched_task(curr);
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		}
	}
}

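/*
 * Each rt_rq caches its highest and second-highest queued priorities
 * (highest_prio.curr/.next) so the push/pull and cpupri code can make
 * decisions without rescanning the priority bitmap.
 */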
#if defined CONFIG_SMP

static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);

static inline int next_prio(struct rq *rq)
{
	struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);

	if (next && rt_prio(next->prio))
		return next->prio;
	else
		return MAX_RT_PRIO;
}

static void
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	if (prio < prev_prio) {

		/*
		 * If the new task is higher in priority than anything on the
		 * run-queue, we know that the previous high becomes our
		 * next-highest.
		 */
		rt_rq->highest_prio.next = prev_prio;

		if (rq->online)
			cpupri_set(&rq->rd->cpupri, rq->cpu, prio);

	} else if (prio == rt_rq->highest_prio.curr)
		/*
		 * If the next task is equal in priority to the highest on
		 * the run-queue, then we implicitly know that the next highest
		 * task cannot be any lower than current
		 */
		rt_rq->highest_prio.next = prio;
	else if (prio < rt_rq->highest_prio.next)
		/*
		 * Otherwise, we need to recompute next-highest
		 */
		rt_rq->highest_prio.next = next_prio(rq);
}

static void
dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next))
		rt_rq->highest_prio.next = next_prio(rq);

	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
}

#else /* CONFIG_SMP */

static inline
void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
static inline
void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}

#endif /* CONFIG_SMP */

#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
static void
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int prev_prio = rt_rq->highest_prio.curr;

	if (prio < prev_prio)
		rt_rq->highest_prio.curr = prio;

	inc_rt_prio_smp(rt_rq, prio, prev_prio);
}

static void
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int prev_prio = rt_rq->highest_prio.curr;

	if (rt_rq->rt_nr_running) {

		WARN_ON(prio < prev_prio);

		/*
		 * This may have been our highest task, and therefore
		 * we may have some recomputation to do
		 */
		if (prio == prev_prio) {
			struct rt_prio_array *array = &rt_rq->active;

			rt_rq->highest_prio.curr =
				sched_find_first_bit(array->bitmap);
		}

	} else
		rt_rq->highest_prio.curr = MAX_RT_PRIO;

	dec_rt_prio_smp(rt_rq, prio, prev_prio);
}

#else

static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}

#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_RT_GROUP_SCHED

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted++;

	if (rt_rq->tg)
		start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
}

static void
dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted--;

	WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
}

#else /* CONFIG_RT_GROUP_SCHED */

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	start_rt_bandwidth(&def_rt_bandwidth);
}

static inline
void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}

#endif /* CONFIG_RT_GROUP_SCHED */

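/*
 * inc/dec_rt_tasks are the central accounting hooks: every entity that
 * is queued or dequeued updates rt_nr_running here and fans out to the
 * priority, migration and group bookkeeping above.
 */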
static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	int prio = rt_se_prio(rt_se);

	WARN_ON(!rt_prio(prio));
	rt_rq->rt_nr_running++;

	inc_rt_prio(rt_rq, prio);
	inc_rt_migration(rt_se, rt_rq);
	inc_rt_group(rt_se, rt_rq);
}

static inline
void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
	WARN_ON(!rt_rq->rt_nr_running);
	rt_rq->rt_nr_running--;

	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
	dec_rt_migration(rt_se, rt_rq);
	dec_rt_group(rt_se, rt_rq);
}

static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;
	struct rt_rq *group_rq = group_rt_rq(rt_se);
	struct list_head *queue = array->queue + rt_se_prio(rt_se);

	/*
	 * Don't enqueue the group if it's throttled, or when empty.
	 * The latter is a consequence of the former when a child group
	 * gets throttled and the current group doesn't have any other
	 * active members.
	 */
	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
		return;

	if (head)
		list_add(&rt_se->run_list, queue);
	else
		list_add_tail(&rt_se->run_list, queue);
	__set_bit(rt_se_prio(rt_se), array->bitmap);

	inc_rt_tasks(rt_se, rt_rq);
}

static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;

	list_del_init(&rt_se->run_list);
	if (list_empty(array->queue + rt_se_prio(rt_se)))
		__clear_bit(rt_se_prio(rt_se), array->bitmap);

	dec_rt_tasks(rt_se, rt_rq);
}

/*
 * Because the prio of an upper entry depends on the lower
 * entries, we must remove entries top-down.
 */
static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
{
	struct sched_rt_entity *back = NULL;

	for_each_sched_rt_entity(rt_se) {
		rt_se->back = back;
		back = rt_se;
	}

	for (rt_se = back; rt_se; rt_se = rt_se->back) {
		if (on_rt_rq(rt_se))
			__dequeue_rt_entity(rt_se);
	}
}

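/*
 * Both operations below first flatten the entity hierarchy with
 * dequeue_rt_stack() and then rebuild it: enqueue re-adds the whole
 * chain, while dequeue re-adds only ancestors that still have
 * runnable children.
 */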
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
	dequeue_rt_stack(rt_se);
	for_each_sched_rt_entity(rt_se)
		__enqueue_rt_entity(rt_se, head);
}

static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
	dequeue_rt_stack(rt_se);

	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = group_rt_rq(rt_se);

		if (rt_rq && rt_rq->rt_nr_running)
			__enqueue_rt_entity(rt_se, false);
	}
}

/*
 * Adding/removing a task to/from a priority array:
 */
static void
enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	if (flags & ENQUEUE_WAKEUP)
		rt_se->timeout = 0;

	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);

	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);
	dequeue_rt_entity(rt_se);

	dequeue_pushable_task(rq, p);
}

/*
 * Put the task at the end of the run list without the overhead of a
 * dequeue followed by an enqueue.
 */
static void
requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
{
	if (on_rt_rq(rt_se)) {
		struct rt_prio_array *array = &rt_rq->active;
		struct list_head *queue = array->queue + rt_se_prio(rt_se);

		if (head)
			list_move(&rt_se->run_list, queue);
		else
			list_move_tail(&rt_se->run_list, queue);
	}
}

static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
{
	struct sched_rt_entity *rt_se = &p->rt;
	struct rt_rq *rt_rq;

	for_each_sched_rt_entity(rt_se) {
		rt_rq = rt_rq_of_se(rt_se);
		requeue_rt_entity(rt_rq, rt_se, head);
	}
}

static void yield_task_rt(struct rq *rq)
{
	requeue_task_rt(rq, rq->curr, 0);
}

#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);

static int
select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
{
	if (sd_flag != SD_BALANCE_WAKE)
		return smp_processor_id();

	/*
	 * If the current task is an RT task, then
	 * try to see if we can wake this RT task up on another
	 * runqueue. Otherwise simply start this RT task
	 * on its current runqueue.
	 *
	 * We want to avoid overloading runqueues, even if
	 * the RT task is of higher priority than the current RT task.
	 * RT tasks behave differently than other tasks. If
	 * one gets preempted, we try to push it off to another queue.
	 * So trying to keep a preempting RT task on the same
	 * cache hot CPU will force the running RT task to
	 * a cold CPU. So we waste all the cache for the lower
	 * RT task in hopes of saving some of an RT task
	 * that is just being woken and probably will have
	 * cold cache anyway.
	 */
	if (unlikely(rt_task(rq->curr)) &&
	    (p->rt.nr_cpus_allowed > 1)) {
		int cpu = find_lowest_rq(p);

		return (cpu == -1) ? task_cpu(p) : cpu;
	}

	/*
	 * Otherwise, just let it ride on the affined RQ and the
	 * post-schedule router will push the preempted task away.
	 */
	return task_cpu(p);
}

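/*
 * On an equal-priority wakeup, prefer to keep a pinned task here: if
 * current can run elsewhere while 'p' cannot, requeue 'p' at the head
 * and reschedule so the push logic moves current away.
 */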
static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
	if (rq->curr->rt.nr_cpus_allowed == 1)
		return;

	if (p->rt.nr_cpus_allowed != 1
	    && cpupri_find(&rq->rd->cpupri, p, NULL))
		return;

	if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
		return;

	/*
	 * There appear to be other cpus that can accept
	 * current and none to run 'p', so let's reschedule
	 * to try and push current away:
	 */
	requeue_task_rt(rq, p, 1);
	resched_task(rq->curr);
}

#endif /* CONFIG_SMP */

/*
 * Preempt the current task with a newly woken task if needed:
 */
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
{
	if (p->prio < rq->curr->prio) {
		resched_task(rq->curr);
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * If:
	 *
	 * - the newly woken task is of equal priority to the current task
	 * - the newly woken task is non-migratable while current is migratable
	 * - current will be preempted on the next reschedule
	 *
	 * we should check to see if current can readily move to a different
	 * cpu. If so, we will reschedule to allow the push logic to try
	 * to move current somewhere else, making room for our non-migratable
	 * task.
	 */
	if (p->prio == rq->curr->prio && !need_resched())
		check_preempt_equal_prio(rq, p);
#endif
}

static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
						   struct rt_rq *rt_rq)
{
	struct rt_prio_array *array = &rt_rq->active;
	struct sched_rt_entity *next = NULL;
	struct list_head *queue;
	int idx;

	idx = sched_find_first_bit(array->bitmap);
	BUG_ON(idx >= MAX_RT_PRIO);

	queue = array->queue + idx;
	next = list_entry(queue->next, struct sched_rt_entity, run_list);

	return next;
}

static struct task_struct *_pick_next_task_rt(struct rq *rq)
{
	struct sched_rt_entity *rt_se;
	struct task_struct *p;
	struct rt_rq *rt_rq;

	rt_rq = &rq->rt;

	if (unlikely(!rt_rq->rt_nr_running))
		return NULL;

	if (rt_rq_throttled(rt_rq))
		return NULL;

	do {
		rt_se = pick_next_rt_entity(rq, rt_rq);
		BUG_ON(!rt_se);
		rt_rq = group_rt_rq(rt_se);
	} while (rt_rq);

	p = rt_task_of(rt_se);
	p->se.exec_start = rq->clock;

	return p;
}

static struct task_struct *pick_next_task_rt(struct rq *rq)
{
	struct task_struct *p = _pick_next_task_rt(rq);

	/* The running task is never eligible for pushing */
	if (p)
		dequeue_pushable_task(rq, p);

#ifdef CONFIG_SMP
	/*
	 * We detect this state here so that we can avoid taking the RQ
	 * lock again later if there is no need to push
	 */
	rq->post_schedule = has_pushable_tasks(rq);
#endif

	return p;
}

static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
	update_curr_rt(rq);
	p->se.exec_start = 0;

	/*
	 * The previous task needs to be made eligible for pushing
	 * if it is still active
	 */
	if (p->se.on_rq && p->rt.nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

#ifdef CONFIG_SMP

/* Only try algorithms three times */
#define RT_MAX_TRIES 3

static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);

static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
	if (!task_running(rq, p) &&
	    (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
	    (p->rt.nr_cpus_allowed > 1))
		return 1;
	return 0;
}

/* Return the second highest RT task, NULL otherwise */
static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
{
	struct task_struct *next = NULL;
	struct sched_rt_entity *rt_se;
	struct rt_prio_array *array;
	struct rt_rq *rt_rq;
	int idx;

	for_each_leaf_rt_rq(rt_rq, rq) {
		array = &rt_rq->active;
		idx = sched_find_first_bit(array->bitmap);
next_idx:
		if (idx >= MAX_RT_PRIO)
			continue;
		if (next && next->prio < idx)
			continue;
		list_for_each_entry(rt_se, array->queue + idx, run_list) {
			struct task_struct *p;

			if (!rt_entity_is_task(rt_se))
				continue;

			p = rt_task_of(rt_se);
			if (pick_rt_task(rq, p, cpu)) {
				next = p;
				break;
			}
		}
		if (!next) {
			idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
			goto next_idx;
		}
	}

	return next;
}

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

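/*
 * find_lowest_rq - pick the best CPU for 'task' among those currently
 * running only lower-priority work: prefer the task's own CPU, then a
 * cache-affine CPU within its sched domains, then anything else in the
 * cpupri mask.
 */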
static int find_lowest_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu = task_cpu(task);

	if (task->rt.nr_cpus_allowed == 1)
		return -1; /* No other targets possible */

	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
		return -1; /* No targets found */

	/*
	 * At this point we have built a mask of cpus representing the
	 * lowest priority tasks in the system.  Now we want to elect
	 * the best one based on our affinity and topology.
	 *
	 * We prioritize the last cpu that the task executed on since
	 * it is most likely cache-hot in that location.
	 */
	if (cpumask_test_cpu(cpu, lowest_mask))
		return cpu;

	/*
	 * Otherwise, we consult the sched_domains span maps to figure
	 * out which cpu is logically closest to our hot cache data.
	 */
	if (!cpumask_test_cpu(this_cpu, lowest_mask))
		this_cpu = -1; /* Skip this_cpu opt if not among lowest */

	for_each_domain(cpu, sd) {
		if (sd->flags & SD_WAKE_AFFINE) {
			int best_cpu;

			/*
			 * "this_cpu" is cheaper to preempt than a
			 * remote processor.
			 */
			if (this_cpu != -1 &&
			    cpumask_test_cpu(this_cpu, sched_domain_span(sd)))
				return this_cpu;

			best_cpu = cpumask_first_and(lowest_mask,
						     sched_domain_span(sd));
			if (best_cpu < nr_cpu_ids)
				return best_cpu;
		}
	}

	/*
	 * And finally, if there were no matches within the domains
	 * just give the caller *something* to work with from the compatible
	 * locations.
	 */
	if (this_cpu != -1)
		return this_cpu;

	cpu = cpumask_any(lowest_mask);
	if (cpu < nr_cpu_ids)
		return cpu;
	return -1;
}

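/*
 * Note that double_lock_balance() may drop rq->lock while taking both
 * locks in lock order, so every caller below revalidates the task once
 * the locks are held.
 */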
/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
	struct rq *lowest_rq = NULL;
	int tries;
	int cpu;

	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
		cpu = find_lowest_rq(task);

		if ((cpu == -1) || (cpu == rq->cpu))
			break;

		lowest_rq = cpu_rq(cpu);

		/* if the prio of this runqueue changed, try again */
		if (double_lock_balance(rq, lowest_rq)) {
			/*
			 * We had to unlock the run queue. In
			 * the meantime, task could have
			 * migrated already or had its affinity changed.
			 * Also make sure that it wasn't scheduled on its rq.
			 */
			if (unlikely(task_rq(task) != rq ||
				     !cpumask_test_cpu(lowest_rq->cpu,
						       &task->cpus_allowed) ||
				     task_running(rq, task) ||
				     !task->se.on_rq)) {

				raw_spin_unlock(&lowest_rq->lock);
				lowest_rq = NULL;
				break;
			}
		}

		/* If this rq is still suitable use it. */
		if (lowest_rq->rt.highest_prio.curr > task->prio)
			break;

		/* try again */
		double_unlock_balance(rq, lowest_rq);
		lowest_rq = NULL;
	}

	return lowest_rq;
}

static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
	struct task_struct *p;

	if (!has_pushable_tasks(rq))
		return NULL;

	p = plist_first_entry(&rq->rt.pushable_tasks,
			      struct task_struct, pushable_tasks);

	BUG_ON(rq->cpu != task_cpu(p));
	BUG_ON(task_current(rq, p));
	BUG_ON(p->rt.nr_cpus_allowed <= 1);

	BUG_ON(!p->se.on_rq);
	BUG_ON(!rt_task(p));

	return p;
}

/*
 * If the current CPU has more than one RT task, see if the non
 * running task can migrate over to a CPU that is running a task
 * of lesser priority.
 */
static int push_rt_task(struct rq *rq)
{
	struct task_struct *next_task;
	struct rq *lowest_rq;

	if (!rq->rt.overloaded)
		return 0;

	next_task = pick_next_pushable_task(rq);
	if (!next_task)
		return 0;

retry:
	if (unlikely(next_task == rq->curr)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * It's possible that next_task slipped in with a
	 * higher priority than current. If that's the case
	 * just reschedule current.
	 */
	if (unlikely(next_task->prio < rq->curr->prio)) {
		resched_task(rq->curr);
		return 0;
	}

	/* We might release rq lock */
	get_task_struct(next_task);

	/* find_lock_lowest_rq locks the rq if found */
	lowest_rq = find_lock_lowest_rq(next_task, rq);
	if (!lowest_rq) {
		struct task_struct *task;
		/*
		 * find_lock_lowest_rq releases rq->lock
		 * so it is possible that next_task has migrated.
		 *
		 * We need to make sure that the task is still on the same
		 * run-queue and is also still the next task eligible for
		 * pushing.
		 */
		task = pick_next_pushable_task(rq);
		if (task_cpu(next_task) == rq->cpu && task == next_task) {
			/*
			 * If we get here, the task hasn't moved at all, but
			 * it has failed to push. We will not try again,
			 * since the other cpus will pull from us when they
			 * are ready.
			 */
			dequeue_pushable_task(rq, next_task);
			goto out;
		}

		if (!task)
			/* No more tasks, just exit */
			goto out;

		/*
		 * Something has shifted, try again.
		 */
		put_task_struct(next_task);
		next_task = task;
		goto retry;
	}

	deactivate_task(rq, next_task, 0);
	set_task_cpu(next_task, lowest_rq->cpu);
	activate_task(lowest_rq, next_task, 0);

	resched_task(lowest_rq->curr);

	double_unlock_balance(rq, lowest_rq);

out:
	put_task_struct(next_task);

	return 1;
}

static void push_rt_tasks(struct rq *rq)
{
	/* push_rt_task will return true if it moved an RT task */
	while (push_rt_task(rq))
		;
}

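/*
 * pull_rt_task - scan the overloaded runqueues in our root domain and
 * pull over any queued RT task that would preempt what this runqueue
 * is about to run. Returns 1 if a task was pulled.
 */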
static int pull_rt_task(struct rq *this_rq)
{
	int this_cpu = this_rq->cpu, ret = 0, cpu;
	struct task_struct *p;
	struct rq *src_rq;

	if (likely(!rt_overloaded(this_rq)))
		return 0;

	for_each_cpu(cpu, this_rq->rd->rto_mask) {
		if (this_cpu == cpu)
			continue;

		src_rq = cpu_rq(cpu);

		/*
		 * Don't bother taking the src_rq->lock if the next highest
		 * task is known to be lower-priority than our current task.
		 * This may look racy, but if this value is about to go
		 * logically higher, the src_rq will push this task away.
		 * And if it's going logically lower, we do not care.
		 */
		if (src_rq->rt.highest_prio.next >=
		    this_rq->rt.highest_prio.curr)
			continue;

		/*
		 * We can potentially drop this_rq's lock in
		 * double_lock_balance, and another CPU could
		 * alter this_rq
		 */
		double_lock_balance(this_rq, src_rq);

		/*
		 * Are there still pullable RT tasks?
		 */
		if (src_rq->rt.rt_nr_running <= 1)
			goto skip;

		p = pick_next_highest_task_rt(src_rq, this_cpu);

		/*
		 * Do we have an RT task that preempts
		 * the to-be-scheduled task?
		 */
		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
			WARN_ON(p == src_rq->curr);
			WARN_ON(!p->se.on_rq);

			/*
			 * There's a chance that p is higher in priority
			 * than what's currently running on its cpu.
			 * This is just that p is waking up and hasn't
			 * had a chance to schedule. We only pull
			 * p if it is lower in priority than the
			 * current task on the run queue
			 */
			if (p->prio < src_rq->curr->prio)
				goto skip;

			ret = 1;

			deactivate_task(src_rq, p, 0);
			set_task_cpu(p, this_cpu);
			activate_task(this_rq, p, 0);
			/*
			 * We continue with the search, just in
			 * case there's an even higher prio task
			 * in another runqueue. (low likelihood
			 * but possible)
			 */
		}
skip:
		double_unlock_balance(this_rq, src_rq);
	}

	return ret;
}

static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
{
	/* Try to pull RT tasks here if we lower this rq's prio */
	if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio)
		pull_rt_task(rq);
}

static void post_schedule_rt(struct rq *rq)
{
	push_rt_tasks(rq);
}

/*
 * If we are not running and we are not going to reschedule soon, we should
 * try to push tasks away now
 */
static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
	if (!task_running(rq, p) &&
	    !test_tsk_need_resched(rq->curr) &&
	    has_pushable_tasks(rq) &&
	    p->rt.nr_cpus_allowed > 1)
		push_rt_tasks(rq);
}

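/*
 * When a task's affinity changes we must keep the pushable list and the
 * runqueue's migratory count consistent with the new cpumask.
 */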
static void set_cpus_allowed_rt(struct task_struct *p,
				const struct cpumask *new_mask)
{
	int weight = cpumask_weight(new_mask);

	BUG_ON(!rt_task(p));

	/*
	 * Update the migration status of the RQ if we have an RT task
	 * which is running AND changing its weight value.
	 */
	if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
		struct rq *rq = task_rq(p);

		if (!task_current(rq, p)) {
			/*
			 * Make sure we dequeue this task from the pushable list
			 * before going further.  It will either remain off of
			 * the list because we are no longer pushable, or it
			 * will be requeued.
			 */
			if (p->rt.nr_cpus_allowed > 1)
				dequeue_pushable_task(rq, p);

			/*
			 * Requeue if our weight is changing and still > 1
			 */
			if (weight > 1)
				enqueue_pushable_task(rq, p);

		}

		if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) {
			rq->rt.rt_nr_migratory++;
		} else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) {
			BUG_ON(!rq->rt.rt_nr_migratory);
			rq->rt.rt_nr_migratory--;
		}

		update_rt_migration(&rq->rt);
	}

	cpumask_copy(&p->cpus_allowed, new_mask);
	p->rt.nr_cpus_allowed = weight;
}

/* Assumes rq->lock is held */
static void rq_online_rt(struct rq *rq)
{
	if (rq->rt.overloaded)
		rt_set_overload(rq);

	__enable_runtime(rq);

	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
}

/* Assumes rq->lock is held */
static void rq_offline_rt(struct rq *rq)
{
	if (rq->rt.overloaded)
		rt_clear_overload(rq);

	__disable_runtime(rq);

	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
}

/*
 * When switching from the rt queue, we bring ourselves to a position
 * that we might want to pull RT tasks from other runqueues.
 */
static void switched_from_rt(struct rq *rq, struct task_struct *p,
			     int running)
{
	/*
	 * If there are other RT tasks then we will reschedule
	 * and the scheduling of the other RT tasks will handle
	 * the balancing. But if we are the last RT task
	 * we may need to handle the pulling of RT tasks
	 * now.
	 */
	if (!rq->rt.rt_nr_running)
		pull_rt_task(rq);
}

static inline void init_sched_rt_class(void)
{
	unsigned int i;

	for_each_possible_cpu(i)
		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
					GFP_KERNEL, cpu_to_node(i));
}
#endif /* CONFIG_SMP */

/*
 * When switching a task to RT, we may overload the runqueue
 * with RT tasks. In this case we try to push them off to
 * other runqueues.
 */
static void switched_to_rt(struct rq *rq, struct task_struct *p,
			   int running)
{
	int check_resched = 1;

	/*
	 * If we are already running, then there's nothing
	 * that needs to be done. But if we are not running
	 * we may need to preempt the current running task.
	 * If that current running task is also an RT task
	 * then see if we can move to another run queue.
	 */
	if (!running) {
#ifdef CONFIG_SMP
		if (rq->rt.overloaded && push_rt_task(rq) &&
		    /* Don't resched if we changed runqueues */
		    rq != task_rq(p))
			check_resched = 0;
#endif /* CONFIG_SMP */
		if (check_resched && p->prio < rq->curr->prio)
			resched_task(rq->curr);
	}
}

/*
 * Priority of the task has changed. This may cause
 * us to initiate a push or pull.
 */
static void prio_changed_rt(struct rq *rq, struct task_struct *p,
			    int oldprio, int running)
{
	if (running) {
#ifdef CONFIG_SMP
		/*
		 * If our priority decreases while running, we
		 * may need to pull tasks to this runqueue.
		 */
		if (oldprio < p->prio)
			pull_rt_task(rq);
		/*
		 * If there's a higher priority task waiting to run
		 * then reschedule. Note, the above pull_rt_task
		 * can release the rq lock and p could migrate.
		 * Only reschedule if p is still on the same runqueue.
		 */
		if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
			resched_task(p);
#else
		/* For UP simply resched on drop of prio */
		if (oldprio < p->prio)
			resched_task(p);
#endif /* CONFIG_SMP */
	} else {
		/*
		 * This task is not running, but if its priority is higher
		 * than that of the current running task, then reschedule.
		 */
		if (p->prio < rq->curr->prio)
			resched_task(rq->curr);
	}
}

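/*
 * watchdog - enforce RLIMIT_RTTIME.  p->rt.timeout counts scheduler
 * ticks of RT runtime; once it exceeds the soft limit (converted from
 * microseconds to ticks below), cputime_expires is set so the
 * posix-cpu-timer code fires on the next tick.
 */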
static void watchdog(struct rq *rq, struct task_struct *p)
{
	unsigned long soft, hard;

	/* max may change after cur was read, this will be fixed next tick */
	soft = task_rlimit(p, RLIMIT_RTTIME);
	hard = task_rlimit_max(p, RLIMIT_RTTIME);

	if (soft != RLIM_INFINITY) {
		unsigned long next;

		p->rt.timeout++;
		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
		if (p->rt.timeout > next)
			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
	}
}

static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
{
	update_curr_rt(rq);

	watchdog(rq, p);

	/*
	 * RR tasks need a special form of timeslice management.
	 * FIFO tasks have no timeslices.
	 */
	if (p->policy != SCHED_RR)
		return;

	if (--p->rt.time_slice)
		return;

	p->rt.time_slice = DEF_TIMESLICE;

	/*
	 * Requeue to the end of queue if we are not the only element
	 * on the queue:
	 */
	if (p->rt.run_list.prev != p->rt.run_list.next) {
		requeue_task_rt(rq, p, 0);
		set_tsk_need_resched(p);
	}
}

static void set_curr_task_rt(struct rq *rq)
{
	struct task_struct *p = rq->curr;

	p->se.exec_start = rq->clock;

	/* The running task is never eligible for pushing */
	dequeue_pushable_task(rq, p);
}

static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
{
	/*
	 * Time slice is 0 for SCHED_FIFO tasks
	 */
	if (task->policy == SCHED_RR)
		return DEF_TIMESLICE;
	else
		return 0;
}

static const struct sched_class rt_sched_class = {
	.next			= &fair_sched_class,
	.enqueue_task		= enqueue_task_rt,
	.dequeue_task		= dequeue_task_rt,
	.yield_task		= yield_task_rt,

	.check_preempt_curr	= check_preempt_curr_rt,

	.pick_next_task		= pick_next_task_rt,
	.put_prev_task		= put_prev_task_rt,

#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_rt,

	.set_cpus_allowed	= set_cpus_allowed_rt,
	.rq_online		= rq_online_rt,
	.rq_offline		= rq_offline_rt,
	.pre_schedule		= pre_schedule_rt,
	.post_schedule		= post_schedule_rt,
	.task_woken		= task_woken_rt,
	.switched_from		= switched_from_rt,
#endif

	.set_curr_task		= set_curr_task_rt,
	.task_tick		= task_tick_rt,

	.get_rr_interval	= get_rr_interval_rt,

	.prio_changed		= prio_changed_rt,
	.switched_to		= switched_to_rt,
};

#ifdef CONFIG_SCHED_DEBUG
extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);

static void print_rt_stats(struct seq_file *m, int cpu)
{
	struct rt_rq *rt_rq;

	rcu_read_lock();
	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
		print_rt_rq(m, cpu, rt_rq);
	rcu_read_unlock();
}
#endif /* CONFIG_SCHED_DEBUG */