/*
 *  linux/kernel/timer.c
 *
 *  Kernel internal timers, basic process system calls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
 *  2002-05-31  Move sys_sysinfo here and make its locking sane, Robert Love
 *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
 *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
 *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
 */

#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pid_namespace.h>
#include <linux/notifier.h>
#include <linux/thread_info.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/perf_event.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>

#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>

u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);

/*
 * per-CPU timer vector definitions:
 */
#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

struct tvec {
	struct list_head vec[TVN_SIZE];
};

struct tvec_root {
	struct list_head vec[TVR_SIZE];
};

struct tvec_base {
	spinlock_t lock;
	struct timer_list *running_timer;
	unsigned long timer_jiffies;
	unsigned long next_timer;
	struct tvec_root tv1;
	struct tvec tv2;
	struct tvec tv3;
	struct tvec tv4;
	struct tvec tv5;
} ____cacheline_aligned;

struct tvec_base boot_tvec_bases;
EXPORT_SYMBOL(boot_tvec_bases);
static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;

/*
 * Note that all tvec_bases are 2 byte aligned and lower bit of
 * base in timer_list is guaranteed to be zero. Use the LSB to
 * indicate whether the timer is deferrable.
 *
 * A deferrable timer will work normally when the system is busy, but
 * will not cause a CPU to come out of idle just to service it; instead,
 * the timer will be serviced when the CPU eventually wakes up with a
 * subsequent non-deferrable timer.
 */
#define TBASE_DEFERRABLE_FLAG		(0x1)

/* Functions below help us manage 'deferrable' flag */
static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
	return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
}

static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
	return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
}

static inline void timer_set_deferrable(struct timer_list *timer)
{
	timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
				       TBASE_DEFERRABLE_FLAG));
}

static inline void
timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
{
	timer->base = (struct tvec_base *)((unsigned long)(new_base) |
				      tbase_get_deferrable(timer->base));
}

static unsigned long round_jiffies_common(unsigned long j, int cpu,
		bool force_up)
{
	int rem;
	unsigned long original = j;

	/*
	 * We don't want all cpus firing their timers at once hitting the
	 * same lock or cachelines, so we skew each extra cpu with an extra
	 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
	 * already did this.
	 * The skew is done by adding 3*cpunr, then rounding, then subtracting
	 * this extra offset again.
	 */
	j += cpu * 3;

	rem = j % HZ;

	/*
	 * If the target jiffy is just after a whole second (which can happen
	 * due to delays of the timer irq, long irq off times etc etc) then
	 * we should round down to the whole second, not up. Use 1/4th second
	 * as cutoff for this rounding as an extreme upper bound for this.
	 * But never round down if @force_up is set.
	 */
	if (rem < HZ/4 && !force_up) /* round down */
		j = j - rem;
	else /* round up */
		j = j - rem + HZ;

	/* now that we have rounded, subtract the extra skew again */
	j -= cpu * 3;

	if (j <= jiffies) /* rounding ate our timeout entirely; */
		return original;
	return j;
}

/**
 * __round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, false);
}
EXPORT_SYMBOL_GPL(__round_jiffies);
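
/*
 * Worked example (illustrative only, assuming HZ == 1000): for a timeout
 * whose remainder modulo HZ works out to 273 after the 3-jiffy skew for
 * cpu 1, rem >= HZ/4, so the time is rounded up to the next whole second
 * and the skew is subtracted again, leaving the timer 3 jiffies short of
 * the whole second.  The same timeout on cpu 2 lands 6 jiffies short, so
 * the two CPUs do not fire in the same tick.
 */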
/**
 * __round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, false) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);

/**
 * round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);

/**
 * round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies_relative(unsigned long j)
{
	return __round_jiffies_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);

/**
 * __round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies() except that it will never
 * round down.  This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, true);
}
EXPORT_SYMBOL_GPL(__round_jiffies_up);

/**
 * __round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies_relative() except that it will never
 * round down.  This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, true) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);

/**
 * round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * This is the same as round_jiffies() except that it will never
 * round down.  This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), true);
}
EXPORT_SYMBOL_GPL(round_jiffies_up);

/**
 * round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * This is the same as round_jiffies_relative() except that it will never
 * round down.  This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up_relative(unsigned long j)
{
	return __round_jiffies_up_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);

/**
 * set_timer_slack - set the allowed slack for a timer
 * @timer: the timer to be modified
 * @slack_hz: the amount of time (in jiffies) allowed for rounding
 *
 * Set the amount of time, in jiffies, that a certain timer has
 * in terms of slack. By setting this value, the timer subsystem
 * will schedule the actual timer somewhere between
 * the time mod_timer() asks for, and that time plus the slack.
 *
 * By setting the slack to -1, a percentage of the delay is used
 * instead.
 */
void set_timer_slack(struct timer_list *timer, int slack_hz)
{
	timer->slack = slack_hz;
}
EXPORT_SYMBOL_GPL(set_timer_slack);


static inline void set_running_timer(struct tvec_base *base,
					struct timer_list *timer)
{
#ifdef CONFIG_SMP
	base->running_timer = timer;
#endif
}

static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
	unsigned long expires = timer->expires;
	unsigned long idx = expires - base->timer_jiffies;
	struct list_head *vec;

	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = base->tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		vec = base->tv2.vec + i;
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		vec = base->tv3.vec + i;
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		vec = base->tv4.vec + i;
	} else if ((signed long) idx < 0) {
		/*
		 * Can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
	} else {
		int i;
		/* If the timeout is larger than 0xffffffff on 64-bit
		 * architectures then we use the maximum timeout:
		 */
		if (idx > 0xffffffffUL) {
			idx = 0xffffffffUL;
			expires = idx + base->timer_jiffies;
		}
		i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		vec = base->tv5.vec + i;
	}
	/*
	 * Timers are FIFO:
	 */
	list_add_tail(&timer->entry, vec);
}

#ifdef CONFIG_TIMER_STATS
void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
{
	if (timer->start_site)
		return;

	timer->start_site = addr;
	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
	timer->start_pid = current->pid;
}

static void timer_stats_account_timer(struct timer_list *timer)
{
	unsigned int flag = 0;

	if (likely(!timer->start_site))
		return;
	if (unlikely(tbase_get_deferrable(timer->base)))
		flag |= TIMER_STATS_FLAG_DEFERRABLE;

	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
				 timer->function, timer->start_comm, flag);
}

#else
static void timer_stats_account_timer(struct timer_list *timer) {}
#endif

#ifdef CONFIG_DEBUG_OBJECTS_TIMERS

static struct debug_obj_descr timer_debug_descr;

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static int timer_fixup_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_init(timer, &timer_debug_descr);
		return 1;
	default:
		return 0;
	}
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown object is activated (might be a statically initialized object)
 */
static int timer_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {

	case ODEBUG_STATE_NOTAVAILABLE:
		/*
		 * This is not really a fixup. The timer was
		 * statically initialized. We just make sure that it
		 * is tracked in the object tracker.
		 */
		if (timer->entry.next == NULL &&
		    timer->entry.prev == TIMER_ENTRY_STATIC) {
			debug_object_init(timer, &timer_debug_descr);
			debug_object_activate(timer, &timer_debug_descr);
			return 0;
		} else {
			WARN_ON_ONCE(1);
		}
		return 0;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);

	default:
		return 0;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static int timer_fixup_free(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_free(timer, &timer_debug_descr);
		return 1;
	default:
		return 0;
	}
}

static struct debug_obj_descr timer_debug_descr = {
	.name		= "timer_list",
	.fixup_init	= timer_fixup_init,
	.fixup_activate	= timer_fixup_activate,
	.fixup_free	= timer_fixup_free,
};

static inline void debug_timer_init(struct timer_list *timer)
{
	debug_object_init(timer, &timer_debug_descr);
}

static inline void debug_timer_activate(struct timer_list *timer)
{
	debug_object_activate(timer, &timer_debug_descr);
}

static inline void debug_timer_deactivate(struct timer_list *timer)
{
	debug_object_deactivate(timer, &timer_debug_descr);
}

static inline void debug_timer_free(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}

static void __init_timer(struct timer_list *timer,
			 const char *name,
			 struct lock_class_key *key);

void init_timer_on_stack_key(struct timer_list *timer,
			     const char *name,
			     struct lock_class_key *key)
{
	debug_object_init_on_stack(timer, &timer_debug_descr);
	__init_timer(timer, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);

void destroy_timer_on_stack(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_timer_on_stack);

#else
static inline void debug_timer_init(struct timer_list *timer) { }
static inline void debug_timer_activate(struct timer_list *timer) { }
static inline void debug_timer_deactivate(struct timer_list *timer) { }
#endif

static inline void debug_init(struct timer_list *timer)
{
	debug_timer_init(timer);
	trace_timer_init(timer);
}

static inline void
debug_activate(struct timer_list *timer, unsigned long expires)
{
	debug_timer_activate(timer);
	trace_timer_start(timer, expires);
}

static inline void debug_deactivate(struct timer_list *timer)
{
	debug_timer_deactivate(timer);
	trace_timer_cancel(timer);
}

static void __init_timer(struct timer_list *timer,
			 const char *name,
			 struct lock_class_key *key)
{
	timer->entry.next = NULL;
	timer->base = __raw_get_cpu_var(tvec_bases);
	timer->slack = -1;
#ifdef CONFIG_TIMER_STATS
	timer->start_site = NULL;
	timer->start_pid = -1;
	memset(timer->start_comm, 0, TASK_COMM_LEN);
#endif
	lockdep_init_map(&timer->lockdep_map, name, key, 0);
}

void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
					 const char *name,
					 struct lock_class_key *key,
					 void (*function)(unsigned long),
					 unsigned long data)
{
	timer->function = function;
	timer->data = data;
	init_timer_on_stack_key(timer, name, key);
	timer_set_deferrable(timer);
}
EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);

/**
 * init_timer_key - initialize a timer
 * @timer: the timer to be initialized
 * @name: name of the timer
 * @key: lockdep class key of the fake lock used for tracking timer
 *       sync lock dependencies
 *
 * init_timer_key() must be done to a timer prior to calling *any* of the
 * other timer functions.
 */
void init_timer_key(struct timer_list *timer,
		    const char *name,
		    struct lock_class_key *key)
{
	debug_init(timer);
	__init_timer(timer, name, key);
}
EXPORT_SYMBOL(init_timer_key);

void init_timer_deferrable_key(struct timer_list *timer,
			       const char *name,
			       struct lock_class_key *key)
{
	init_timer_key(timer, name, key);
	timer_set_deferrable(timer);
}
EXPORT_SYMBOL(init_timer_deferrable_key);

static inline void detach_timer(struct timer_list *timer,
				int clear_pending)
{
	struct list_head *entry = &timer->entry;

	debug_deactivate(timer);

	__list_del(entry->prev, entry->next);
	if (clear_pending)
		entry->next = NULL;
	entry->prev = LIST_POISON2;
}

/*
 * We are using hashed locking: holding per_cpu(tvec_bases).lock
 * means that all timers which are tied to this base via timer->base are
 * locked, and the base itself is locked too.
 *
 * So __run_timers/migrate_timers can safely modify all timers which could
 * be found on ->tvX lists.
 *
 * When the timer's base is locked, and the timer removed from list, it is
 * possible to set timer->base = NULL and drop the lock: the timer remains
 * locked.
 */
static struct tvec_base *lock_timer_base(struct timer_list *timer,
					unsigned long *flags)
	__acquires(timer->base->lock)
{
	struct tvec_base *base;

	for (;;) {
		struct tvec_base *prelock_base = timer->base;
		base = tbase_get_base(prelock_base);
		if (likely(base != NULL)) {
			spin_lock_irqsave(&base->lock, *flags);
			if (likely(prelock_base == timer->base))
				return base;
			/* The timer has migrated to another CPU */
			spin_unlock_irqrestore(&base->lock, *flags);
		}
		cpu_relax();
	}
}

static inline int
__mod_timer(struct timer_list *timer, unsigned long expires,
	    bool pending_only, int pinned)
{
	struct tvec_base *base, *new_base;
	unsigned long flags;
	int ret = 0, cpu;

	timer_stats_timer_set_start_info(timer);
	BUG_ON(!timer->function);

	base = lock_timer_base(timer, &flags);

	if (timer_pending(timer)) {
		detach_timer(timer, 0);
		if (timer->expires == base->next_timer &&
		    !tbase_get_deferrable(timer->base))
			base->next_timer = base->timer_jiffies;
		ret = 1;
	} else {
		if (pending_only)
			goto out_unlock;
	}

	debug_activate(timer, expires);

	cpu = smp_processor_id();

#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
		cpu = get_nohz_timer_target();
#endif
	new_base = per_cpu(tvec_bases, cpu);

	if (base != new_base) {
		/*
		 * We are trying to schedule the timer on the local CPU.
		 * However we can't change timer's base while it is running,
		 * otherwise del_timer_sync() can't detect that the timer's
		 * handler has not finished yet. This also guarantees that
		 * the timer is serialized wrt itself.
		 */
		if (likely(base->running_timer != timer)) {
			/* See the comment in lock_timer_base() */
			timer_set_base(timer, NULL);
			spin_unlock(&base->lock);
			base = new_base;
			spin_lock(&base->lock);
			timer_set_base(timer, base);
		}
	}

	timer->expires = expires;
	if (time_before(timer->expires, base->next_timer) &&
	    !tbase_get_deferrable(timer->base))
		base->next_timer = timer->expires;
	internal_add_timer(base, timer);

out_unlock:
	spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}

/**
 * mod_timer_pending - modify a pending timer's timeout
 * @timer: the pending timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pending() is the same for pending timers as mod_timer(),
 * but will not re-activate and modify already deleted timers.
 *
 * It is useful for unserialized use of timers.
 */
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer_pending);

/*
 * Decide where to put the timer while taking the slack into account
 *
 * Algorithm:
 *   1) calculate the maximum (absolute) time
 *   2) calculate the highest bit where the expires and new max are different
 *   3) use this bit to make a mask
 *   4) use the bitmask to round down the maximum time, so that all last
 *      bits are zeros
 */
static inline
unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
{
	unsigned long expires_limit, mask;
	int bit;

	expires_limit = expires;

	if (timer->slack >= 0) {
		expires_limit = expires + timer->slack;
	} else {
		unsigned long now = jiffies;

		/* No slack, if already expired else auto slack 0.4% */
		if (time_after(expires, now))
			expires_limit = expires + (expires - now)/256;
	}
	mask = expires ^ expires_limit;
	if (mask == 0)
		return expires;

	bit = find_last_bit(&mask, BITS_PER_LONG);

	mask = (1 << bit) - 1;

	expires_limit = expires_limit & ~(mask);

	return expires_limit;
}
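
/*
 * Worked example for apply_slack() (illustrative only): with jiffies == 0,
 * expires == 1000 and the default slack of -1, the allowed window is
 * expires_limit = 1000 + 1000/256 = 1003.  expires ^ expires_limit == 0x3,
 * whose highest set bit is bit 1, so the bits below bit 1 of the limit are
 * cleared and the timer is queued for jiffy 1002 - still inside the
 * requested window, but on a rounder value that more timers can share.
 */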
/**
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer() is a more efficient way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * Note that if there are multiple unserialized concurrent users of the
 * same timer, then mod_timer() is the only safe way to modify the timeout,
 * since add_timer() cannot modify an already running timer.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
	/*
	 * This is a common optimization triggered by the
	 * networking code - if the timer is re-modified
	 * to be the same thing then just return:
	 */
	if (timer_pending(timer) && timer->expires == expires)
		return 1;

	expires = apply_slack(timer, expires);

	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer);

/**
 * mod_timer_pinned - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pinned() is a way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 * and not allow the timer to be migrated to a different CPU.
 *
 * mod_timer_pinned(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 */
int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
{
	if (timer->expires == expires && timer_pending(timer))
		return 1;

	return __mod_timer(timer, expires, false, TIMER_PINNED);
}
EXPORT_SYMBOL(mod_timer_pinned);

/**
 * add_timer - start a timer
 * @timer: the timer to be added
 *
 * The kernel will do a ->function(->data) callback from the
 * timer interrupt at the ->expires point in the future. The
 * current time is 'jiffies'.
 *
 * The timer's ->expires, ->function (and if the handler uses it, ->data)
 * fields must be set prior to calling this function.
 *
 * Timers with an ->expires field in the past will be executed in the next
 * timer tick.
 */
void add_timer(struct timer_list *timer)
{
	BUG_ON(timer_pending(timer));
	mod_timer(timer, timer->expires);
}
EXPORT_SYMBOL(add_timer);

/**
 * add_timer_on - start a timer on a particular CPU
 * @timer: the timer to be added
 * @cpu: the CPU to start it on
 *
 * This is not very scalable on SMP. Double adds are not possible.
 */
void add_timer_on(struct timer_list *timer, int cpu)
{
	struct tvec_base *base = per_cpu(tvec_bases, cpu);
	unsigned long flags;

	timer_stats_timer_set_start_info(timer);
	BUG_ON(timer_pending(timer) || !timer->function);
	spin_lock_irqsave(&base->lock, flags);
	timer_set_base(timer, base);
	debug_activate(timer, timer->expires);
	if (time_before(timer->expires, base->next_timer) &&
	    !tbase_get_deferrable(timer->base))
		base->next_timer = timer->expires;
	internal_add_timer(base, timer);
	/*
	 * Check whether the other CPU is idle and needs to be
	 * triggered to reevaluate the timer wheel when nohz is
	 * active. We are protected against the other CPU fiddling
	 * with the timer by holding the timer base lock. This also
	 * makes sure that a CPU on the way to idle can not evaluate
	 * the timer wheel.
	 */
	wake_up_idle_cpu(cpu);
	spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);
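
/*
 * Typical usage (a sketch, not part of this file; my_timer and my_func are
 * hypothetical names):
 *
 *	static struct timer_list my_timer;
 *
 *	static void my_func(unsigned long data)
 *	{
 *		...
 *		mod_timer(&my_timer, jiffies + HZ);	re-arm ~1s later
 *	}
 *
 *	setup_timer(&my_timer, my_func, 0);
 *	mod_timer(&my_timer, round_jiffies(jiffies + HZ));
 *
 * del_timer_sync(&my_timer) must be called before the structure is freed.
 */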
/**
 * del_timer - deactivate a timer.
 * @timer: the timer to be deactivated
 *
 * del_timer() deactivates a timer - this works on both active and inactive
 * timers.
 *
 * The function returns whether it has deactivated a pending timer or not.
 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 * active timer returns 1.)
 */
int del_timer(struct timer_list *timer)
{
	struct tvec_base *base;
	unsigned long flags;
	int ret = 0;

	timer_stats_timer_clear_start_info(timer);
	if (timer_pending(timer)) {
		base = lock_timer_base(timer, &flags);
		if (timer_pending(timer)) {
			detach_timer(timer, 1);
			if (timer->expires == base->next_timer &&
			    !tbase_get_deferrable(timer->base))
				base->next_timer = base->timer_jiffies;
			ret = 1;
		}
		spin_unlock_irqrestore(&base->lock, flags);
	}

	return ret;
}
EXPORT_SYMBOL(del_timer);

#ifdef CONFIG_SMP
/**
 * try_to_del_timer_sync - Try to deactivate a timer
 * @timer: the timer to be deactivated
 *
 * This function tries to deactivate a timer. Upon successful (ret >= 0)
 * exit the timer is not queued and the handler is not running on any CPU.
 *
 * It must not be called from interrupt contexts.
 */
int try_to_del_timer_sync(struct timer_list *timer)
{
	struct tvec_base *base;
	unsigned long flags;
	int ret = -1;

	base = lock_timer_base(timer, &flags);

	if (base->running_timer == timer)
		goto out;

	timer_stats_timer_clear_start_info(timer);
	ret = 0;
	if (timer_pending(timer)) {
		detach_timer(timer, 1);
		if (timer->expires == base->next_timer &&
		    !tbase_get_deferrable(timer->base))
			base->next_timer = base->timer_jiffies;
		ret = 1;
	}
out:
	spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}
EXPORT_SYMBOL(try_to_del_timer_sync);

/**
 * del_timer_sync - deactivate a timer and wait for the handler to finish.
 * @timer: the timer to be deactivated
 *
 * This function only differs from del_timer() on SMP: besides deactivating
 * the timer it also makes sure the handler has finished executing on other
 * CPUs.
 *
 * Synchronization rules: Callers must prevent restarting of the timer,
 * otherwise this function is meaningless. It must not be called from
 * interrupt contexts. The caller must not hold locks which would prevent
 * completion of the timer's handler. The timer's handler must not call
 * add_timer_on(). Upon exit the timer is not queued and the handler is
 * not running on any CPU.
 *
 * The function returns whether it has deactivated a pending timer or not.
 */
int del_timer_sync(struct timer_list *timer)
{
#ifdef CONFIG_LOCKDEP
	unsigned long flags;

	local_irq_save(flags);
	lock_map_acquire(&timer->lockdep_map);
	lock_map_release(&timer->lockdep_map);
	local_irq_restore(flags);
#endif

	for (;;) {
		int ret = try_to_del_timer_sync(timer);
		if (ret >= 0)
			return ret;
		cpu_relax();
	}
}
EXPORT_SYMBOL(del_timer_sync);
#endif

static int cascade(struct tvec_base *base, struct tvec *tv, int index)
{
	/* cascade all the timers from tv up one level */
	struct timer_list *timer, *tmp;
	struct list_head tv_list;

	list_replace_init(tv->vec + index, &tv_list);

	/*
	 * We are removing _all_ timers from the list, so we
	 * don't have to detach them individually.
	 */
	list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
		BUG_ON(tbase_get_base(timer->base) != base);
		internal_add_timer(base, timer);
	}

	return index;
}

static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
			  unsigned long data)
{
	int preempt_count = preempt_count();

#ifdef CONFIG_LOCKDEP
	/*
	 * It is permissible to free the timer from inside the
	 * function that is called from it, and we need to take this
	 * into account for lockdep too. To avoid bogus "held lock freed"
	 * warnings as well as problems when looking into
	 * timer->lockdep_map, make a copy and use that here.
	 */
	struct lockdep_map lockdep_map = timer->lockdep_map;
#endif
	/*
	 * Couple the lock chain with the lock chain at
	 * del_timer_sync() by acquiring the lock_map around the fn()
	 * call here and in del_timer_sync().
	 */
	lock_map_acquire(&lockdep_map);

	trace_timer_expire_entry(timer);
	fn(data);
	trace_timer_expire_exit(timer);

	lock_map_release(&lockdep_map);

	if (preempt_count != preempt_count()) {
		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
			  fn, preempt_count, preempt_count());
		/*
		 * Restore the preempt count. That gives us a decent
		 * chance to survive and extract information. If the
		 * callback kept a lock held, bad luck, but not worse
		 * than the BUG() we had.
		 */
		preempt_count() = preempt_count;
	}
}

#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)

/**
 * __run_timers - run all expired timers (if any) on this CPU.
 * @base: the timer vector to be processed.
 *
 * This function cascades all vectors and executes all expired timer
 * vectors.
 */
static inline void __run_timers(struct tvec_base *base)
{
	struct timer_list *timer;

	spin_lock_irq(&base->lock);
	while (time_after_eq(jiffies, base->timer_jiffies)) {
		struct list_head work_list;
		struct list_head *head = &work_list;
		int index = base->timer_jiffies & TVR_MASK;

		/*
		 * Cascade timers:
		 */
		if (!index &&
			(!cascade(base, &base->tv2, INDEX(0))) &&
				(!cascade(base, &base->tv3, INDEX(1))) &&
					!cascade(base, &base->tv4, INDEX(2)))
			cascade(base, &base->tv5, INDEX(3));
		++base->timer_jiffies;
		list_replace_init(base->tv1.vec + index, &work_list);
		while (!list_empty(head)) {
			void (*fn)(unsigned long);
			unsigned long data;

			timer = list_first_entry(head, struct timer_list, entry);
			fn = timer->function;
			data = timer->data;

			timer_stats_account_timer(timer);

			set_running_timer(base, timer);
			detach_timer(timer, 1);

			spin_unlock_irq(&base->lock);
			call_timer_fn(timer, fn, data);
			spin_lock_irq(&base->lock);
		}
	}
	set_running_timer(base, NULL);
	spin_unlock_irq(&base->lock);
}
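
/*
 * Bucket math, worked through (illustrative only, assuming
 * CONFIG_BASE_SMALL=0, i.e. TVR_BITS=8 and TVN_BITS=6): a timer due in
 * fewer than 256 jiffies goes straight into tv1 at slot (expires & 255);
 * one due in, say, 10000 jiffies lands in tv2 at slot (expires >> 8) & 63
 * and is cascaded down into tv1 once the wheel's low byte wraps around to
 * that region.  Each tvN thus covers 256 * 64^(N-1) jiffies.
 */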
#ifdef CONFIG_NO_HZ
/*
 * Find out when the next timer event is due to happen. This
 * is used on S/390 to stop all activity when a CPU is idle.
 * This function needs to be called with interrupts disabled.
 */
static unsigned long __next_timer_interrupt(struct tvec_base *base)
{
	unsigned long timer_jiffies = base->timer_jiffies;
	unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
	int index, slot, array, found = 0;
	struct timer_list *nte;
	struct tvec *varray[4];

	/* Look for timer events in tv1. */
	index = slot = timer_jiffies & TVR_MASK;
	do {
		list_for_each_entry(nte, base->tv1.vec + slot, entry) {
			if (tbase_get_deferrable(nte->base))
				continue;

			found = 1;
			expires = nte->expires;
			/* Look at the cascade bucket(s)? */
			if (!index || slot < index)
				goto cascade;
			return expires;
		}
		slot = (slot + 1) & TVR_MASK;
	} while (slot != index);

cascade:
	/* Calculate the next cascade event */
	if (index)
		timer_jiffies += TVR_SIZE - index;
	timer_jiffies >>= TVR_BITS;

	/* Check tv2-tv5. */
	varray[0] = &base->tv2;
	varray[1] = &base->tv3;
	varray[2] = &base->tv4;
	varray[3] = &base->tv5;

	for (array = 0; array < 4; array++) {
		struct tvec *varp = varray[array];

		index = slot = timer_jiffies & TVN_MASK;
		do {
			list_for_each_entry(nte, varp->vec + slot, entry) {
				if (tbase_get_deferrable(nte->base))
					continue;

				found = 1;
				if (time_before(nte->expires, expires))
					expires = nte->expires;
			}
			/*
			 * Are we still searching for the first timer, or are
			 * we looking up the cascade buckets?
			 */
			if (found) {
				/* Look at the cascade bucket(s)? */
				if (!index || slot < index)
					break;
				return expires;
			}
			slot = (slot + 1) & TVN_MASK;
		} while (slot != index);

		if (index)
			timer_jiffies += TVN_SIZE - index;
		timer_jiffies >>= TVN_BITS;
	}
	return expires;
}

/*
 * Check if the next hrtimer event is before the next timer wheel
 * event:
 */
static unsigned long cmp_next_hrtimer_event(unsigned long now,
					    unsigned long expires)
{
	ktime_t hr_delta = hrtimer_get_next_event();
	struct timespec tsdelta;
	unsigned long delta;

	if (hr_delta.tv64 == KTIME_MAX)
		return expires;

	/*
	 * Expired timer available, let it expire in the next tick
	 */
	if (hr_delta.tv64 <= 0)
		return now + 1;

	tsdelta = ktime_to_timespec(hr_delta);
	delta = timespec_to_jiffies(&tsdelta);

	/*
	 * Limit the delta to the max value, which is checked in
	 * tick_nohz_stop_sched_tick():
	 */
	if (delta > NEXT_TIMER_MAX_DELTA)
		delta = NEXT_TIMER_MAX_DELTA;

	/*
	 * Take rounding errors into account and make sure that it
	 * expires in the next tick. Otherwise we go into an endless
	 * ping pong due to tick_nohz_stop_sched_tick() retriggering
	 * the timer softirq
	 */
	if (delta < 1)
		delta = 1;
	now += delta;
	if (time_before(now, expires))
		return now;
	return expires;
}

/**
 * get_next_timer_interrupt - return the jiffy of the next pending timer
 * @now: current time (in jiffies)
 */
unsigned long get_next_timer_interrupt(unsigned long now)
{
	struct tvec_base *base = __get_cpu_var(tvec_bases);
	unsigned long expires;

	/*
	 * Pretend that there is no timer pending if the cpu is offline.
	 * Possible pending timers will be migrated later to an active cpu.
	 */
	if (cpu_is_offline(smp_processor_id()))
		return now + NEXT_TIMER_MAX_DELTA;
	spin_lock(&base->lock);
	if (time_before_eq(base->next_timer, base->timer_jiffies))
		base->next_timer = __next_timer_interrupt(base);
	expires = base->next_timer;
	spin_unlock(&base->lock);

	if (time_before_eq(expires, now))
		return now;

	return cmp_next_hrtimer_event(now, expires);
}
#endif

/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	int cpu = smp_processor_id();

	/* Note: this timer irq context must be accounted for as well. */
	account_process_tick(p, user_tick);
	run_local_timers();
	rcu_check_callbacks(cpu, user_tick);
	printk_tick();
	perf_event_do_pending();
	scheduler_tick();
	run_posix_cpu_timers(p);
}

/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static void run_timer_softirq(struct softirq_action *h)
{
	struct tvec_base *base = __get_cpu_var(tvec_bases);

	hrtimer_run_pending();

	if (time_after_eq(jiffies, base->timer_jiffies))
		__run_timers(base);
}

/*
 * Called by the local, per-CPU timer interrupt on SMP.
 */
void run_local_timers(void)
{
	hrtimer_run_queues();
	raise_softirq(TIMER_SOFTIRQ);
}

/*
 * The 64-bit jiffies value is not atomic - you MUST NOT read it
 * without sampling the sequence number in xtime_lock.
 * jiffies is defined in the linker script...
 */

void do_timer(unsigned long ticks)
{
	jiffies_64 += ticks;
	update_wall_time();
	calc_global_load(ticks);
}

#ifdef __ARCH_WANT_SYS_ALARM

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
SYSCALL_DEFINE1(alarm, unsigned int, seconds)
{
	return alarm_setitimer(seconds);
}

#endif

#ifndef __alpha__

/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
SYSCALL_DEFINE0(getpid)
{
	return task_tgid_vnr(current);
}

/*
 * Accessing ->real_parent is not SMP-safe, it could
 * change from under us. However, we can use a stale
 * value of ->real_parent under rcu_read_lock(), see
 * release_task()->call_rcu(delayed_put_task_struct).
 */
SYSCALL_DEFINE0(getppid)
{
	int pid;

	rcu_read_lock();
	pid = task_tgid_vnr(current->real_parent);
	rcu_read_unlock();

	return pid;
}

SYSCALL_DEFINE0(getuid)
{
	/* Only we change this so SMP safe */
	return current_uid();
}

SYSCALL_DEFINE0(geteuid)
{
	/* Only we change this so SMP safe */
	return current_euid();
}

SYSCALL_DEFINE0(getgid)
{
	/* Only we change this so SMP safe */
	return current_gid();
}

SYSCALL_DEFINE0(getegid)
{
	/* Only we change this so SMP safe */
	return current_egid();
}

#endif

static void process_timeout(unsigned long __data)
{
	wake_up_process((struct task_struct *)__data);
}

/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns. The routine will return 0
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task. In this case the remaining time
 * in jiffies will be returned, or 0 if the timer expired in time
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * In all cases the return value is guaranteed to be non-negative.
 */
signed long __sched schedule_timeout(signed long timeout)
{
	struct timer_list timer;
	unsigned long expire;

	switch (timeout)
	{
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * These two special cases exist purely for the caller's
		 * convenience. Nothing more. We could take
		 * MAX_SCHEDULE_TIMEOUT from one of the negative values
		 * but I'd like to return a valid offset (>=0) to allow
		 * the caller to do everything it wants with the retval.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Another bit of PARANOID. Note that the retval will be
		 * 0 since no piece of kernel is supposed to do a check
		 * for a negative retval of schedule_timeout() (since it
		 * should never happen anyway). You just have the printk()
		 * that will tell you if something has gone wrong and where.
		 */
		if (timeout < 0) {
			printk(KERN_ERR "schedule_timeout: wrong timeout "
				"value %lx\n", timeout);
			dump_stack();
			current->state = TASK_RUNNING;
			goto out;
		}
	}

	expire = timeout + jiffies;

	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
	__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
	schedule();
	del_singleshot_timer_sync(&timer);

	/* Remove the timer from the object tracker */
	destroy_timer_on_stack(&timer);

	timeout = expire - jiffies;

 out:
	return timeout < 0 ? 0 : timeout;
}
EXPORT_SYMBOL(schedule_timeout);
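
/*
 * Typical usage (a sketch, not a definition from this file): sleep for
 * about one second, but wake early on a signal and learn how much of the
 * timeout was left:
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	remaining = schedule_timeout(HZ);
 *
 * A return value of 0 means the full timeout elapsed; a positive value is
 * the number of jiffies that had not yet expired when the task was woken.
 */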
/*
 * We can use __set_current_state() here because schedule_timeout() calls
 * schedule() unconditionally.
 */
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_interruptible);

signed long __sched schedule_timeout_killable(signed long timeout)
{
	__set_current_state(TASK_KILLABLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_killable);

signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);

/* Thread ID - the internal kernel "pid" */
SYSCALL_DEFINE0(gettid)
{
	return task_pid_vnr(current);
}

/**
 * do_sysinfo - fill in sysinfo struct
 * @info: pointer to buffer to fill
 */
int do_sysinfo(struct sysinfo *info)
{
	unsigned long mem_total, sav_total;
	unsigned int mem_unit, bitcount;
	struct timespec tp;

	memset(info, 0, sizeof(struct sysinfo));

	ktime_get_ts(&tp);
	monotonic_to_bootbased(&tp);
	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);

	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);

	info->procs = nr_threads;

	si_meminfo(info);
	si_swapinfo(info);

	/*
	 * If the sum of all the available memory (i.e. ram + swap)
	 * is less than can be stored in a 32 bit unsigned long then
	 * we can be binary compatible with 2.2.x kernels.  If not,
	 * well, in that case 2.2.x was broken anyways...
	 *
	 *  -Erik Andersen <andersee@debian.org>
	 */

	mem_total = info->totalram + info->totalswap;
	if (mem_total < info->totalram || mem_total < info->totalswap)
		goto out;
	bitcount = 0;
	mem_unit = info->mem_unit;
	while (mem_unit > 1) {
		bitcount++;
		mem_unit >>= 1;
		sav_total = mem_total;
		mem_total <<= 1;
		if (mem_total < sav_total)
			goto out;
	}

	/*
	 * If mem_total did not overflow, multiply all memory values by
	 * info->mem_unit and set it to 1.  This leaves things compatible
	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
	 * kernels...
	 */

	info->mem_unit = 1;
	info->totalram <<= bitcount;
	info->freeram <<= bitcount;
	info->sharedram <<= bitcount;
	info->bufferram <<= bitcount;
	info->totalswap <<= bitcount;
	info->freeswap <<= bitcount;
	info->totalhigh <<= bitcount;
	info->freehigh <<= bitcount;

out:
	return 0;
}

SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
{
	struct sysinfo val;

	do_sysinfo(&val);

	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
		return -EFAULT;

	return 0;
}

static int __cpuinit init_timers_cpu(int cpu)
{
	int j;
	struct tvec_base *base;
	static char __cpuinitdata tvec_base_done[NR_CPUS];

	if (!tvec_base_done[cpu]) {
		static char boot_done;

		if (boot_done) {
			/*
			 * The APs use this path later in boot
			 */
			base = kmalloc_node(sizeof(*base),
						GFP_KERNEL | __GFP_ZERO,
						cpu_to_node(cpu));
			if (!base)
				return -ENOMEM;

			/* Make sure that tvec_base is 2 byte aligned */
			if (tbase_get_deferrable(base)) {
				WARN_ON(1);
				kfree(base);
				return -ENOMEM;
			}
			per_cpu(tvec_bases, cpu) = base;
		} else {
			/*
			 * This is for the boot CPU - we use compile-time
			 * static initialisation because per-cpu memory isn't
			 * ready yet and because the memory allocators are not
			 * initialised either.
			 */
			boot_done = 1;
			base = &boot_tvec_bases;
		}
		tvec_base_done[cpu] = 1;
	} else {
		base = per_cpu(tvec_bases, cpu);
	}

	spin_lock_init(&base->lock);

	for (j = 0; j < TVN_SIZE; j++) {
		INIT_LIST_HEAD(base->tv5.vec + j);
		INIT_LIST_HEAD(base->tv4.vec + j);
		INIT_LIST_HEAD(base->tv3.vec + j);
		INIT_LIST_HEAD(base->tv2.vec + j);
	}
	for (j = 0; j < TVR_SIZE; j++)
		INIT_LIST_HEAD(base->tv1.vec + j);

	base->timer_jiffies = jiffies;
	base->next_timer = base->timer_jiffies;
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
{
	struct timer_list *timer;

	while (!list_empty(head)) {
		timer = list_first_entry(head, struct timer_list, entry);
		detach_timer(timer, 0);
		timer_set_base(timer, new_base);
		if (time_before(timer->expires, new_base->next_timer) &&
		    !tbase_get_deferrable(timer->base))
			new_base->next_timer = timer->expires;
		internal_add_timer(new_base, timer);
	}
}

static void __cpuinit migrate_timers(int cpu)
{
	struct tvec_base *old_base;
	struct tvec_base *new_base;
	int i;

	BUG_ON(cpu_online(cpu));
	old_base = per_cpu(tvec_bases, cpu);
	new_base = get_cpu_var(tvec_bases);
	/*
	 * The caller is globally serialized and nobody else
	 * takes two locks at once, so deadlock is not possible.
	 */
	spin_lock_irq(&new_base->lock);
	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);

	BUG_ON(old_base->running_timer);

	for (i = 0; i < TVR_SIZE; i++)
		migrate_timer_list(new_base, old_base->tv1.vec + i);
	for (i = 0; i < TVN_SIZE; i++) {
		migrate_timer_list(new_base, old_base->tv2.vec + i);
		migrate_timer_list(new_base, old_base->tv3.vec + i);
		migrate_timer_list(new_base, old_base->tv4.vec + i);
		migrate_timer_list(new_base, old_base->tv5.vec + i);
	}

	spin_unlock(&old_base->lock);
	spin_unlock_irq(&new_base->lock);
	put_cpu_var(tvec_bases);
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit timer_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	int err;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		err = init_timers_cpu(cpu);
		if (err < 0)
			return notifier_from_errno(err);
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		migrate_timers(cpu);
		break;
#endif
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata timers_nb = {
	.notifier_call	= timer_cpu_notify,
};


void __init init_timers(void)
{
	int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
				(void *)(long)smp_processor_id());

	init_timer_stats();

	BUG_ON(err != NOTIFY_OK);
	register_cpu_notifier(&timers_nb);
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}

/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs: Time in milliseconds to sleep for
 */
void msleep(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout)
		timeout = schedule_timeout_uninterruptible(timeout);
}

EXPORT_SYMBOL(msleep);

/**
 * msleep_interruptible - sleep waiting for signals
 * @msecs: Time in milliseconds to sleep for
 */
unsigned long msleep_interruptible(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout && !signal_pending(current))
		timeout = schedule_timeout_interruptible(timeout);
	return jiffies_to_msecs(timeout);
}

EXPORT_SYMBOL(msleep_interruptible);

static int __sched do_usleep_range(unsigned long min, unsigned long max)
{
	ktime_t kmin;
	unsigned long delta;

	kmin = ktime_set(0, min * NSEC_PER_USEC);
	delta = (max - min) * NSEC_PER_USEC;
	return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
}

/**
 * usleep_range - Drop-in replacement for udelay where wakeup is flexible
 * @min: Minimum time in usecs to sleep
 * @max: Maximum time in usecs to sleep
 */
void usleep_range(unsigned long min, unsigned long max)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	do_usleep_range(min, max);
}
EXPORT_SYMBOL(usleep_range);
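
/*
 * Usage note (illustrative, not part of the exported API): for delays of a
 * few milliseconds or more, msleep(5) is appropriate; for short waits where
 * udelay() would burn CPU, usleep_range(50, 100) lets the scheduler place
 * the wakeup anywhere inside the 50-100 usec window and coalesce it with
 * other pending events.
 */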