1/* 2 * linux/kernel/timer.c 3 * 4 * Kernel internal timers, basic process system calls 5 * 6 * Copyright (C) 1991, 1992 Linus Torvalds 7 * 8 * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. 9 * 10 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 11 * "A Kernel Model for Precision Timekeeping" by Dave Mills 12 * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to 13 * serialize accesses to xtime/lost_ticks). 14 * Copyright (C) 1998 Andrea Arcangeli 15 * 1999-03-10 Improved NTP compatibility by Ulrich Windl 16 * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love 17 * 2000-10-05 Implemented scalable SMP per-CPU timer handling. 18 * Copyright (C) 2000, 2001, 2002 Ingo Molnar 19 * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar 20 */ 21 22#include <linux/kernel_stat.h> 23#include <linux/module.h> 24#include <linux/interrupt.h> 25#include <linux/percpu.h> 26#include <linux/init.h> 27#include <linux/mm.h> 28#include <linux/swap.h> 29#include <linux/notifier.h> 30#include <linux/thread_info.h> 31#include <linux/time.h> 32#include <linux/jiffies.h> 33#include <linux/posix-timers.h> 34#include <linux/cpu.h> 35#include <linux/syscalls.h> 36#include <linux/delay.h> 37#include <linux/tick.h> 38#include <linux/kallsyms.h> 39 40#include <asm/uaccess.h> 41#include <asm/unistd.h> 42#include <asm/div64.h> 43#include <asm/timex.h> 44#include <asm/io.h> 45 46u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; 47 48EXPORT_SYMBOL(jiffies_64); 49 50/* 51 * per-CPU timer vector definitions: 52 */ 53#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6) 54#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8) 55#define TVN_SIZE (1 << TVN_BITS) 56#define TVR_SIZE (1 << TVR_BITS) 57#define TVN_MASK (TVN_SIZE - 1) 58#define TVR_MASK (TVR_SIZE - 1) 59 60typedef struct tvec_s { 61 struct list_head vec[TVN_SIZE]; 62} tvec_t; 63 64typedef struct tvec_root_s { 65 struct list_head vec[TVR_SIZE]; 66} tvec_root_t; 67 68struct tvec_t_base_s { 69 spinlock_t lock; 70 struct timer_list *running_timer; 71 unsigned long timer_jiffies; 72 tvec_root_t tv1; 73 tvec_t tv2; 74 tvec_t tv3; 75 tvec_t tv4; 76 tvec_t tv5; 77} ____cacheline_aligned; 78 79typedef struct tvec_t_base_s tvec_base_t; 80 81tvec_base_t boot_tvec_bases; 82EXPORT_SYMBOL(boot_tvec_bases); 83static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases; 84 85/* 86 * Note that all tvec_bases is 2 byte aligned and lower bit of 87 * base in timer_list is guaranteed to be zero. Use the LSB for 88 * the new flag to indicate whether the timer is deferrable 89 */ 90#define TBASE_DEFERRABLE_FLAG (0x1) 91 92/* Functions below help us manage 'deferrable' flag */ 93static inline unsigned int tbase_get_deferrable(tvec_base_t *base) 94{ 95 return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG); 96} 97 98static inline tvec_base_t *tbase_get_base(tvec_base_t *base) 99{ 100 return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG)); 101} 102 103static inline void timer_set_deferrable(struct timer_list *timer) 104{ 105 timer->base = ((tvec_base_t *)((unsigned long)(timer->base) | 106 TBASE_DEFERRABLE_FLAG)); 107} 108 109static inline void 110timer_set_base(struct timer_list *timer, tvec_base_t *new_base) 111{ 112 timer->base = (tvec_base_t *)((unsigned long)(new_base) | 113 tbase_get_deferrable(timer->base)); 114} 115 116/** 117 * __round_jiffies - function to round jiffies to a full second 118 * @j: the time in (absolute) jiffies that should be rounded 119 * @cpu: the processor number on which the timeout will happen 120 * 121 * __round_jiffies() rounds an absolute time in the future (in jiffies) 122 * up or down to (approximately) full seconds. This is useful for timers 123 * for which the exact time they fire does not matter too much, as long as 124 * they fire approximately every X seconds. 125 * 126 * By rounding these timers to whole seconds, all such timers will fire 127 * at the same time, rather than at various times spread out. The goal 128 * of this is to have the CPU wake up less, which saves power. 129 * 130 * The exact rounding is skewed for each processor to avoid all 131 * processors firing at the exact same time, which could lead 132 * to lock contention or spurious cache line bouncing. 133 * 134 * The return value is the rounded version of the @j parameter. 135 */ 136unsigned long __round_jiffies(unsigned long j, int cpu) 137{ 138 int rem; 139 unsigned long original = j; 140 141 /* 142 * We don't want all cpus firing their timers at once hitting the 143 * same lock or cachelines, so we skew each extra cpu with an extra 144 * 3 jiffies. This 3 jiffies came originally from the mm/ code which 145 * already did this. 146 * The skew is done by adding 3*cpunr, then round, then subtract this 147 * extra offset again. 148 */ 149 j += cpu * 3; 150 151 rem = j % HZ; 152 153 /* 154 * If the target jiffie is just after a whole second (which can happen 155 * due to delays of the timer irq, long irq off times etc etc) then 156 * we should round down to the whole second, not up. Use 1/4th second 157 * as cutoff for this rounding as an extreme upper bound for this. 158 */ 159 if (rem < HZ/4) /* round down */ 160 j = j - rem; 161 else /* round up */ 162 j = j - rem + HZ; 163 164 /* now that we have rounded, subtract the extra skew again */ 165 j -= cpu * 3; 166 167 if (j <= jiffies) /* rounding ate our timeout entirely; */ 168 return original; 169 return j; 170} 171EXPORT_SYMBOL_GPL(__round_jiffies); 172 173/** 174 * __round_jiffies_relative - function to round jiffies to a full second 175 * @j: the time in (relative) jiffies that should be rounded 176 * @cpu: the processor number on which the timeout will happen 177 * 178 * __round_jiffies_relative() rounds a time delta in the future (in jiffies) 179 * up or down to (approximately) full seconds. This is useful for timers 180 * for which the exact time they fire does not matter too much, as long as 181 * they fire approximately every X seconds. 182 * 183 * By rounding these timers to whole seconds, all such timers will fire 184 * at the same time, rather than at various times spread out. The goal 185 * of this is to have the CPU wake up less, which saves power. 186 * 187 * The exact rounding is skewed for each processor to avoid all 188 * processors firing at the exact same time, which could lead 189 * to lock contention or spurious cache line bouncing. 190 * 191 * The return value is the rounded version of the @j parameter. 192 */ 193unsigned long __round_jiffies_relative(unsigned long j, int cpu) 194{ 195 /* 196 * In theory the following code can skip a jiffy in case jiffies 197 * increments right between the addition and the later subtraction. 198 * However since the entire point of this function is to use approximate 199 * timeouts, it's entirely ok to not handle that. 200 */ 201 return __round_jiffies(j + jiffies, cpu) - jiffies; 202} 203EXPORT_SYMBOL_GPL(__round_jiffies_relative); 204 205/** 206 * round_jiffies - function to round jiffies to a full second 207 * @j: the time in (absolute) jiffies that should be rounded 208 * 209 * round_jiffies() rounds an absolute time in the future (in jiffies) 210 * up or down to (approximately) full seconds. This is useful for timers 211 * for which the exact time they fire does not matter too much, as long as 212 * they fire approximately every X seconds. 213 * 214 * By rounding these timers to whole seconds, all such timers will fire 215 * at the same time, rather than at various times spread out. The goal 216 * of this is to have the CPU wake up less, which saves power. 217 * 218 * The return value is the rounded version of the @j parameter. 219 */ 220unsigned long round_jiffies(unsigned long j) 221{ 222 return __round_jiffies(j, raw_smp_processor_id()); 223} 224EXPORT_SYMBOL_GPL(round_jiffies); 225 226/** 227 * round_jiffies_relative - function to round jiffies to a full second 228 * @j: the time in (relative) jiffies that should be rounded 229 * 230 * round_jiffies_relative() rounds a time delta in the future (in jiffies) 231 * up or down to (approximately) full seconds. This is useful for timers 232 * for which the exact time they fire does not matter too much, as long as 233 * they fire approximately every X seconds. 234 * 235 * By rounding these timers to whole seconds, all such timers will fire 236 * at the same time, rather than at various times spread out. The goal 237 * of this is to have the CPU wake up less, which saves power. 238 * 239 * The return value is the rounded version of the @j parameter. 240 */ 241unsigned long round_jiffies_relative(unsigned long j) 242{ 243 return __round_jiffies_relative(j, raw_smp_processor_id()); 244} 245EXPORT_SYMBOL_GPL(round_jiffies_relative); 246 247 248static inline void set_running_timer(tvec_base_t *base, 249 struct timer_list *timer) 250{ 251#ifdef CONFIG_SMP 252 base->running_timer = timer; 253#endif 254} 255 256static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) 257{ 258 unsigned long expires = timer->expires; 259 unsigned long idx = expires - base->timer_jiffies; 260 struct list_head *vec; 261 262 if (idx < TVR_SIZE) { 263 int i = expires & TVR_MASK; 264 vec = base->tv1.vec + i; 265 } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { 266 int i = (expires >> TVR_BITS) & TVN_MASK; 267 vec = base->tv2.vec + i; 268 } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { 269 int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; 270 vec = base->tv3.vec + i; 271 } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { 272 int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; 273 vec = base->tv4.vec + i; 274 } else if ((signed long) idx < 0) { 275 /* 276 * Can happen if you add a timer with expires == jiffies, 277 * or you set a timer to go off in the past 278 */ 279 vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); 280 } else { 281 int i; 282 /* If the timeout is larger than 0xffffffff on 64-bit 283 * architectures then we use the maximum timeout: 284 */ 285 if (idx > 0xffffffffUL) { 286 idx = 0xffffffffUL; 287 expires = idx + base->timer_jiffies; 288 } 289 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; 290 vec = base->tv5.vec + i; 291 } 292 /* 293 * Timers are FIFO: 294 */ 295 list_add_tail(&timer->entry, vec); 296} 297 298#ifdef CONFIG_TIMER_STATS 299void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) 300{ 301 if (timer->start_site) 302 return; 303 304 timer->start_site = addr; 305 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); 306 timer->start_pid = current->pid; 307} 308#endif 309 310/** 311 * init_timer - initialize a timer. 312 * @timer: the timer to be initialized 313 * 314 * init_timer() must be done to a timer prior calling *any* of the 315 * other timer functions. 316 */ 317void fastcall init_timer(struct timer_list *timer) 318{ 319 timer->entry.next = NULL; 320 timer->base = __raw_get_cpu_var(tvec_bases); 321#ifdef CONFIG_TIMER_STATS 322 timer->start_site = NULL; 323 timer->start_pid = -1; 324 memset(timer->start_comm, 0, TASK_COMM_LEN); 325#endif 326} 327EXPORT_SYMBOL(init_timer); 328 329void fastcall init_timer_deferrable(struct timer_list *timer) 330{ 331 init_timer(timer); 332 timer_set_deferrable(timer); 333} 334EXPORT_SYMBOL(init_timer_deferrable); 335 336static inline void detach_timer(struct timer_list *timer, 337 int clear_pending) 338{ 339 struct list_head *entry = &timer->entry; 340 341 __list_del(entry->prev, entry->next); 342 if (clear_pending) 343 entry->next = NULL; 344 entry->prev = LIST_POISON2; 345} 346 347/* 348 * We are using hashed locking: holding per_cpu(tvec_bases).lock 349 * means that all timers which are tied to this base via timer->base are 350 * locked, and the base itself is locked too. 351 * 352 * So __run_timers/migrate_timers can safely modify all timers which could 353 * be found on ->tvX lists. 354 * 355 * When the timer's base is locked, and the timer removed from list, it is 356 * possible to set timer->base = NULL and drop the lock: the timer remains 357 * locked. 358 */ 359static tvec_base_t *lock_timer_base(struct timer_list *timer, 360 unsigned long *flags) 361 __acquires(timer->base->lock) 362{ 363 tvec_base_t *base; 364 365 for (;;) { 366 tvec_base_t *prelock_base = timer->base; 367 base = tbase_get_base(prelock_base); 368 if (likely(base != NULL)) { 369 spin_lock_irqsave(&base->lock, *flags); 370 if (likely(prelock_base == timer->base)) 371 return base; 372 /* The timer has migrated to another CPU */ 373 spin_unlock_irqrestore(&base->lock, *flags); 374 } 375 cpu_relax(); 376 } 377} 378 379int __mod_timer(struct timer_list *timer, unsigned long expires) 380{ 381 tvec_base_t *base, *new_base; 382 unsigned long flags; 383 int ret = 0; 384 385 timer_stats_timer_set_start_info(timer); 386 BUG_ON(!timer->function); 387 388 base = lock_timer_base(timer, &flags); 389 390 if (timer_pending(timer)) { 391 detach_timer(timer, 0); 392 ret = 1; 393 } 394 395 new_base = __get_cpu_var(tvec_bases); 396 397 if (base != new_base) { 398 /* 399 * We are trying to schedule the timer on the local CPU. 400 * However we can't change timer's base while it is running, 401 * otherwise del_timer_sync() can't detect that the timer's 402 * handler yet has not finished. This also guarantees that 403 * the timer is serialized wrt itself. 404 */ 405 if (likely(base->running_timer != timer)) { 406 /* See the comment in lock_timer_base() */ 407 timer_set_base(timer, NULL); 408 spin_unlock(&base->lock); 409 base = new_base; 410 spin_lock(&base->lock); 411 timer_set_base(timer, base); 412 } 413 } 414 415 timer->expires = expires; 416 internal_add_timer(base, timer); 417 spin_unlock_irqrestore(&base->lock, flags); 418 419 return ret; 420} 421 422EXPORT_SYMBOL(__mod_timer); 423 424/** 425 * add_timer_on - start a timer on a particular CPU 426 * @timer: the timer to be added 427 * @cpu: the CPU to start it on 428 * 429 * This is not very scalable on SMP. Double adds are not possible. 430 */ 431void add_timer_on(struct timer_list *timer, int cpu) 432{ 433 tvec_base_t *base = per_cpu(tvec_bases, cpu); 434 unsigned long flags; 435 436 timer_stats_timer_set_start_info(timer); 437 BUG_ON(timer_pending(timer) || !timer->function); 438 spin_lock_irqsave(&base->lock, flags); 439 timer_set_base(timer, base); 440 internal_add_timer(base, timer); 441 spin_unlock_irqrestore(&base->lock, flags); 442} 443 444 445/** 446 * mod_timer - modify a timer's timeout 447 * @timer: the timer to be modified 448 * @expires: new timeout in jiffies 449 * 450 * mod_timer() is a more efficient way to update the expire field of an 451 * active timer (if the timer is inactive it will be activated) 452 * 453 * mod_timer(timer, expires) is equivalent to: 454 * 455 * del_timer(timer); timer->expires = expires; add_timer(timer); 456 * 457 * Note that if there are multiple unserialized concurrent users of the 458 * same timer, then mod_timer() is the only safe way to modify the timeout, 459 * since add_timer() cannot modify an already running timer. 460 * 461 * The function returns whether it has modified a pending timer or not. 462 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an 463 * active timer returns 1.) 464 */ 465int mod_timer(struct timer_list *timer, unsigned long expires) 466{ 467 BUG_ON(!timer->function); 468 469 timer_stats_timer_set_start_info(timer); 470 /* 471 * This is a common optimization triggered by the 472 * networking code - if the timer is re-modified 473 * to be the same thing then just return: 474 */ 475 if (timer->expires == expires && timer_pending(timer)) 476 return 1; 477 478 return __mod_timer(timer, expires); 479} 480 481EXPORT_SYMBOL(mod_timer); 482 483/** 484 * del_timer - deactive a timer. 485 * @timer: the timer to be deactivated 486 * 487 * del_timer() deactivates a timer - this works on both active and inactive 488 * timers. 489 * 490 * The function returns whether it has deactivated a pending timer or not. 491 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an 492 * active timer returns 1.) 493 */ 494int del_timer(struct timer_list *timer) 495{ 496 tvec_base_t *base; 497 unsigned long flags; 498 int ret = 0; 499 500 timer_stats_timer_clear_start_info(timer); 501 if (timer_pending(timer)) { 502 base = lock_timer_base(timer, &flags); 503 if (timer_pending(timer)) { 504 detach_timer(timer, 1); 505 ret = 1; 506 } 507 spin_unlock_irqrestore(&base->lock, flags); 508 } 509 510 return ret; 511} 512 513EXPORT_SYMBOL(del_timer); 514 515#ifdef CONFIG_SMP 516/** 517 * try_to_del_timer_sync - Try to deactivate a timer 518 * @timer: timer do del 519 * 520 * This function tries to deactivate a timer. Upon successful (ret >= 0) 521 * exit the timer is not queued and the handler is not running on any CPU. 522 * 523 * It must not be called from interrupt contexts. 524 */ 525int try_to_del_timer_sync(struct timer_list *timer) 526{ 527 tvec_base_t *base; 528 unsigned long flags; 529 int ret = -1; 530 531 base = lock_timer_base(timer, &flags); 532 533 if (base->running_timer == timer) 534 goto out; 535 536 ret = 0; 537 if (timer_pending(timer)) { 538 detach_timer(timer, 1); 539 ret = 1; 540 } 541out: 542 spin_unlock_irqrestore(&base->lock, flags); 543 544 return ret; 545} 546 547EXPORT_SYMBOL(try_to_del_timer_sync); 548 549/** 550 * del_timer_sync - deactivate a timer and wait for the handler to finish. 551 * @timer: the timer to be deactivated 552 * 553 * This function only differs from del_timer() on SMP: besides deactivating 554 * the timer it also makes sure the handler has finished executing on other 555 * CPUs. 556 * 557 * Synchronization rules: Callers must prevent restarting of the timer, 558 * otherwise this function is meaningless. It must not be called from 559 * interrupt contexts. The caller must not hold locks which would prevent 560 * completion of the timer's handler. The timer's handler must not call 561 * add_timer_on(). Upon exit the timer is not queued and the handler is 562 * not running on any CPU. 563 * 564 * The function returns whether it has deactivated a pending timer or not. 565 */ 566int del_timer_sync(struct timer_list *timer) 567{ 568 for (;;) { 569 int ret = try_to_del_timer_sync(timer); 570 if (ret >= 0) 571 return ret; 572 cpu_relax(); 573 } 574} 575 576EXPORT_SYMBOL(del_timer_sync); 577#endif 578 579static int cascade(tvec_base_t *base, tvec_t *tv, int index) 580{ 581 /* cascade all the timers from tv up one level */ 582 struct timer_list *timer, *tmp; 583 struct list_head tv_list; 584 585 list_replace_init(tv->vec + index, &tv_list); 586 587 /* 588 * We are removing _all_ timers from the list, so we 589 * don't have to detach them individually. 590 */ 591 list_for_each_entry_safe(timer, tmp, &tv_list, entry) { 592 BUG_ON(tbase_get_base(timer->base) != base); 593 internal_add_timer(base, timer); 594 } 595 596 return index; 597} 598 599#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) 600 601/** 602 * __run_timers - run all expired timers (if any) on this CPU. 603 * @base: the timer vector to be processed. 604 * 605 * This function cascades all vectors and executes all expired timer 606 * vectors. 607 */ 608static inline void __run_timers(tvec_base_t *base) 609{ 610 struct timer_list *timer; 611 612 spin_lock_irq(&base->lock); 613 while (time_after_eq(jiffies, base->timer_jiffies)) { 614 struct list_head work_list; 615 struct list_head *head = &work_list; 616 int index = base->timer_jiffies & TVR_MASK; 617 618 /* 619 * Cascade timers: 620 */ 621 if (!index && 622 (!cascade(base, &base->tv2, INDEX(0))) && 623 (!cascade(base, &base->tv3, INDEX(1))) && 624 !cascade(base, &base->tv4, INDEX(2))) 625 cascade(base, &base->tv5, INDEX(3)); 626 ++base->timer_jiffies; 627 list_replace_init(base->tv1.vec + index, &work_list); 628 while (!list_empty(head)) { 629 void (*fn)(unsigned long); 630 unsigned long data; 631 632 timer = list_first_entry(head, struct timer_list,entry); 633 fn = timer->function; 634 data = timer->data; 635 636 timer_stats_account_timer(timer); 637 638 set_running_timer(base, timer); 639 detach_timer(timer, 1); 640 spin_unlock_irq(&base->lock); 641 { 642 int preempt_count = preempt_count(); 643 fn(data); 644 if (preempt_count != preempt_count()) { 645 printk(KERN_WARNING "huh, entered %p " 646 "with preempt_count %08x, exited" 647 " with %08x?\n", 648 fn, preempt_count, 649 preempt_count()); 650 BUG(); 651 } 652 } 653 spin_lock_irq(&base->lock); 654 } 655 } 656 set_running_timer(base, NULL); 657 spin_unlock_irq(&base->lock); 658} 659 660#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ) 661/* 662 * Find out when the next timer event is due to happen. This 663 * is used on S/390 to stop all activity when a cpus is idle. 664 * This functions needs to be called disabled. 665 */ 666static unsigned long __next_timer_interrupt(tvec_base_t *base) 667{ 668 unsigned long timer_jiffies = base->timer_jiffies; 669 unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA; 670 int index, slot, array, found = 0; 671 struct timer_list *nte; 672 tvec_t *varray[4]; 673 674 /* Look for timer events in tv1. */ 675 index = slot = timer_jiffies & TVR_MASK; 676 do { 677 list_for_each_entry(nte, base->tv1.vec + slot, entry) { 678 if (tbase_get_deferrable(nte->base)) 679 continue; 680 681 found = 1; 682 expires = nte->expires; 683 /* Look at the cascade bucket(s)? */ 684 if (!index || slot < index) 685 goto cascade; 686 return expires; 687 } 688 slot = (slot + 1) & TVR_MASK; 689 } while (slot != index); 690 691cascade: 692 /* Calculate the next cascade event */ 693 if (index) 694 timer_jiffies += TVR_SIZE - index; 695 timer_jiffies >>= TVR_BITS; 696 697 /* Check tv2-tv5. */ 698 varray[0] = &base->tv2; 699 varray[1] = &base->tv3; 700 varray[2] = &base->tv4; 701 varray[3] = &base->tv5; 702 703 for (array = 0; array < 4; array++) { 704 tvec_t *varp = varray[array]; 705 706 index = slot = timer_jiffies & TVN_MASK; 707 do { 708 list_for_each_entry(nte, varp->vec + slot, entry) { 709 found = 1; 710 if (time_before(nte->expires, expires)) 711 expires = nte->expires; 712 } 713 /* 714 * Do we still search for the first timer or are 715 * we looking up the cascade buckets ? 716 */ 717 if (found) { 718 /* Look at the cascade bucket(s)? */ 719 if (!index || slot < index) 720 break; 721 return expires; 722 } 723 slot = (slot + 1) & TVN_MASK; 724 } while (slot != index); 725 726 if (index) 727 timer_jiffies += TVN_SIZE - index; 728 timer_jiffies >>= TVN_BITS; 729 } 730 return expires; 731} 732 733/* 734 * Check, if the next hrtimer event is before the next timer wheel 735 * event: 736 */ 737static unsigned long cmp_next_hrtimer_event(unsigned long now, 738 unsigned long expires) 739{ 740 ktime_t hr_delta = hrtimer_get_next_event(); 741 struct timespec tsdelta; 742 unsigned long delta; 743 744 if (hr_delta.tv64 == KTIME_MAX) 745 return expires; 746 747 /* 748 * Expired timer available, let it expire in the next tick 749 */ 750 if (hr_delta.tv64 <= 0) 751 return now + 1; 752 753 tsdelta = ktime_to_timespec(hr_delta); 754 delta = timespec_to_jiffies(&tsdelta); 755 756 /* 757 * Limit the delta to the max value, which is checked in 758 * tick_nohz_stop_sched_tick(): 759 */ 760 if (delta > NEXT_TIMER_MAX_DELTA) 761 delta = NEXT_TIMER_MAX_DELTA; 762 763 /* 764 * Take rounding errors in to account and make sure, that it 765 * expires in the next tick. Otherwise we go into an endless 766 * ping pong due to tick_nohz_stop_sched_tick() retriggering 767 * the timer softirq 768 */ 769 if (delta < 1) 770 delta = 1; 771 now += delta; 772 if (time_before(now, expires)) 773 return now; 774 return expires; 775} 776 777/** 778 * next_timer_interrupt - return the jiffy of the next pending timer 779 * @now: current time (in jiffies) 780 */ 781unsigned long get_next_timer_interrupt(unsigned long now) 782{ 783 tvec_base_t *base = __get_cpu_var(tvec_bases); 784 unsigned long expires; 785 786 spin_lock(&base->lock); 787 expires = __next_timer_interrupt(base); 788 spin_unlock(&base->lock); 789 790 if (time_before_eq(expires, now)) 791 return now; 792 793 return cmp_next_hrtimer_event(now, expires); 794} 795 796#ifdef CONFIG_NO_IDLE_HZ 797unsigned long next_timer_interrupt(void) 798{ 799 return get_next_timer_interrupt(jiffies); 800} 801#endif 802 803#endif 804 805/* 806 * Called from the timer interrupt handler to charge one tick to the current 807 * process. user_tick is 1 if the tick is user time, 0 for system. 808 */ 809void update_process_times(int user_tick) 810{ 811 struct task_struct *p = current; 812 int cpu = smp_processor_id(); 813 814 /* Note: this timer irq context must be accounted for as well. */ 815 if (user_tick) { 816 account_user_time(p, jiffies_to_cputime(1)); 817 account_user_time_scaled(p, jiffies_to_cputime(1)); 818 } else { 819 account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); 820 account_system_time_scaled(p, jiffies_to_cputime(1)); 821 } 822 run_local_timers(); 823 if (rcu_pending(cpu)) 824 rcu_check_callbacks(cpu, user_tick); 825 scheduler_tick(); 826 run_posix_cpu_timers(p); 827} 828 829/* 830 * Nr of active tasks - counted in fixed-point numbers 831 */ 832static unsigned long count_active_tasks(void) 833{ 834 return nr_active() * FIXED_1; 835} 836 837/* 838 * Hmm.. Changed this, as the GNU make sources (load.c) seems to 839 * imply that avenrun[] is the standard name for this kind of thing. 840 * Nothing else seems to be standardized: the fractional size etc 841 * all seem to differ on different machines. 842 * 843 * Requires xtime_lock to access. 844 */ 845unsigned long avenrun[3]; 846 847EXPORT_SYMBOL(avenrun); 848 849/* 850 * calc_load - given tick count, update the avenrun load estimates. 851 * This is called while holding a write_lock on xtime_lock. 852 */ 853static inline void calc_load(unsigned long ticks) 854{ 855 unsigned long active_tasks; /* fixed-point */ 856 static int count = LOAD_FREQ; 857 858 count -= ticks; 859 if (unlikely(count < 0)) { 860 active_tasks = count_active_tasks(); 861 do { 862 CALC_LOAD(avenrun[0], EXP_1, active_tasks); 863 CALC_LOAD(avenrun[1], EXP_5, active_tasks); 864 CALC_LOAD(avenrun[2], EXP_15, active_tasks); 865 count += LOAD_FREQ; 866 } while (count < 0); 867 } 868} 869 870/* 871 * This function runs timers and the timer-tq in bottom half context. 872 */ 873static void run_timer_softirq(struct softirq_action *h) 874{ 875 tvec_base_t *base = __get_cpu_var(tvec_bases); 876 877 hrtimer_run_queues(); 878 879 if (time_after_eq(jiffies, base->timer_jiffies)) 880 __run_timers(base); 881} 882 883/* 884 * Called by the local, per-CPU timer interrupt on SMP. 885 */ 886void run_local_timers(void) 887{ 888 raise_softirq(TIMER_SOFTIRQ); 889 softlockup_tick(); 890} 891 892/* 893 * Called by the timer interrupt. xtime_lock must already be taken 894 * by the timer IRQ! 895 */ 896static inline void update_times(unsigned long ticks) 897{ 898 update_wall_time(); 899 calc_load(ticks); 900} 901 902/* 903 * The 64-bit jiffies value is not atomic - you MUST NOT read it 904 * without sampling the sequence number in xtime_lock. 905 * jiffies is defined in the linker script... 906 */ 907 908void do_timer(unsigned long ticks) 909{ 910 jiffies_64 += ticks; 911 update_times(ticks); 912} 913 914#ifdef __ARCH_WANT_SYS_ALARM 915 916/* 917 * For backwards compatibility? This can be done in libc so Alpha 918 * and all newer ports shouldn't need it. 919 */ 920asmlinkage unsigned long sys_alarm(unsigned int seconds) 921{ 922 return alarm_setitimer(seconds); 923} 924 925#endif 926 927#ifndef __alpha__ 928 929/* 930 * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this 931 * should be moved into arch/i386 instead? 932 */ 933 934/** 935 * sys_getpid - return the thread group id of the current process 936 * 937 * Note, despite the name, this returns the tgid not the pid. The tgid and 938 * the pid are identical unless CLONE_THREAD was specified on clone() in 939 * which case the tgid is the same in all threads of the same group. 940 * 941 * This is SMP safe as current->tgid does not change. 942 */ 943asmlinkage long sys_getpid(void) 944{ 945 return current->tgid; 946} 947 948/* 949 * Accessing ->real_parent is not SMP-safe, it could 950 * change from under us. However, we can use a stale 951 * value of ->real_parent under rcu_read_lock(), see 952 * release_task()->call_rcu(delayed_put_task_struct). 953 */ 954asmlinkage long sys_getppid(void) 955{ 956 int pid; 957 958 rcu_read_lock(); 959 pid = rcu_dereference(current->real_parent)->tgid; 960 rcu_read_unlock(); 961 962 return pid; 963} 964 965asmlinkage long sys_getuid(void) 966{ 967 /* Only we change this so SMP safe */ 968 return current->uid; 969} 970 971asmlinkage long sys_geteuid(void) 972{ 973 /* Only we change this so SMP safe */ 974 return current->euid; 975} 976 977asmlinkage long sys_getgid(void) 978{ 979 /* Only we change this so SMP safe */ 980 return current->gid; 981} 982 983asmlinkage long sys_getegid(void) 984{ 985 /* Only we change this so SMP safe */ 986 return current->egid; 987} 988 989#endif 990 991static void process_timeout(unsigned long __data) 992{ 993 wake_up_process((struct task_struct *)__data); 994} 995 996/** 997 * schedule_timeout - sleep until timeout 998 * @timeout: timeout value in jiffies 999 * 1000 * Make the current task sleep until @timeout jiffies have 1001 * elapsed. The routine will return immediately unless 1002 * the current task state has been set (see set_current_state()). 1003 * 1004 * You can set the task state as follows - 1005 * 1006 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to 1007 * pass before the routine returns. The routine will return 0 1008 * 1009 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is 1010 * delivered to the current task. In this case the remaining time 1011 * in jiffies will be returned, or 0 if the timer expired in time 1012 * 1013 * The current task state is guaranteed to be TASK_RUNNING when this 1014 * routine returns. 1015 * 1016 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule 1017 * the CPU away without a bound on the timeout. In this case the return 1018 * value will be %MAX_SCHEDULE_TIMEOUT. 1019 * 1020 * In all cases the return value is guaranteed to be non-negative. 1021 */ 1022fastcall signed long __sched schedule_timeout(signed long timeout) 1023{ 1024 struct timer_list timer; 1025 unsigned long expire; 1026 1027 switch (timeout) 1028 { 1029 case MAX_SCHEDULE_TIMEOUT: 1030 /* 1031 * These two special cases are useful to be comfortable 1032 * in the caller. Nothing more. We could take 1033 * MAX_SCHEDULE_TIMEOUT from one of the negative value 1034 * but I' d like to return a valid offset (>=0) to allow 1035 * the caller to do everything it want with the retval. 1036 */ 1037 schedule(); 1038 goto out; 1039 default: 1040 /* 1041 * Another bit of PARANOID. Note that the retval will be 1042 * 0 since no piece of kernel is supposed to do a check 1043 * for a negative retval of schedule_timeout() (since it 1044 * should never happens anyway). You just have the printk() 1045 * that will tell you if something is gone wrong and where. 1046 */ 1047 if (timeout < 0) { 1048 printk(KERN_ERR "schedule_timeout: wrong timeout " 1049 "value %lx\n", timeout); 1050 dump_stack(); 1051 current->state = TASK_RUNNING; 1052 goto out; 1053 } 1054 } 1055 1056 expire = timeout + jiffies; 1057 1058 setup_timer(&timer, process_timeout, (unsigned long)current); 1059 __mod_timer(&timer, expire); 1060 schedule(); 1061 del_singleshot_timer_sync(&timer); 1062 1063 timeout = expire - jiffies; 1064 1065 out: 1066 return timeout < 0 ? 0 : timeout; 1067} 1068EXPORT_SYMBOL(schedule_timeout); 1069 1070/* 1071 * We can use __set_current_state() here because schedule_timeout() calls 1072 * schedule() unconditionally. 1073 */ 1074signed long __sched schedule_timeout_interruptible(signed long timeout) 1075{ 1076 __set_current_state(TASK_INTERRUPTIBLE); 1077 return schedule_timeout(timeout); 1078} 1079EXPORT_SYMBOL(schedule_timeout_interruptible); 1080 1081signed long __sched schedule_timeout_uninterruptible(signed long timeout) 1082{ 1083 __set_current_state(TASK_UNINTERRUPTIBLE); 1084 return schedule_timeout(timeout); 1085} 1086EXPORT_SYMBOL(schedule_timeout_uninterruptible); 1087 1088/* Thread ID - the internal kernel "pid" */ 1089asmlinkage long sys_gettid(void) 1090{ 1091 return current->pid; 1092} 1093 1094/** 1095 * do_sysinfo - fill in sysinfo struct 1096 * @info: pointer to buffer to fill 1097 */ 1098int do_sysinfo(struct sysinfo *info) 1099{ 1100 unsigned long mem_total, sav_total; 1101 unsigned int mem_unit, bitcount; 1102 unsigned long seq; 1103 1104 memset(info, 0, sizeof(struct sysinfo)); 1105 1106 do { 1107 struct timespec tp; 1108 seq = read_seqbegin(&xtime_lock); 1109 1110 /* 1111 * This is annoying. The below is the same thing 1112 * posix_get_clock_monotonic() does, but it wants to 1113 * take the lock which we want to cover the loads stuff 1114 * too. 1115 */ 1116 1117 getnstimeofday(&tp); 1118 tp.tv_sec += wall_to_monotonic.tv_sec; 1119 tp.tv_nsec += wall_to_monotonic.tv_nsec; 1120 if (tp.tv_nsec - NSEC_PER_SEC >= 0) { 1121 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; 1122 tp.tv_sec++; 1123 } 1124 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); 1125 1126 info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); 1127 info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); 1128 info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); 1129 1130 info->procs = nr_threads; 1131 } while (read_seqretry(&xtime_lock, seq)); 1132 1133 si_meminfo(info); 1134 si_swapinfo(info); 1135 1136 /* 1137 * If the sum of all the available memory (i.e. ram + swap) 1138 * is less than can be stored in a 32 bit unsigned long then 1139 * we can be binary compatible with 2.2.x kernels. If not, 1140 * well, in that case 2.2.x was broken anyways... 1141 * 1142 * -Erik Andersen <andersee@debian.org> 1143 */ 1144 1145 mem_total = info->totalram + info->totalswap; 1146 if (mem_total < info->totalram || mem_total < info->totalswap) 1147 goto out; 1148 bitcount = 0; 1149 mem_unit = info->mem_unit; 1150 while (mem_unit > 1) { 1151 bitcount++; 1152 mem_unit >>= 1; 1153 sav_total = mem_total; 1154 mem_total <<= 1; 1155 if (mem_total < sav_total) 1156 goto out; 1157 } 1158 1159 /* 1160 * If mem_total did not overflow, multiply all memory values by 1161 * info->mem_unit and set it to 1. This leaves things compatible 1162 * with 2.2.x, and also retains compatibility with earlier 2.4.x 1163 * kernels... 1164 */ 1165 1166 info->mem_unit = 1; 1167 info->totalram <<= bitcount; 1168 info->freeram <<= bitcount; 1169 info->sharedram <<= bitcount; 1170 info->bufferram <<= bitcount; 1171 info->totalswap <<= bitcount; 1172 info->freeswap <<= bitcount; 1173 info->totalhigh <<= bitcount; 1174 info->freehigh <<= bitcount; 1175 1176out: 1177 return 0; 1178} 1179 1180asmlinkage long sys_sysinfo(struct sysinfo __user *info) 1181{ 1182 struct sysinfo val; 1183 1184 do_sysinfo(&val); 1185 1186 if (copy_to_user(info, &val, sizeof(struct sysinfo))) 1187 return -EFAULT; 1188 1189 return 0; 1190} 1191 1192/* 1193 * lockdep: we want to track each per-CPU base as a separate lock-class, 1194 * but timer-bases are kmalloc()-ed, so we need to attach separate 1195 * keys to them: 1196 */ 1197static struct lock_class_key base_lock_keys[NR_CPUS]; 1198 1199static int __devinit init_timers_cpu(int cpu) 1200{ 1201 int j; 1202 tvec_base_t *base; 1203 static char __devinitdata tvec_base_done[NR_CPUS]; 1204 1205 if (!tvec_base_done[cpu]) { 1206 static char boot_done; 1207 1208 if (boot_done) { 1209 /* 1210 * The APs use this path later in boot 1211 */ 1212 base = kmalloc_node(sizeof(*base), GFP_KERNEL, 1213 cpu_to_node(cpu)); 1214 if (!base) 1215 return -ENOMEM; 1216 1217 /* Make sure that tvec_base is 2 byte aligned */ 1218 if (tbase_get_deferrable(base)) { 1219 WARN_ON(1); 1220 kfree(base); 1221 return -ENOMEM; 1222 } 1223 memset(base, 0, sizeof(*base)); 1224 per_cpu(tvec_bases, cpu) = base; 1225 } else { 1226 /* 1227 * This is for the boot CPU - we use compile-time 1228 * static initialisation because per-cpu memory isn't 1229 * ready yet and because the memory allocators are not 1230 * initialised either. 1231 */ 1232 boot_done = 1; 1233 base = &boot_tvec_bases; 1234 } 1235 tvec_base_done[cpu] = 1; 1236 } else { 1237 base = per_cpu(tvec_bases, cpu); 1238 } 1239 1240 spin_lock_init(&base->lock); 1241 lockdep_set_class(&base->lock, base_lock_keys + cpu); 1242 1243 for (j = 0; j < TVN_SIZE; j++) { 1244 INIT_LIST_HEAD(base->tv5.vec + j); 1245 INIT_LIST_HEAD(base->tv4.vec + j); 1246 INIT_LIST_HEAD(base->tv3.vec + j); 1247 INIT_LIST_HEAD(base->tv2.vec + j); 1248 } 1249 for (j = 0; j < TVR_SIZE; j++) 1250 INIT_LIST_HEAD(base->tv1.vec + j); 1251 1252 base->timer_jiffies = jiffies; 1253 return 0; 1254} 1255 1256#ifdef CONFIG_HOTPLUG_CPU 1257static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head) 1258{ 1259 struct timer_list *timer; 1260 1261 while (!list_empty(head)) { 1262 timer = list_first_entry(head, struct timer_list, entry); 1263 detach_timer(timer, 0); 1264 timer_set_base(timer, new_base); 1265 internal_add_timer(new_base, timer); 1266 } 1267} 1268 1269static void __devinit migrate_timers(int cpu) 1270{ 1271 tvec_base_t *old_base; 1272 tvec_base_t *new_base; 1273 int i; 1274 1275 BUG_ON(cpu_online(cpu)); 1276 old_base = per_cpu(tvec_bases, cpu); 1277 new_base = get_cpu_var(tvec_bases); 1278 1279 local_irq_disable(); 1280 double_spin_lock(&new_base->lock, &old_base->lock, 1281 smp_processor_id() < cpu); 1282 1283 BUG_ON(old_base->running_timer); 1284 1285 for (i = 0; i < TVR_SIZE; i++) 1286 migrate_timer_list(new_base, old_base->tv1.vec + i); 1287 for (i = 0; i < TVN_SIZE; i++) { 1288 migrate_timer_list(new_base, old_base->tv2.vec + i); 1289 migrate_timer_list(new_base, old_base->tv3.vec + i); 1290 migrate_timer_list(new_base, old_base->tv4.vec + i); 1291 migrate_timer_list(new_base, old_base->tv5.vec + i); 1292 } 1293 1294 double_spin_unlock(&new_base->lock, &old_base->lock, 1295 smp_processor_id() < cpu); 1296 local_irq_enable(); 1297 put_cpu_var(tvec_bases); 1298} 1299#endif /* CONFIG_HOTPLUG_CPU */ 1300 1301static int __cpuinit timer_cpu_notify(struct notifier_block *self, 1302 unsigned long action, void *hcpu) 1303{ 1304 long cpu = (long)hcpu; 1305 switch(action) { 1306 case CPU_UP_PREPARE: 1307 case CPU_UP_PREPARE_FROZEN: 1308 if (init_timers_cpu(cpu) < 0) 1309 return NOTIFY_BAD; 1310 break; 1311#ifdef CONFIG_HOTPLUG_CPU 1312 case CPU_DEAD: 1313 case CPU_DEAD_FROZEN: 1314 migrate_timers(cpu); 1315 break; 1316#endif 1317 default: 1318 break; 1319 } 1320 return NOTIFY_OK; 1321} 1322 1323static struct notifier_block __cpuinitdata timers_nb = { 1324 .notifier_call = timer_cpu_notify, 1325}; 1326 1327 1328void __init init_timers(void) 1329{ 1330 int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, 1331 (void *)(long)smp_processor_id()); 1332 1333 init_timer_stats(); 1334 1335 BUG_ON(err == NOTIFY_BAD); 1336 register_cpu_notifier(&timers_nb); 1337 open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); 1338} 1339 1340#ifdef CONFIG_TIME_INTERPOLATION 1341 1342struct time_interpolator *time_interpolator __read_mostly; 1343static struct time_interpolator *time_interpolator_list __read_mostly; 1344static DEFINE_SPINLOCK(time_interpolator_lock); 1345 1346static inline cycles_t time_interpolator_get_cycles(unsigned int src) 1347{ 1348 unsigned long (*x)(void); 1349 1350 switch (src) 1351 { 1352 case TIME_SOURCE_FUNCTION: 1353 x = time_interpolator->addr; 1354 return x(); 1355 1356 case TIME_SOURCE_MMIO64 : 1357 return readq_relaxed((void __iomem *)time_interpolator->addr); 1358 1359 case TIME_SOURCE_MMIO32 : 1360 return readl_relaxed((void __iomem *)time_interpolator->addr); 1361 1362 default: return get_cycles(); 1363 } 1364} 1365 1366static inline u64 time_interpolator_get_counter(int writelock) 1367{ 1368 unsigned int src = time_interpolator->source; 1369 1370 if (time_interpolator->jitter) 1371 { 1372 cycles_t lcycle; 1373 cycles_t now; 1374 1375 do { 1376 lcycle = time_interpolator->last_cycle; 1377 now = time_interpolator_get_cycles(src); 1378 if (lcycle && time_after(lcycle, now)) 1379 return lcycle; 1380 1381 /* When holding the xtime write lock, there's no need 1382 * to add the overhead of the cmpxchg. Readers are 1383 * force to retry until the write lock is released. 1384 */ 1385 if (writelock) { 1386 time_interpolator->last_cycle = now; 1387 return now; 1388 } 1389 /* Keep track of the last timer value returned. The use of cmpxchg here 1390 * will cause contention in an SMP environment. 1391 */ 1392 } while (unlikely(cmpxchg(&time_interpolator->last_cycle, lcycle, now) != lcycle)); 1393 return now; 1394 } 1395 else 1396 return time_interpolator_get_cycles(src); 1397} 1398 1399void time_interpolator_reset(void) 1400{ 1401 time_interpolator->offset = 0; 1402 time_interpolator->last_counter = time_interpolator_get_counter(1); 1403} 1404 1405#define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift) 1406 1407unsigned long time_interpolator_get_offset(void) 1408{ 1409 /* If we do not have a time interpolator set up then just return zero */ 1410 if (!time_interpolator) 1411 return 0; 1412 1413 return time_interpolator->offset + 1414 GET_TI_NSECS(time_interpolator_get_counter(0), time_interpolator); 1415} 1416 1417#define INTERPOLATOR_ADJUST 65536 1418#define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST 1419 1420void time_interpolator_update(long delta_nsec) 1421{ 1422 u64 counter; 1423 unsigned long offset; 1424 1425 /* If there is no time interpolator set up then do nothing */ 1426 if (!time_interpolator) 1427 return; 1428 1429 /* 1430 * The interpolator compensates for late ticks by accumulating the late 1431 * time in time_interpolator->offset. A tick earlier than expected will 1432 * lead to a reset of the offset and a corresponding jump of the clock 1433 * forward. Again this only works if the interpolator clock is running 1434 * slightly slower than the regular clock and the tuning logic insures 1435 * that. 1436 */ 1437 1438 counter = time_interpolator_get_counter(1); 1439 offset = time_interpolator->offset + 1440 GET_TI_NSECS(counter, time_interpolator); 1441 1442 if (delta_nsec < 0 || (unsigned long) delta_nsec < offset) 1443 time_interpolator->offset = offset - delta_nsec; 1444 else { 1445 time_interpolator->skips++; 1446 time_interpolator->ns_skipped += delta_nsec - offset; 1447 time_interpolator->offset = 0; 1448 } 1449 time_interpolator->last_counter = counter; 1450 1451 /* Tuning logic for time interpolator invoked every minute or so. 1452 * Decrease interpolator clock speed if no skips occurred and an offset is carried. 1453 * Increase interpolator clock speed if we skip too much time. 1454 */ 1455 if (jiffies % INTERPOLATOR_ADJUST == 0) 1456 { 1457 if (time_interpolator->skips == 0 && time_interpolator->offset > tick_nsec) 1458 time_interpolator->nsec_per_cyc--; 1459 if (time_interpolator->ns_skipped > INTERPOLATOR_MAX_SKIP && time_interpolator->offset == 0) 1460 time_interpolator->nsec_per_cyc++; 1461 time_interpolator->skips = 0; 1462 time_interpolator->ns_skipped = 0; 1463 } 1464} 1465 1466static inline int 1467is_better_time_interpolator(struct time_interpolator *new) 1468{ 1469 if (!time_interpolator) 1470 return 1; 1471 return new->frequency > 2*time_interpolator->frequency || 1472 (unsigned long)new->drift < (unsigned long)time_interpolator->drift; 1473} 1474 1475void 1476register_time_interpolator(struct time_interpolator *ti) 1477{ 1478 unsigned long flags; 1479 1480 /* Sanity check */ 1481 BUG_ON(ti->frequency == 0 || ti->mask == 0); 1482 1483 ti->nsec_per_cyc = ((u64)NSEC_PER_SEC << ti->shift) / ti->frequency; 1484 spin_lock(&time_interpolator_lock); 1485 write_seqlock_irqsave(&xtime_lock, flags); 1486 if (is_better_time_interpolator(ti)) { 1487 time_interpolator = ti; 1488 time_interpolator_reset(); 1489 } 1490 write_sequnlock_irqrestore(&xtime_lock, flags); 1491 1492 ti->next = time_interpolator_list; 1493 time_interpolator_list = ti; 1494 spin_unlock(&time_interpolator_lock); 1495} 1496 1497void 1498unregister_time_interpolator(struct time_interpolator *ti) 1499{ 1500 struct time_interpolator *curr, **prev; 1501 unsigned long flags; 1502 1503 spin_lock(&time_interpolator_lock); 1504 prev = &time_interpolator_list; 1505 for (curr = *prev; curr; curr = curr->next) { 1506 if (curr == ti) { 1507 *prev = curr->next; 1508 break; 1509 } 1510 prev = &curr->next; 1511 } 1512 1513 write_seqlock_irqsave(&xtime_lock, flags); 1514 if (ti == time_interpolator) { 1515 /* we lost the best time-interpolator: */ 1516 time_interpolator = NULL; 1517 /* find the next-best interpolator */ 1518 for (curr = time_interpolator_list; curr; curr = curr->next) 1519 if (is_better_time_interpolator(curr)) 1520 time_interpolator = curr; 1521 time_interpolator_reset(); 1522 } 1523 write_sequnlock_irqrestore(&xtime_lock, flags); 1524 spin_unlock(&time_interpolator_lock); 1525} 1526#endif /* CONFIG_TIME_INTERPOLATION */ 1527 1528/** 1529 * msleep - sleep safely even with waitqueue interruptions 1530 * @msecs: Time in milliseconds to sleep for 1531 */ 1532void msleep(unsigned int msecs) 1533{ 1534 unsigned long timeout = msecs_to_jiffies(msecs) + 1; 1535 1536 while (timeout) 1537 timeout = schedule_timeout_uninterruptible(timeout); 1538} 1539 1540EXPORT_SYMBOL(msleep); 1541 1542/** 1543 * msleep_interruptible - sleep waiting for signals 1544 * @msecs: Time in milliseconds to sleep for 1545 */ 1546unsigned long msleep_interruptible(unsigned int msecs) 1547{ 1548 unsigned long timeout = msecs_to_jiffies(msecs) + 1; 1549 1550 while (timeout && !signal_pending(current)) 1551 timeout = schedule_timeout_interruptible(timeout); 1552 return jiffies_to_msecs(timeout); 1553} 1554 1555EXPORT_SYMBOL(msleep_interruptible); 1556