/*	$OpenBSD: sched_bsd.c,v 1.93 2024/06/03 12:48:25 claudio Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/clockintr.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <uvm/uvm_extern.h>
#include <sys/sched.h>
#include <sys/timeout.h>
#include <sys/smr.h>
#include <sys/tracepoint.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

uint64_t roundrobin_period;	/* [I] roundrobin period (ns) */
int	lbolt;			/* once a second sleep address */

struct mutex sched_lock;

void	update_loadavg(void *);
void	schedcpu(void *);
uint32_t decay_aftersleep(uint32_t, uint32_t);

extern struct cpuset sched_idle_cpus;

/*
 * constants for averages over 1, 5, and 15 minutes when sampling at
 * 5 second intervals.
 */
static const fixpt_t cexp[3] = {
	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
};
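
/*
 * That is, with a 5 second sample period the decay factors above are
 * exp(-5/60), exp(-5/300) and exp(-5/900) for the 1, 5 and 15 minute
 * windows, which reduce to exp(-1/12), exp(-1/60) and exp(-1/180).
 */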

struct loadavg averunnable;

/*
 * Force switch among equal priority processes every 100ms.
 */
void
roundrobin(struct clockrequest *cr, void *cf, void *arg)
{
	uint64_t count;
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;

	count = clockrequest_advance(cr, roundrobin_period);

	if (ci->ci_curproc != NULL) {
		if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
			/*
			 * The process has already been through a roundrobin
			 * without switching and may be hogging the CPU.
			 * Indicate that the process should yield.
			 */
			atomic_setbits_int(&spc->spc_schedflags,
			    SPCF_SEENRR | SPCF_SHOULDYIELD);
		} else {
			atomic_setbits_int(&spc->spc_schedflags,
			    SPCF_SEENRR);
		}
	}

	if (spc->spc_nrun || spc->spc_schedflags & SPCF_SHOULDYIELD)
		need_resched(ci);
}

/*
 * update_loadavg: compute a tenex style load average of a quantity on
 * 1, 5, and 15 minute intervals.
 */
void
update_loadavg(void *unused)
{
	static struct timeout to = TIMEOUT_INITIALIZER(update_loadavg, NULL);
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	u_int i, nrun = 0;

	CPU_INFO_FOREACH(cii, ci) {
		if (!cpuset_isset(&sched_idle_cpus, ci))
			nrun++;
		nrun += ci->ci_schedstate.spc_nrun;
	}

	for (i = 0; i < 3; i++) {
		averunnable.ldavg[i] = (cexp[i] * averunnable.ldavg[i] +
		    nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
	}

	timeout_add_sec(&to, 5);
}

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *	Note that, as ps(1) mentions, this can let percentages
 *	total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *	for (i = 0; i < (5 * loadavg); i++)
 *		p_estcpu *= decay;
 * will compute
 *	p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 *	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *	b = 2 * loadavg
 * then
 *	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *	    exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *	    therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *	    ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *	    therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *    Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *	ln(factor) =~ (-2.30/5*loadav), or
 *	factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	    exp(-1/b) =~ (b-1)/b =~ b/(b+1).  QED
 *
 * Proof of (2):
 *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *	power*ln(b/(b+1)) =~ -2.30, or
 *	power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav:	1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */
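
/*
 * A quick check of the first column: with loadav == 1 we have b == 2 and
 * decay == 2/3, so solving (2/3) ** power =~ .1 gives
 * power =~ ln(.1)/ln(2/3) =~ 2.30/0.405 =~ 5.68.
 */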

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11

/*
 * Recompute process priorities, every second.
 */
void
schedcpu(void *unused)
{
	static struct timeout to = TIMEOUT_INITIALIZER(schedcpu, NULL);
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct proc *p;
	unsigned int newcpu;

	LIST_FOREACH(p, &allproc, p_list) {
		/*
		 * Idle threads are never placed on the runqueue,
		 * therefore computing their priority is pointless.
		 */
		if (p->p_cpu != NULL &&
		    p->p_cpu->ci_schedstate.spc_idleproc == p)
			continue;
		/*
		 * Increment sleep time (if sleeping). We ignore overflow.
		 */
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		SCHED_LOCK();
		/*
		 * p_pctcpu is only for diagnostic tools such as ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (stathz == 100)?
		    ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
		    100 * (((fixpt_t) p->p_cpticks)
		    << (FSHIFT - CCPU_SHIFT)) / stathz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
		    (p->p_cpticks * FSCALE / stathz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
		setpriority(p, newcpu, p->p_p->ps_nice);

		if (p->p_stat == SRUN &&
		    (p->p_runpri / SCHED_PPQ) != (p->p_usrpri / SCHED_PPQ)) {
			remrunqueue(p);
			setrunqueue(p->p_cpu, p, p->p_usrpri);
		}
		SCHED_UNLOCK();
	}
	wakeup(&lbolt);
	timeout_add_sec(&to, 1);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
uint32_t
decay_aftersleep(uint32_t estcpu, uint32_t slptime)
{
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	uint32_t newcpu;

	if (slptime > 5 * loadfac)
		newcpu = 0;
	else {
		newcpu = estcpu;
		slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --slptime)
			newcpu = decay_cpu(loadfac, newcpu);
	}

	return (newcpu);
}
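
/*
 * For instance, with a steady load average of 1.0, loadfac is 2 * FSCALE,
 * so each iteration above scales newcpu by roughly 2/3 and a dozen or so
 * iterations take even a maximal p_estcpu of 255 down to zero.
 */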

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.
 */
void
yield(void)
{
	struct proc *p = curproc;

	SCHED_LOCK();
	setrunqueue(p->p_cpu, p, p->p_usrpri);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK();
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.  The next process to run is
 * chosen by the normal selection criteria.
 */
void
preempt(void)
{
	struct proc *p = curproc;

	SCHED_LOCK();
	setrunqueue(p->p_cpu, p, p->p_usrpri);
	p->p_ru.ru_nivcsw++;
	mi_switch();
	SCHED_UNLOCK();
}

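/*
 * mi_switch: the machine-independent part of a context switch.  Charge
 * the outgoing thread for its run time, stop its optional clock
 * interrupts, let sched_chooseproc() pick the next thread and switch to
 * it.  Called with the scheduler lock held; the lock is dropped and
 * reacquired internally and is held again when mi_switch() returns.
 */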
void
mi_switch(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p = curproc;
	struct proc *nextproc;
	struct process *pr = p->p_p;
	struct timespec ts;
	int oldipl;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	KASSERT(p->p_stat != SONPROC);

	SCHED_ASSERT_LOCKED();

#ifdef MULTIPROCESSOR
	/*
	 * Release the kernel_lock, as we are about to yield the CPU.
	 */
	if (_kernel_lock_held())
		hold_count = __mp_release_all(&kernel_lock);
	else
		hold_count = 0;
#endif

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	nanouptime(&ts);
	if (timespeccmp(&ts, &spc->spc_runtime, <)) {
#if 0
		printf("uptime is not monotonic! "
		    "ts=%lld.%09lu, runtime=%lld.%09lu\n",
		    (long long)ts.tv_sec, ts.tv_nsec,
		    (long long)spc->spc_runtime.tv_sec,
		    spc->spc_runtime.tv_nsec);
#endif
		timespecclear(&ts);
	} else {
		timespecsub(&ts, &spc->spc_runtime, &ts);
	}

	/* add the time counts for this thread to the process's total */
	tuagg_locked(pr, p, &ts);

	/* Stop any optional clock interrupts. */
	if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
		clockintr_cancel(&spc->spc_itimer);
	}
	if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
		clockintr_cancel(&spc->spc_profclock);
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);

	nextproc = sched_chooseproc();

	/* preserve old IPL level so we can switch back to that */
	oldipl = MUTEX_OLDIPL(&sched_lock);

	if (p != nextproc) {
		uvmexp.swtch++;
		TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET,
		    nextproc->p_p->ps_pid);
		cpu_switchto(p, nextproc);
		TRACEPOINT(sched, on__cpu, NULL);
	} else {
		TRACEPOINT(sched, remain__cpu, NULL);
		p->p_stat = SONPROC;
	}

	clear_resched(curcpu());

	SCHED_ASSERT_LOCKED();

	/* Restore proc's IPL. */
	MUTEX_OLDIPL(&sched_lock) = oldipl;
	SCHED_UNLOCK();

	SCHED_ASSERT_UNLOCKED();

	assertwaitok();
	smr_idle();

	/*
	 * We're running again; record our new start time.  We might
	 * be running on a new CPU now, so refetch the schedstate_percpu
	 * pointer.
	 */
	KASSERT(p->p_cpu == curcpu());
	spc = &p->p_cpu->ci_schedstate;

	/* Start any optional clock interrupts needed by the thread. */
	if (ISSET(p->p_p->ps_flags, PS_ITIMER)) {
		atomic_setbits_int(&spc->spc_schedflags, SPCF_ITIMER);
		clockintr_advance(&spc->spc_itimer, hardclock_period);
	}
	if (ISSET(p->p_p->ps_flags, PS_PROFIL)) {
		atomic_setbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
		clockintr_advance(&spc->spc_profclock, profclock_period);
	}

	nanouptime(&spc->spc_runtime);

#ifdef MULTIPROCESSOR
	/*
	 * Reacquire the kernel_lock now.  We do this after we've
	 * released the scheduler lock to avoid deadlock, and before
	 * we reacquire the interlock and the scheduler lock.
	 */
	if (hold_count)
		__mp_acquire_count(&kernel_lock, hold_count);
#endif
	SCHED_LOCK();
}

/*
 * Change process state to be runnable,
 * placing it on the run queue.
 */
void
setrunnable(struct proc *p)
{
	struct process *pr = p->p_p;
	u_char prio;

	SCHED_ASSERT_LOCKED();

	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SONPROC:
	case SDEAD:
	case SIDL:
	default:
		panic("setrunnable");
	case SSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((pr->ps_flags & PS_TRACED) != 0 && pr->ps_xsig != 0)
			atomic_setbits_int(&p->p_siglist, sigmask(pr->ps_xsig));
		prio = p->p_usrpri;
		setrunqueue(NULL, p, prio);
		break;
	case SSLEEP:
		prio = p->p_slppri;

		/* if not yet asleep, don't add to runqueue */
		if (ISSET(p->p_flag, P_WSLEEP))
			return;
		setrunqueue(NULL, p, prio);
		TRACEPOINT(sched, wakeup, p->p_tid + THREAD_PID_OFFSET,
		    p->p_p->ps_pid, CPU_INFO_UNIT(p->p_cpu));
		break;
	}
	if (p->p_slptime > 1) {
		uint32_t newcpu;

		newcpu = decay_aftersleep(p->p_estcpu, p->p_slptime);
		setpriority(p, newcpu, pr->ps_nice);
	}
	p->p_slptime = 0;
}

/*
 * Compute the priority of a process.
 */
void
setpriority(struct proc *p, uint32_t newcpu, uint8_t nice)
{
	unsigned int newprio;

	newprio = min((PUSER + newcpu + NICE_WEIGHT * (nice - NZERO)), MAXPRI);

	SCHED_ASSERT_LOCKED();
	p->p_estcpu = newcpu;
	p->p_usrpri = newprio;
}

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in kern_synch.c)
 * will compute a different value each time p_estcpu increases.  This can
 * cause a switch, but unless the priority crosses a PPQ boundary the actual
 * queue will not change.  The cpu usage estimator ramps up quite quickly
 * when the process is running (linearly), and decays away exponentially, at
 * a rate which is proportionally slower when the system is busy.  The basic
 * principle is that the system will 90% forget that the process used a lot
 * of CPU time in 5 * loadav seconds.  This causes the system to favor
 * processes which haven't run much recently, and to round-robin among other
 * processes.
 */
void
schedclock(struct proc *p)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	uint32_t newcpu;

	if (p == spc->spc_idleproc || spc->spc_spinning)
		return;

	SCHED_LOCK();
	newcpu = ESTCPULIM(p->p_estcpu + 1);
	setpriority(p, newcpu, p->p_p->ps_nice);
	SCHED_UNLOCK();
}

void (*cpu_setperf)(int);

#define PERFPOL_MANUAL 0
#define PERFPOL_AUTO 1
#define PERFPOL_HIGH 2
int perflevel = 100;
int perfpolicy = PERFPOL_AUTO;

#ifndef SMALL_KERNEL
/*
 * The code below handles CPU throttling.
 */
#include <sys/sysctl.h>

void setperf_auto(void *);
struct timeout setperf_to = TIMEOUT_INITIALIZER(setperf_auto, NULL);
extern int hw_power;

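/*
 * Automatic performance policy.  Every 100ms, look at how much of the
 * elapsed interval each online CPU (and the machine as a whole) spent
 * idle.  If any CPU was busy for more than two thirds of the interval,
 * or the whole system for more than half of it, jump straight to full
 * speed.  The downbeats counter provides hysteresis: busy beats push it
 * up (to at most 5) and the level is only dropped to the minimum once
 * enough idle beats have drained it again, so a brief idle spell does
 * not throttle the machine.  On external power we stay at full speed.
 */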
void
setperf_auto(void *v)
{
	static uint64_t *idleticks, *totalticks;
	static int downbeats;
	int i, j = 0;
	int speedup = 0;
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	uint64_t idle, total, allidle = 0, alltotal = 0;

	if (perfpolicy != PERFPOL_AUTO)
		return;

	if (cpu_setperf == NULL)
		return;

	if (hw_power) {
		speedup = 1;
		goto faster;
	}

	if (!idleticks)
		if (!(idleticks = mallocarray(ncpusfound, sizeof(*idleticks),
		    M_DEVBUF, M_NOWAIT | M_ZERO)))
			return;
	if (!totalticks)
		if (!(totalticks = mallocarray(ncpusfound, sizeof(*totalticks),
		    M_DEVBUF, M_NOWAIT | M_ZERO))) {
			free(idleticks, M_DEVBUF,
			    sizeof(*idleticks) * ncpusfound);
			idleticks = NULL;	/* retry the allocation next time */
			return;
		}
	CPU_INFO_FOREACH(cii, ci) {
		if (!cpu_is_online(ci))
			continue;
		total = 0;
		for (i = 0; i < CPUSTATES; i++) {
			total += ci->ci_schedstate.spc_cp_time[i];
		}
		total -= totalticks[j];
		idle = ci->ci_schedstate.spc_cp_time[CP_IDLE] - idleticks[j];
		if (idle < total / 3)
			speedup = 1;
		alltotal += total;
		allidle += idle;
		idleticks[j] += idle;
		totalticks[j] += total;
		j++;
	}
	if (allidle < alltotal / 2)
		speedup = 1;
	if (speedup && downbeats < 5)
		downbeats++;

	if (speedup && perflevel != 100) {
faster:
		perflevel = 100;
		cpu_setperf(perflevel);
	} else if (!speedup && perflevel != 0 && --downbeats <= 0) {
		perflevel = 0;
		cpu_setperf(perflevel);
	}

	timeout_add_msec(&setperf_to, 100);
}

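/*
 * Sysctl glue for the performance level and policy.  sysctl_hwsetperf()
 * backs hw.setperf and only accepts a new level while the policy is
 * "manual" (otherwise the current level is read-only);
 * sysctl_hwperfpolicy() backs hw.perfpolicy and switches between
 * "manual", "auto" and "high".  For example, "sysctl hw.perfpolicy=manual"
 * followed by "sysctl hw.setperf=50" holds the level at 50 on hardware
 * that provides cpu_setperf.
 */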
int
sysctl_hwsetperf(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	int err;

	if (!cpu_setperf)
		return EOPNOTSUPP;

	if (perfpolicy != PERFPOL_MANUAL)
		return sysctl_rdint(oldp, oldlenp, newp, perflevel);

	err = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
	    &perflevel, 0, 100);
	if (err)
		return err;

	if (newp != NULL)
		cpu_setperf(perflevel);

	return 0;
}

int
sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char policy[32];
	int err;

	if (!cpu_setperf)
		return EOPNOTSUPP;

	switch (perfpolicy) {
	case PERFPOL_MANUAL:
		strlcpy(policy, "manual", sizeof(policy));
		break;
	case PERFPOL_AUTO:
		strlcpy(policy, "auto", sizeof(policy));
		break;
	case PERFPOL_HIGH:
		strlcpy(policy, "high", sizeof(policy));
		break;
	default:
		strlcpy(policy, "unknown", sizeof(policy));
		break;
	}

	if (newp == NULL)
		return sysctl_rdstring(oldp, oldlenp, newp, policy);

	err = sysctl_string(oldp, oldlenp, newp, newlen, policy, sizeof(policy));
	if (err)
		return err;
	if (strcmp(policy, "manual") == 0)
		perfpolicy = PERFPOL_MANUAL;
	else if (strcmp(policy, "auto") == 0)
		perfpolicy = PERFPOL_AUTO;
	else if (strcmp(policy, "high") == 0)
		perfpolicy = PERFPOL_HIGH;
	else
		return EINVAL;

	if (perfpolicy == PERFPOL_AUTO) {
		timeout_add_msec(&setperf_to, 200);
	} else if (perfpolicy == PERFPOL_HIGH) {
		perflevel = 100;
		cpu_setperf(perflevel);
	}
	return 0;
}
#endif

/*
 * Start the scheduler's periodic timeouts.
 */
void
scheduler_start(void)
{
	schedcpu(NULL);
	update_loadavg(NULL);

#ifndef SMALL_KERNEL
	if (perfpolicy == PERFPOL_AUTO)
		timeout_add_msec(&setperf_to, 200);
#endif
}