kern_resource.c revision 204670
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *    The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *    @(#)kern_resource.c    8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_resource.c 204670 2010-03-03 21:46:51Z rrs $");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/time.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>


static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define UIHASH(uid) (&uihashtbl[(uid) & uihash])
static struct rwlock uihashtbl_lock;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;        /* size of hash table - 1 */

static void calcru1(struct proc *p, struct rusage_ext *ruxp,
    struct timeval *up, struct timeval *sp);
static int  donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);

/*
 * Resource controls and accounting.
 */
#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
    int which;
    int who;
};
#endif
int
getpriority(td, uap)
    struct thread *td;
    register struct getpriority_args *uap;
{
    struct proc *p;
    struct pgrp *pg;
    int error, low;

    error = 0;
    low = PRIO_MAX + 1;
    switch (uap->which) {

    case PRIO_PROCESS:
        if (uap->who == 0)
            low = td->td_proc->p_nice;
        else {
            p = pfind(uap->who);
            if (p == NULL)
                break;
            if (p_cansee(td, p) == 0)
                low = p->p_nice;
            PROC_UNLOCK(p);
        }
        break;

    case PRIO_PGRP:
        sx_slock(&proctree_lock);
        if (uap->who == 0) {
            pg = td->td_proc->p_pgrp;
            PGRP_LOCK(pg);
        } else {
            pg = pgfind(uap->who);
            if (pg == NULL) {
                sx_sunlock(&proctree_lock);
                break;
            }
        }
        sx_sunlock(&proctree_lock);
        LIST_FOREACH(p, &pg->pg_members, p_pglist) {
            PROC_LOCK(p);
            if (p_cansee(td, p) == 0) {
                if (p->p_nice < low)
                    low = p->p_nice;
            }
            PROC_UNLOCK(p);
        }
        PGRP_UNLOCK(pg);
        break;

    case PRIO_USER:
        if (uap->who == 0)
            uap->who = td->td_ucred->cr_uid;
        sx_slock(&allproc_lock);
        FOREACH_PROC_IN_SYSTEM(p) {
            /* Do not bother to check PRS_NEW processes */
            if (p->p_state == PRS_NEW)
                continue;
            PROC_LOCK(p);
            if (p_cansee(td, p) == 0 &&
                p->p_ucred->cr_uid == uap->who) {
                if (p->p_nice < low)
                    low = p->p_nice;
            }
            PROC_UNLOCK(p);
        }
        sx_sunlock(&allproc_lock);
        break;

    default:
        error = EINVAL;
        break;
    }
    if (low == PRIO_MAX + 1 && error == 0)
        error = ESRCH;
    td->td_retval[0] = low;
    return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
    int which;
    int who;
    int prio;
};
#endif
int
setpriority(td, uap)
    struct thread *td;
    struct setpriority_args *uap;
{
    struct proc *curp, *p;
    struct pgrp *pg;
    int found = 0, error = 0;

    curp = td->td_proc;
    switch (uap->which) {
    case PRIO_PROCESS:
        if (uap->who == 0) {
            PROC_LOCK(curp);
            error = donice(td, curp, uap->prio);
            PROC_UNLOCK(curp);
        } else {
            p = pfind(uap->who);
            if (p == NULL)
                break;
            error = p_cansee(td, p);
            if (error == 0)
                error = donice(td, p, uap->prio);
            PROC_UNLOCK(p);
        }
        found++;
        break;

    case PRIO_PGRP:
        sx_slock(&proctree_lock);
        if (uap->who == 0) {
            pg = curp->p_pgrp;
            PGRP_LOCK(pg);
        } else {
            pg = pgfind(uap->who);
            if (pg == NULL) {
                sx_sunlock(&proctree_lock);
                break;
            }
        }
        sx_sunlock(&proctree_lock);
        LIST_FOREACH(p, &pg->pg_members, p_pglist) {
            PROC_LOCK(p);
            if (p_cansee(td, p) == 0) {
                error = donice(td, p, uap->prio);
                found++;
            }
            PROC_UNLOCK(p);
        }
        PGRP_UNLOCK(pg);
        break;

    case PRIO_USER:
        if (uap->who == 0)
            uap->who = td->td_ucred->cr_uid;
        sx_slock(&allproc_lock);
        FOREACH_PROC_IN_SYSTEM(p) {
            PROC_LOCK(p);
            if (p->p_ucred->cr_uid == uap->who &&
                p_cansee(td, p) == 0) {
                error = donice(td, p, uap->prio);
                found++;
            }
            PROC_UNLOCK(p);
        }
        sx_sunlock(&allproc_lock);
        break;

    default:
        error = EINVAL;
        break;
    }
    if (found == 0 && error == 0)
        error = ESRCH;
    return (error);
}
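/*
 * A minimal userland sketch of the interface implemented above, using the
 * standard getpriority(2)/setpriority(2) wrappers from <sys/resource.h>.
 * Illustrative only (not part of this kernel file), so it is excluded
 * from compilation.
 */
#if 0
#include <sys/resource.h>
#include <errno.h>
#include <stdio.h>

int
main(void)
{
    int prio;

    /* getpriority() can legitimately return -1, so clear errno first. */
    errno = 0;
    prio = getpriority(PRIO_PROCESS, 0);    /* who == 0: calling process */
    if (prio == -1 && errno != 0) {
        perror("getpriority");
        return (1);
    }
    printf("current nice: %d\n", prio);

    /* Raising nice (lowering priority) needs no privilege; see donice(). */
    if (setpriority(PRIO_PROCESS, 0, prio + 1) == -1)
        perror("setpriority");
    return (0);
}
#endif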
/*
 * Set "nice" for a (whole) process.
 */
static int
donice(struct thread *td, struct proc *p, int n)
{
    int error;

    PROC_LOCK_ASSERT(p, MA_OWNED);
    if ((error = p_cansched(td, p)))
        return (error);
    if (n > PRIO_MAX)
        n = PRIO_MAX;
    if (n < PRIO_MIN)
        n = PRIO_MIN;
    if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
        return (EACCES);
    sched_nice(p, n);
    return (0);
}

/*
 * Set realtime priority for LWP.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_thread_args {
    int function;
    lwpid_t lwpid;
    struct rtprio *rtp;
};
#endif
int
rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
{
    struct proc *p;
    struct rtprio rtp;
    struct thread *td1;
    int cierror, error;

    /* Perform copyin before acquiring locks if needed. */
    if (uap->function == RTP_SET)
        cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
    else
        cierror = 0;

    /*
     * Though lwpid is unique, only the current process is supported
     * since there is no efficient way to look up a LWP yet.
     */
    p = td->td_proc;
    PROC_LOCK(p);

    switch (uap->function) {
    case RTP_LOOKUP:
        if ((error = p_cansee(td, p)))
            break;
        if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
            td1 = td;
        else
            td1 = thread_find(p, uap->lwpid);
        if (td1 != NULL)
            pri_to_rtp(td1, &rtp);
        else
            error = ESRCH;
        PROC_UNLOCK(p);
        return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
    case RTP_SET:
        if ((error = p_cansched(td, p)) || (error = cierror))
            break;

        /* Disallow setting rtprio in most cases if not superuser. */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious.  However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process).  Fix me!  XXX
 */
#if 0
        if (RTP_PRIO_IS_REALTIME(rtp.type)) {
#else
        if (rtp.type != RTP_PRIO_NORMAL) {
#endif
            error = priv_check(td, PRIV_SCHED_RTPRIO);
            if (error)
                break;
        }

        if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
            td1 = td;
        else
            td1 = thread_find(p, uap->lwpid);
        if (td1 != NULL)
            error = rtp_to_pri(&rtp, td1);
        else
            error = ESRCH;
        break;
    default:
        error = EINVAL;
        break;
    }
    PROC_UNLOCK(p);
    return (error);
}
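/*
 * A minimal userland sketch of the per-thread interface above, assuming
 * the rtprio_thread(2) wrapper declared in <sys/rtprio.h>.  Illustrative
 * only; excluded from compilation.
 */
#if 0
#include <sys/types.h>
#include <sys/rtprio.h>
#include <stdio.h>

int
main(void)
{
    struct rtprio rtp;

    /* lwpid == 0 means the calling thread. */
    if (rtprio_thread(RTP_LOOKUP, 0, &rtp) == -1) {
        perror("rtprio_thread");
        return (1);
    }
    printf("class %d, prio %d\n", (int)rtp.type, (int)rtp.prio);

    /* Even idle priority requires PRIV_SCHED_RTPRIO (see above). */
    rtp.type = RTP_PRIO_IDLE;
    rtp.prio = RTP_PRIO_MAX;
    if (rtprio_thread(RTP_SET, 0, &rtp) == -1)
        perror("rtprio_thread");
    return (0);
}
#endif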
/*
 * Set realtime priority.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
    int function;
    pid_t pid;
    struct rtprio *rtp;
};
#endif
int
rtprio(td, uap)
    struct thread *td;        /* curthread */
    register struct rtprio_args *uap;
{
    struct proc *p;
    struct thread *tdp;
    struct rtprio rtp;
    int cierror, error;

    /* Perform copyin before acquiring locks if needed. */
    if (uap->function == RTP_SET)
        cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
    else
        cierror = 0;

    if (uap->pid == 0) {
        p = td->td_proc;
        PROC_LOCK(p);
    } else {
        p = pfind(uap->pid);
        if (p == NULL)
            return (ESRCH);
    }

    switch (uap->function) {
    case RTP_LOOKUP:
        if ((error = p_cansee(td, p)))
            break;
        /*
         * Return OUR priority if no pid specified,
         * or if one is, report the highest priority
         * in the process.  There isn't much more you can do as
         * there is only room to return a single priority.
         * Note: specifying our own pid is not the same
         * as leaving it zero.
         */
        if (uap->pid == 0) {
            pri_to_rtp(td, &rtp);
        } else {
            struct rtprio rtp2;

            rtp.type = RTP_PRIO_IDLE;
            rtp.prio = RTP_PRIO_MAX;
            FOREACH_THREAD_IN_PROC(p, tdp) {
                pri_to_rtp(tdp, &rtp2);
                if (rtp2.type < rtp.type ||
                    (rtp2.type == rtp.type &&
                    rtp2.prio < rtp.prio)) {
                    rtp.type = rtp2.type;
                    rtp.prio = rtp2.prio;
                }
            }
        }
        PROC_UNLOCK(p);
        return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
    case RTP_SET:
        if ((error = p_cansched(td, p)) || (error = cierror))
            break;

        /* Disallow setting rtprio in most cases if not superuser. */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious.  However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process).  Fix me!  XXX
 */
#if 0
        if (RTP_PRIO_IS_REALTIME(rtp.type)) {
#else
        if (rtp.type != RTP_PRIO_NORMAL) {
#endif
            error = priv_check(td, PRIV_SCHED_RTPRIO);
            if (error)
                break;
        }

        /*
         * If we are setting our own priority, set just our
         * thread, but if we are acting on another process,
         * set all the threads on that process.  If we
         * specify our own pid we do the latter.
         */
        if (uap->pid == 0) {
            error = rtp_to_pri(&rtp, td);
        } else {
            FOREACH_THREAD_IN_PROC(p, td) {
                if ((error = rtp_to_pri(&rtp, td)) != 0)
                    break;
            }
        }
        break;
    default:
        error = EINVAL;
        break;
    }
    PROC_UNLOCK(p);
    return (error);
}
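/*
 * A minimal userland sketch of rtprio(2) as implemented above.  For a
 * nonzero pid, RTP_LOOKUP reports the highest priority found among the
 * target process's threads.  Illustrative only; excluded from compilation.
 */
#if 0
#include <sys/types.h>
#include <sys/rtprio.h>
#include <stdio.h>
#include <stdlib.h>

int
main(int argc, char **argv)
{
    struct rtprio rtp;
    pid_t pid;

    pid = (argc > 1) ? (pid_t)atoi(argv[1]) : 0;
    if (rtprio(RTP_LOOKUP, pid, &rtp) == -1) {
        perror("rtprio");
        return (1);
    }
    printf("pid %d: class %d, prio %d\n",
        (int)pid, (int)rtp.type, (int)rtp.prio);
    return (0);
}
#endif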
int
rtp_to_pri(struct rtprio *rtp, struct thread *td)
{
    u_char newpri;
    u_char oldpri;

    thread_lock(td);
    switch (RTP_PRIO_BASE(rtp->type)) {
    case RTP_PRIO_REALTIME:
        if (rtp->prio > RTP_PRIO_MAX) {
            thread_unlock(td);
            return (EINVAL);
        }
        newpri = PRI_MIN_REALTIME + rtp->prio;
        break;
    case RTP_PRIO_NORMAL:
        if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) {
            thread_unlock(td);
            return (EINVAL);
        }
        newpri = PRI_MIN_TIMESHARE + rtp->prio;
        break;
    case RTP_PRIO_IDLE:
        newpri = PRI_MIN_IDLE + rtp->prio;
        break;
    default:
        thread_unlock(td);
        return (EINVAL);
    }
    sched_class(td, rtp->type);    /* XXX fix */
    oldpri = td->td_user_pri;
    sched_user_prio(td, newpri);
    if (curthread == td)
        sched_prio(curthread, td->td_user_pri);    /* XXX dubious */
    if (TD_ON_UPILOCK(td) && oldpri != newpri) {
        thread_unlock(td);
        umtx_pi_adjust(td, oldpri);
    } else
        thread_unlock(td);
    return (0);
}

void
pri_to_rtp(struct thread *td, struct rtprio *rtp)
{

    thread_lock(td);
    switch (PRI_BASE(td->td_pri_class)) {
    case PRI_REALTIME:
        rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
        break;
    case PRI_TIMESHARE:
        rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
        break;
    case PRI_IDLE:
        rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
        break;
    default:
        break;
    }
    rtp->type = td->td_pri_class;
    thread_unlock(td);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
    u_int which;
    struct orlimit *rlp;
};
#endif
int
osetrlimit(td, uap)
    struct thread *td;
    register struct osetrlimit_args *uap;
{
    struct orlimit olim;
    struct rlimit lim;
    int error;

    if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
        return (error);
    lim.rlim_cur = olim.rlim_cur;
    lim.rlim_max = olim.rlim_max;
    error = kern_setrlimit(td, uap->which, &lim);
    return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
    u_int which;
    struct orlimit *rlp;
};
#endif
int
ogetrlimit(td, uap)
    struct thread *td;
    register struct ogetrlimit_args *uap;
{
    struct orlimit olim;
    struct rlimit rl;
    struct proc *p;
    int error;

    if (uap->which >= RLIM_NLIMITS)
        return (EINVAL);
    p = td->td_proc;
    PROC_LOCK(p);
    lim_rlimit(p, uap->which, &rl);
    PROC_UNLOCK(p);

    /*
     * XXX would be more correct to convert only RLIM_INFINITY to the
     * old RLIM_INFINITY and fail with EOVERFLOW for other larger
     * values.  Most 64->32 and 32->16 conversions, including not
     * unimportant ones of uids, are even more broken than what we
     * do here (they blindly truncate).  We don't do this correctly
     * here since we have little experience with EOVERFLOW yet.
     * Elsewhere, getuid() can't fail...
     */
    olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
    olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
    error = copyout(&olim, uap->rlp, sizeof(olim));
    return (error);
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
    u_int which;
    struct rlimit *rlp;
};
#endif
int
setrlimit(td, uap)
    struct thread *td;
    register struct __setrlimit_args *uap;
{
    struct rlimit alim;
    int error;

    if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
        return (error);
    error = kern_setrlimit(td, uap->which, &alim);
    return (error);
}
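/*
 * A minimal userland sketch of the getrlimit(2)/setrlimit(2) pair handled
 * above, raising the soft open-file limit to the hard limit.  Illustrative
 * only; excluded from compilation.
 */
#if 0
#include <sys/resource.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    struct rlimit rl;

    if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
        perror("getrlimit");
        return (1);
    }
    /* Raising the soft limit up to rlim_max needs no privilege. */
    rl.rlim_cur = rl.rlim_max;
    if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
        perror("setrlimit");
        return (1);
    }
    printf("RLIMIT_NOFILE now %ju\n", (uintmax_t)rl.rlim_cur);
    return (0);
}
#endif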
static void
lim_cb(void *arg)
{
    struct rlimit rlim;
    struct thread *td;
    struct proc *p;

    p = arg;
    PROC_LOCK_ASSERT(p, MA_OWNED);
    /*
     * Check if the process exceeds its cpu resource allocation.  If
     * it reaches the max, arrange to kill the process in ast().
     */
    if (p->p_cpulimit == RLIM_INFINITY)
        return;
    PROC_SLOCK(p);
    FOREACH_THREAD_IN_PROC(p, td) {
        thread_lock(td);
        ruxagg(&p->p_rux, td);
        thread_unlock(td);
    }
    PROC_SUNLOCK(p);
    if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
        lim_rlimit(p, RLIMIT_CPU, &rlim);
        if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
            killproc(p, "exceeded maximum CPU limit");
        } else {
            if (p->p_cpulimit < rlim.rlim_max)
                p->p_cpulimit += 5;
            psignal(p, SIGXCPU);
        }
    }
    if ((p->p_flag & P_WEXIT) == 0)
        callout_reset(&p->p_limco, hz, lim_cb, p);
}

int
kern_setrlimit(td, which, limp)
    struct thread *td;
    u_int which;
    struct rlimit *limp;
{
    struct plimit *newlim, *oldlim;
    struct proc *p;
    register struct rlimit *alimp;
    struct rlimit oldssiz;
    int error;

    if (which >= RLIM_NLIMITS)
        return (EINVAL);

    /*
     * Preserve historical bugs by treating negative limits as unsigned.
     */
    if (limp->rlim_cur < 0)
        limp->rlim_cur = RLIM_INFINITY;
    if (limp->rlim_max < 0)
        limp->rlim_max = RLIM_INFINITY;

    oldssiz.rlim_cur = 0;
    p = td->td_proc;
    newlim = lim_alloc();
    PROC_LOCK(p);
    oldlim = p->p_limit;
    alimp = &oldlim->pl_rlimit[which];
    if (limp->rlim_cur > alimp->rlim_max ||
        limp->rlim_max > alimp->rlim_max)
        if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
            PROC_UNLOCK(p);
            lim_free(newlim);
            return (error);
        }
    if (limp->rlim_cur > limp->rlim_max)
        limp->rlim_cur = limp->rlim_max;
    lim_copy(newlim, oldlim);
    alimp = &newlim->pl_rlimit[which];

    switch (which) {

    case RLIMIT_CPU:
        if (limp->rlim_cur != RLIM_INFINITY &&
            p->p_cpulimit == RLIM_INFINITY)
            callout_reset(&p->p_limco, hz, lim_cb, p);
        p->p_cpulimit = limp->rlim_cur;
        break;
    case RLIMIT_DATA:
        if (limp->rlim_cur > maxdsiz)
            limp->rlim_cur = maxdsiz;
        if (limp->rlim_max > maxdsiz)
            limp->rlim_max = maxdsiz;
        break;

    case RLIMIT_STACK:
        if (limp->rlim_cur > maxssiz)
            limp->rlim_cur = maxssiz;
        if (limp->rlim_max > maxssiz)
            limp->rlim_max = maxssiz;
        oldssiz = *alimp;
        if (td->td_proc->p_sysent->sv_fixlimit != NULL)
            td->td_proc->p_sysent->sv_fixlimit(&oldssiz,
                RLIMIT_STACK);
        break;

    case RLIMIT_NOFILE:
        if (limp->rlim_cur > maxfilesperproc)
            limp->rlim_cur = maxfilesperproc;
        if (limp->rlim_max > maxfilesperproc)
            limp->rlim_max = maxfilesperproc;
        break;

    case RLIMIT_NPROC:
        if (limp->rlim_cur > maxprocperuid)
            limp->rlim_cur = maxprocperuid;
        if (limp->rlim_max > maxprocperuid)
            limp->rlim_max = maxprocperuid;
        if (limp->rlim_cur < 1)
            limp->rlim_cur = 1;
        if (limp->rlim_max < 1)
            limp->rlim_max = 1;
        break;
    }
    if (td->td_proc->p_sysent->sv_fixlimit != NULL)
        td->td_proc->p_sysent->sv_fixlimit(limp, which);
    *alimp = *limp;
    p->p_limit = newlim;
    PROC_UNLOCK(p);
    lim_free(oldlim);

    if (which == RLIMIT_STACK) {
        /*
         * The stack is allocated to the maximum at exec time with
         * only "rlim_cur" bytes accessible.  If the stack limit is
         * going up, make more bytes accessible; if it is going down,
         * make the excess inaccessible.
         */
        if (limp->rlim_cur != oldssiz.rlim_cur) {
            vm_offset_t addr;
            vm_size_t size;
            vm_prot_t prot;

            if (limp->rlim_cur > oldssiz.rlim_cur) {
                prot = p->p_sysent->sv_stackprot;
                size = limp->rlim_cur - oldssiz.rlim_cur;
                addr = p->p_sysent->sv_usrstack -
                    limp->rlim_cur;
            } else {
                prot = VM_PROT_NONE;
                size = oldssiz.rlim_cur - limp->rlim_cur;
                addr = p->p_sysent->sv_usrstack -
                    oldssiz.rlim_cur;
            }
            addr = trunc_page(addr);
            size = round_page(size);
            (void)vm_map_protect(&p->p_vmspace->vm_map,
                addr, addr + size, prot, FALSE);
        }
    }

    return (0);
}
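/*
 * A minimal userland sketch of the RLIMIT_CPU behavior enforced by
 * lim_cb() above: SIGXCPU once the soft limit is reached (re-armed in
 * five-second steps of CPU time while below the hard limit), and forced
 * termination at the hard limit.  Illustrative only; excluded from
 * compilation.
 */
#if 0
#include <sys/resource.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void
onxcpu(int sig)
{

    (void)sig;
    write(STDOUT_FILENO, "SIGXCPU\n", 8);
}

int
main(void)
{
    struct rlimit rl = { 2, 4 };    /* soft 2 s, hard 4 s of CPU time */

    signal(SIGXCPU, onxcpu);
    if (setrlimit(RLIMIT_CPU, &rl) == -1) {
        perror("setrlimit");
        return (1);
    }
    for (;;)
        ;        /* spin; the kernel kills us at the hard limit */
    /* NOTREACHED */
}
#endif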
#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
    u_int which;
    struct rlimit *rlp;
};
#endif
/* ARGSUSED */
int
getrlimit(td, uap)
    struct thread *td;
    register struct __getrlimit_args *uap;
{
    struct rlimit rlim;
    struct proc *p;
    int error;

    if (uap->which >= RLIM_NLIMITS)
        return (EINVAL);
    p = td->td_proc;
    PROC_LOCK(p);
    lim_rlimit(p, uap->which, &rlim);
    PROC_UNLOCK(p);
    error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
    return (error);
}

/*
 * Transform the running time and tick information for children of proc p
 * into user and system time usage.
 */
void
calccru(p, up, sp)
    struct proc *p;
    struct timeval *up;
    struct timeval *sp;
{

    PROC_LOCK_ASSERT(p, MA_OWNED);
    calcru1(p, &p->p_crux, up, sp);
}

/*
 * Transform the running time and tick information in proc p into user
 * and system time usage.  If appropriate, include the current time slice
 * on this CPU.
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp)
{
    struct thread *td;
    uint64_t u;

    PROC_LOCK_ASSERT(p, MA_OWNED);
    PROC_SLOCK_ASSERT(p, MA_OWNED);
    /*
     * If we are getting stats for the current process, then add in the
     * stats that this thread has accumulated in its current time slice.
     * We reset the thread and CPU state as if we had performed a context
     * switch right here.
     */
    td = curthread;
    if (td->td_proc == p) {
        u = cpu_ticks();
        p->p_rux.rux_runtime += u - PCPU_GET(switchtime);
        PCPU_SET(switchtime, u);
    }
    /* Make sure the per-thread stats are current. */
    FOREACH_THREAD_IN_PROC(p, td) {
        if (td->td_incruntime == 0)
            continue;
        thread_lock(td);
        ruxagg(&p->p_rux, td);
        thread_unlock(td);
    }
    calcru1(p, &p->p_rux, up, sp);
}

static void
calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
    struct timeval *sp)
{
    /* {user, system, interrupt, total} {ticks, usec}: */
    u_int64_t ut, uu, st, su, it, tt, tu;

    ut = ruxp->rux_uticks;
    st = ruxp->rux_sticks;
    it = ruxp->rux_iticks;
    tt = ut + st + it;
    if (tt == 0) {
        /* Avoid divide by zero */
        st = 1;
        tt = 1;
    }
    tu = cputick2usec(ruxp->rux_runtime);
    if ((int64_t)tu < 0) {
        /* XXX: this should be an assert /phk */
        printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
            (intmax_t)tu, p->p_pid, p->p_comm);
        tu = ruxp->rux_tu;
    }

    if (tu >= ruxp->rux_tu) {
        /*
         * The normal case, time increased.
         * Enforce monotonicity of bucketed numbers.
         */
        uu = (tu * ut) / tt;
        if (uu < ruxp->rux_uu)
            uu = ruxp->rux_uu;
        su = (tu * st) / tt;
        if (su < ruxp->rux_su)
            su = ruxp->rux_su;
    } else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
        /*
         * When we calibrate the cputicker, it is not uncommon to
         * see the presumably fixed frequency increase slightly over
         * time as a result of thermal stabilization and NTP
         * discipline (of the reference clock).  We therefore ignore
         * a bit of backwards slop because we expect to catch up
         * shortly.  We use a 3 microsecond limit to catch low
         * counts and a 1% limit for high counts.
         */
        uu = ruxp->rux_uu;
        su = ruxp->rux_su;
        tu = ruxp->rux_tu;
    } else { /* tu < ruxp->rux_tu */
        /*
         * What happened here was likely that a laptop, which ran at
         * a reduced clock frequency at boot, kicked into high gear.
         * The wisdom of spamming this message in that case is
         * dubious, but it might also be indicative of something
         * serious, so let's keep it and hope laptops can be made
         * more truthful about their CPU speed via ACPI.
         */
        printf("calcru: runtime went backwards from %ju usec "
            "to %ju usec for pid %d (%s)\n",
            (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
            p->p_pid, p->p_comm);
        uu = (tu * ut) / tt;
        su = (tu * st) / tt;
    }

    ruxp->rux_uu = uu;
    ruxp->rux_su = su;
    ruxp->rux_tu = tu;

    up->tv_sec = uu / 1000000;
    up->tv_usec = uu % 1000000;
    sp->tv_sec = su / 1000000;
    sp->tv_usec = su % 1000000;
}
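/*
 * A worked example of the split performed by calcru1() above, under
 * assumed numbers: with ut = 600 user ticks, st = 300 system ticks and
 * it = 100 interrupt ticks (so tt = 1000), a runtime of tu = 2000000 usec
 * is apportioned as tu*ut/tt = 1200000 usec of user time and tu*st/tt =
 * 600000 usec of system time.  Illustrative only; excluded from
 * compilation.
 */
#if 0
#include <assert.h>
#include <stdint.h>

int
main(void)
{
    uint64_t ut = 600, st = 300, it = 100;
    uint64_t tt = ut + st + it;        /* 1000 ticks total */
    uint64_t tu = 2000000;             /* 2 s of runtime, in usec */

    assert((tu * ut) / tt == 1200000); /* user time */
    assert((tu * st) / tt == 600000);  /* system time */
    return (0);
}
#endif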
#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
    int who;
    struct rusage *rusage;
};
#endif
int
getrusage(td, uap)
    register struct thread *td;
    register struct getrusage_args *uap;
{
    struct rusage ru;
    int error;

    error = kern_getrusage(td, uap->who, &ru);
    if (error == 0)
        error = copyout(&ru, uap->rusage, sizeof(struct rusage));
    return (error);
}

int
kern_getrusage(td, who, rup)
    struct thread *td;
    int who;
    struct rusage *rup;
{
    struct proc *p;
    int error;

    error = 0;
    p = td->td_proc;
    PROC_LOCK(p);
    switch (who) {
    case RUSAGE_SELF:
        rufetchcalc(p, rup, &rup->ru_utime,
            &rup->ru_stime);
        break;

    case RUSAGE_CHILDREN:
        *rup = p->p_stats->p_cru;
        calccru(p, &rup->ru_utime, &rup->ru_stime);
        break;

    default:
        error = EINVAL;
    }
    PROC_UNLOCK(p);
    return (error);
}

void
rucollect(struct rusage *ru, struct rusage *ru2)
{
    long *ip, *ip2;
    int i;

    if (ru->ru_maxrss < ru2->ru_maxrss)
        ru->ru_maxrss = ru2->ru_maxrss;
    ip = &ru->ru_first;
    ip2 = &ru2->ru_first;
    for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
        *ip++ += *ip2++;
}

void
ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
    struct rusage_ext *rux2)
{

    rux->rux_runtime += rux2->rux_runtime;
    rux->rux_uticks += rux2->rux_uticks;
    rux->rux_sticks += rux2->rux_sticks;
    rux->rux_iticks += rux2->rux_iticks;
    rux->rux_uu += rux2->rux_uu;
    rux->rux_su += rux2->rux_su;
    rux->rux_tu += rux2->rux_tu;
    rucollect(ru, ru2);
}
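/*
 * A minimal userland sketch of getrusage(2), which lands in
 * kern_getrusage() above.  Illustrative only; excluded from compilation.
 */
#if 0
#include <sys/resource.h>
#include <stdio.h>

int
main(void)
{
    struct rusage ru;

    if (getrusage(RUSAGE_SELF, &ru) == -1) {
        perror("getrusage");
        return (1);
    }
    printf("user %ld.%06ld s, sys %ld.%06ld s, maxrss %ld\n",
        (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec,
        (long)ru.ru_stime.tv_sec, (long)ru.ru_stime.tv_usec,
        ru.ru_maxrss);
    return (0);
}
#endif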
/*
 * Aggregate tick counts into the proc's rusage_ext.
 */
void
ruxagg(struct rusage_ext *rux, struct thread *td)
{

    THREAD_LOCK_ASSERT(td, MA_OWNED);
    PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED);
    rux->rux_runtime += td->td_incruntime;
    rux->rux_uticks += td->td_uticks;
    rux->rux_sticks += td->td_sticks;
    rux->rux_iticks += td->td_iticks;
    td->td_incruntime = 0;
    td->td_uticks = 0;
    td->td_iticks = 0;
    td->td_sticks = 0;
}

/*
 * Update the rusage_ext structure and fetch a valid aggregate rusage
 * for proc p if storage for one is supplied.
 */
void
rufetch(struct proc *p, struct rusage *ru)
{
    struct thread *td;

    PROC_SLOCK_ASSERT(p, MA_OWNED);

    *ru = p->p_ru;
    if (p->p_numthreads > 0) {
        FOREACH_THREAD_IN_PROC(p, td) {
            thread_lock(td);
            ruxagg(&p->p_rux, td);
            thread_unlock(td);
            rucollect(ru, &td->td_ru);
        }
    }
}

/*
 * Atomically perform a rufetch and a calcru together.
 * Consumers can safely assume that calcru() is executed only once
 * rufetch() has completed.
 */
void
rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
    struct timeval *sp)
{

    PROC_SLOCK(p);
    rufetch(p, ru);
    calcru(p, up, sp);
    PROC_SUNLOCK(p);
}

/*
 * Allocate a new resource limits structure and initialize its
 * reference count and mutex pointer.
 */
struct plimit *
lim_alloc()
{
    struct plimit *limp;

    limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
    refcount_init(&limp->pl_refcnt, 1);
    return (limp);
}

struct plimit *
lim_hold(limp)
    struct plimit *limp;
{

    refcount_acquire(&limp->pl_refcnt);
    return (limp);
}

void
lim_fork(struct proc *p1, struct proc *p2)
{
    p2->p_limit = lim_hold(p1->p_limit);
    callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
    if (p1->p_cpulimit != RLIM_INFINITY)
        callout_reset(&p2->p_limco, hz, lim_cb, p2);
}

void
lim_free(limp)
    struct plimit *limp;
{

    KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
    if (refcount_release(&limp->pl_refcnt))
        free((void *)limp, M_PLIMIT);
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork.
 */
void
lim_copy(dst, src)
    struct plimit *dst, *src;
{

    KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
    bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
}

/*
 * Return the hard limit for a particular system resource.  The
 * which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_max(struct proc *p, int which)
{
    struct rlimit rl;

    lim_rlimit(p, which, &rl);
    return (rl.rlim_max);
}

/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct proc *p, int which)
{
    struct rlimit rl;

    lim_rlimit(p, which, &rl);
    return (rl.rlim_cur);
}
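/*
 * A sketch of the expected in-kernel usage of the accessors above:
 * lim_rlimit() asserts the proc lock, so callers snapshot limits while
 * holding it.  A pseudo-caller fragment for illustration only; excluded
 * from compilation.
 */
#if 0
    rlim_t nofile;

    PROC_LOCK(p);
    nofile = lim_cur(p, RLIMIT_NOFILE);    /* soft limit snapshot */
    PROC_UNLOCK(p);
#endif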
/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
{

    PROC_LOCK_ASSERT(p, MA_OWNED);
    KASSERT(which >= 0 && which < RLIM_NLIMITS,
        ("request for invalid resource limit"));
    *rlp = p->p_limit->pl_rlimit[which];
    if (p->p_sysent->sv_fixlimit != NULL)
        p->p_sysent->sv_fixlimit(rlp, which);
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit()
{

    uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
    rw_init(&uihashtbl_lock, "uidinfo hash");
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_lock must be locked.
 */
static struct uidinfo *
uilookup(uid)
    uid_t uid;
{
    struct uihashhead *uipp;
    struct uidinfo *uip;

    rw_assert(&uihashtbl_lock, RA_LOCKED);
    uipp = UIHASH(uid);
    LIST_FOREACH(uip, uipp, ui_hash)
        if (uip->ui_uid == uid)
            break;

    return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid)
    uid_t uid;
{
    struct uidinfo *old_uip, *uip;

    rw_rlock(&uihashtbl_lock);
    uip = uilookup(uid);
    if (uip == NULL) {
        rw_runlock(&uihashtbl_lock);
        uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
        rw_wlock(&uihashtbl_lock);
        /*
         * There's a chance someone created our uidinfo while we
         * were in malloc and not holding the lock, so we have to
         * make sure we don't insert a duplicate uidinfo.
         */
        if ((old_uip = uilookup(uid)) != NULL) {
            /* Someone else beat us to it. */
            free(uip, M_UIDINFO);
            uip = old_uip;
        } else {
            refcount_init(&uip->ui_ref, 0);
            uip->ui_uid = uid;
            mtx_init(&uip->ui_vmsize_mtx, "ui_vmsize", NULL,
                MTX_DEF);
            LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
        }
    }
    uihold(uip);
    rw_unlock(&uihashtbl_lock);
    return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
    struct uidinfo *uip;
{

    refcount_acquire(&uip->ui_ref);
}
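/*
 * A sketch of the uidinfo lifecycle implied above: uifind() returns the
 * struct with one reference held, per-uid resource counters are adjusted
 * through the chg*() helpers below, and uifree() drops the reference.
 * A pseudo-caller fragment for illustration only (the limit argument and
 * error handling are assumptions); excluded from compilation.
 */
#if 0
    struct uidinfo *uip;

    uip = uifind(uid);                      /* +1 reference */
    if (!chgproccnt(uip, 1, maxprocperuid)) {
        /* The per-uid process limit would be exceeded. */
        uifree(uip);                        /* -1 reference */
        return (EAGAIN);
    }
    /* ... on teardown: chgproccnt(uip, -1, 0); uifree(uip); */
#endif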
/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in need to free, bump the count
 *   back up, lose the lock and acquire the locks in the proper
 *   order to try again.
 */
void
uifree(uip)
    struct uidinfo *uip;
{
    int old;

    /* Prepare for optimal case. */
    old = uip->ui_ref;
    if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1))
        return;

    /* Prepare for suboptimal case. */
    rw_wlock(&uihashtbl_lock);
    if (refcount_release(&uip->ui_ref)) {
        LIST_REMOVE(uip, ui_hash);
        rw_wunlock(&uihashtbl_lock);
        if (uip->ui_sbsize != 0)
            printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
                uip->ui_uid, uip->ui_sbsize);
        if (uip->ui_proccnt != 0)
            printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
                uip->ui_uid, uip->ui_proccnt);
        if (uip->ui_vmsize != 0)
            printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
                uip->ui_uid, (unsigned long long)uip->ui_vmsize);
        mtx_destroy(&uip->ui_vmsize_mtx);
        free(uip, M_UIDINFO);
        return;
    }
    /*
     * Someone added a reference between atomic_cmpset_int() and
     * rw_wlock(&uihashtbl_lock).
     */
    rw_wunlock(&uihashtbl_lock);
}

/*
 * Change the count associated with number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgproccnt(uip, diff, max)
    struct uidinfo *uip;
    int diff;
    rlim_t max;
{

    /* Don't allow them to exceed max, but allow subtraction. */
    if (diff > 0 && max != 0) {
        if (atomic_fetchadd_long(&uip->ui_proccnt, (long)diff) + diff > max) {
            atomic_subtract_long(&uip->ui_proccnt, (long)diff);
            return (0);
        }
    } else {
        atomic_add_long(&uip->ui_proccnt, (long)diff);
        if (uip->ui_proccnt < 0)
            printf("negative proccnt for uid = %d\n", uip->ui_uid);
    }
    return (1);
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(uip, hiwat, to, max)
    struct uidinfo *uip;
    u_int *hiwat;
    u_int to;
    rlim_t max;
{
    int diff;

    diff = to - *hiwat;
    if (diff > 0) {
        if (atomic_fetchadd_long(&uip->ui_sbsize, (long)diff) + diff > max) {
            atomic_subtract_long(&uip->ui_sbsize, (long)diff);
            return (0);
        }
    } else {
        atomic_add_long(&uip->ui_sbsize, (long)diff);
        if (uip->ui_sbsize < 0)
            printf("negative sbsize for uid = %d\n", uip->ui_uid);
    }
    *hiwat = to;
    return (1);
}

/*
 * Change the count associated with number of pseudo-terminals
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgptscnt(uip, diff, max)
    struct uidinfo *uip;
    int diff;
    rlim_t max;
{

    /* Don't allow them to exceed max, but allow subtraction. */
    if (diff > 0 && max != 0) {
        if (atomic_fetchadd_long(&uip->ui_ptscnt, (long)diff) + diff > max) {
            atomic_subtract_long(&uip->ui_ptscnt, (long)diff);
            return (0);
        }
    } else {
        atomic_add_long(&uip->ui_ptscnt, (long)diff);
        if (uip->ui_ptscnt < 0)
            printf("negative ptscnt for uid = %d\n", uip->ui_uid);
    }
    return (1);
}
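/*
 * A portable sketch of the optimistic pattern used by the chg*() helpers
 * above: add first, then roll back if a nonzero limit was overshot, so
 * the common path takes no lock.  Written against C11 <stdatomic.h>
 * purely for illustration; excluded from compilation.
 */
#if 0
#include <stdatomic.h>
#include <stdbool.h>

static atomic_long count;

/* Returns true on success, false if 'max' (nonzero) would be exceeded. */
static bool
chg_count(long diff, long max)
{

    if (diff > 0 && max != 0) {
        if (atomic_fetch_add(&count, diff) + diff > max) {
            atomic_fetch_sub(&count, diff);    /* roll back */
            return (false);
        }
        return (true);
    }
    atomic_fetch_add(&count, diff);    /* subtraction is always allowed */
    return (true);
}
#endif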