kern_resource.c revision 128088
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_resource.c 128088 2004-04-10 11:08:16Z mux $");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/sysent.h>
#include <sys/time.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

static int donice(struct thread *td, struct proc *chgp, int n);

static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct mtx uihashtbl_mtx;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static struct uidinfo *uilookup(uid_t uid);

/*
 * Resource controls and accounting.
 */

#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
/*
 * MPSAFE
 */
int
getpriority(td, uap)
	struct thread *td;
	register struct getpriority_args *uap;
{
	struct ksegrp *kg;
	struct proc *p;
	int error, low;

	error = 0;
	low = PRIO_MAX + 1;
	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			low = td->td_ksegrp->kg_nice;
		else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			if (p_cansee(td, p) == 0) {
				FOREACH_KSEGRP_IN_PROC(p, kg) {
					if (kg->kg_nice < low)
						low = kg->kg_nice;
				}
			}
			PROC_UNLOCK(p);
		}
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (!p_cansee(td, p)) {
				FOREACH_KSEGRP_IN_PROC(p, kg) {
					if (kg->kg_nice < low)
						low = kg->kg_nice;
				}
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		LIST_FOREACH(p, &allproc, p_list) {
			PROC_LOCK(p);
			if (!p_cansee(td, p) &&
			    p->p_ucred->cr_uid == uap->who) {
				FOREACH_KSEGRP_IN_PROC(p, kg) {
					if (kg->kg_nice < low)
						low = kg->kg_nice;
				}
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (low == PRIO_MAX + 1 && error == 0)
		error = ESRCH;
	td->td_retval[0] = low;
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
/*
 * MPSAFE
 */
int
setpriority(td, uap)
	struct thread *td;
	register struct setpriority_args *uap;
{
	struct proc *curp;
	register struct proc *p;
	int found = 0, error = 0;

	curp = td->td_proc;
	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			PROC_LOCK(curp);
			error = donice(td, curp, uap->prio);
			PROC_UNLOCK(curp);
		} else {
			p = pfind(uap->who);
			if (p == 0)
				break;
			if (p_cansee(td, p) == 0)
				error = donice(td, p, uap->prio);
			PROC_UNLOCK(p);
		}
		found++;
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = curp->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (!p_cansee(td, p)) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_ucred->cr_uid == uap->who &&
			    !p_cansee(td, p)) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (found == 0 && error == 0)
		error = ESRCH;
	return (error);
}

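/*
 * Usage sketch (illustrative only, not part of the kernel source): how
 * the "who == 0" convention handled above looks from userland.  The
 * priority value 10 is an arbitrary example.
 *
 *	#include <sys/time.h>
 *	#include <sys/resource.h>
 *	#include <errno.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int prio;
 *
 *		// who == 0 selects the calling process.
 *		if (setpriority(PRIO_PROCESS, 0, 10) == -1)
 *			perror("setpriority");
 *		// -1 is a valid nice value, so errno must be cleared first.
 *		errno = 0;
 *		prio = getpriority(PRIO_PROCESS, 0);
 *		if (prio == -1 && errno != 0)
 *			perror("getpriority");
 *		else
 *			printf("nice: %d\n", prio);
 *		return (0);
 *	}
 */
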
/*
 * Set "nice" for a process.  Doesn't really understand threaded processes
 * well but does try.  Has the unfortunate side effect of making all the
 * NICE values for a process's ksegrps the same.  This suggests that NICE
 * values should be stored as a process nice and deltas for the ksegrps
 * (but not yet).
 */
static int
donice(struct thread *td, struct proc *p, int n)
{
	struct ksegrp *kg;
	int error, low;

	low = PRIO_MAX + 1;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((error = p_cansched(td, p)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	/*
	 * Without superuser privilege, the nice value may not be set below
	 * the process's current lowest nice; e.g., for nices of 4, 3, 2,
	 * allow nice to be set to 3 but not to 1.
	 */
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		if (kg->kg_nice < low)
			low = kg->kg_nice;
	}
	if (n < low && suser(td) != 0)
		return (EACCES);
	mtx_lock_spin(&sched_lock);
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		sched_nice(kg, n);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Set realtime priority.
 *
 * MPSAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif

int
rtprio(td, uap)
	struct thread *td;
	register struct rtprio_args *uap;
{
	struct proc *curp;
	register struct proc *p;
	struct rtprio rtp;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	curp = td->td_proc;
	if (uap->pid == 0) {
		p = curp;
		PROC_LOCK(p);
	} else {
		p = pfind(uap->pid);
		if (p == NULL)
			return (ESRCH);
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		mtx_lock_spin(&sched_lock);
		pri_to_rtp(FIRST_KSEGRP_IN_PROC(p), &rtp);
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;
		/* Disallow setting rtprio in most cases if not superuser. */
		if (suser(td) != 0) {
			/* Can't set someone else's. */
			if (uap->pid) {
				error = EPERM;
				break;
			}
			/*
			 * Can't set realtime priority.
			 *
			 * Realtime priority has to be restricted for
			 * reasons which should be obvious.  However, for
			 * idle priority, there is a potential for system
			 * deadlock if an idleprio process gains a lock on
			 * a resource that other processes need (and the
			 * idleprio process can't run due to a CPU-bound
			 * normal process).  Fix me!  XXX
			 */
#if 0
			if (RTP_PRIO_IS_REALTIME(rtp.type))
#endif
			if (rtp.type != RTP_PRIO_NORMAL) {
				error = EPERM;
				break;
			}
		}
		mtx_lock_spin(&sched_lock);
		error = rtp_to_pri(&rtp, FIRST_KSEGRP_IN_PROC(p));
		mtx_unlock_spin(&sched_lock);
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

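/*
 * Usage sketch (illustrative only, not part of the kernel source): the
 * rtprio(2) interface served by rtprio() above.  The idle-class request
 * is an arbitrary example.
 *
 *	#include <sys/types.h>
 *	#include <sys/rtprio.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct rtprio rtp;
 *
 *		// pid 0 means the calling process, as in the code above.
 *		rtp.type = RTP_PRIO_IDLE;
 *		rtp.prio = RTP_PRIO_MAX;	// lowest priority in the class
 *		if (rtprio(RTP_SET, 0, &rtp) == -1)
 *			perror("rtprio(RTP_SET)");
 *		if (rtprio(RTP_LOOKUP, 0, &rtp) == 0)
 *			printf("type %d prio %d\n", rtp.type, rtp.prio);
 *		return (0);
 *	}
 */
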
int
rtp_to_pri(struct rtprio *rtp, struct ksegrp *kg)
{

	mtx_assert(&sched_lock, MA_OWNED);
	if (rtp->prio > RTP_PRIO_MAX)
		return (EINVAL);
	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		kg->kg_user_pri = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		kg->kg_user_pri = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		kg->kg_user_pri = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		return (EINVAL);
	}
	sched_class(kg, rtp->type);
	if (curthread->td_ksegrp == kg) {
		curthread->td_base_pri = kg->kg_user_pri;
		sched_prio(curthread, kg->kg_user_pri);	/* XXX dubious */
	}
	return (0);
}

void
pri_to_rtp(struct ksegrp *kg, struct rtprio *rtp)
{

	mtx_assert(&sched_lock, MA_OWNED);
	switch (PRI_BASE(kg->kg_pri_class)) {
	case PRI_REALTIME:
		rtp->prio = kg->kg_user_pri - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = kg->kg_user_pri - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = kg->kg_user_pri - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = kg->kg_pri_class;
}

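/*
 * Worked example (illustrative): rtp_to_pri() and pri_to_rtp() convert
 * between a class-relative rtprio value and the global kernel priority
 * by adding or subtracting the base of the class's priority range, so
 * the two functions are inverses for any in-range value:
 *
 *	rtp.type = RTP_PRIO_REALTIME, rtp.prio = 5
 *	rtp_to_pri:  kg_user_pri = PRI_MIN_REALTIME + 5
 *	pri_to_rtp:  prio = kg_user_pri - PRI_MIN_REALTIME = 5
 */
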
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
/*
 * MPSAFE
 */
int
osetrlimit(td, uap)
	struct thread *td;
	register struct osetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	error = kern_setrlimit(td, uap->which, &lim);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
/*
 * MPSAFE
 */
int
ogetrlimit(td, uap)
	struct thread *td;
	register struct ogetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit rl;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rl);
	PROC_UNLOCK(p);

	/*
	 * XXX would be more correct to convert only RLIM_INFINITY to the
	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
	 * values.  Most 64->32 and 32->16 conversions, including not
	 * unimportant ones of uids are even more broken than what we
	 * do here (they blindly truncate).  We don't do this correctly
	 * here since we have little experience with EOVERFLOW yet.
	 * Elsewhere, getuid() can't fail...
	 */
	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
	error = copyout(&olim, uap->rlp, sizeof(olim));
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/*
 * MPSAFE
 */
int
setrlimit(td, uap)
	struct thread *td;
	register struct __setrlimit_args *uap;
{
	struct rlimit alim;
	int error;

	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
		return (error);
	error = kern_setrlimit(td, uap->which, &alim);
	return (error);
}

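/*
 * Usage sketch (illustrative only, not part of the kernel source): the
 * getrlimit(2)/setrlimit(2) pair serviced by the wrappers above.  The
 * resource and value are arbitrary examples.
 *
 *	#include <sys/time.h>
 *	#include <sys/resource.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct rlimit rl;
 *
 *		if (getrlimit(RLIMIT_NOFILE, &rl) == -1)
 *			return (1);
 *		rl.rlim_cur = 128;	// soft limit; may not exceed rlim_max
 *		if (setrlimit(RLIMIT_NOFILE, &rl) == -1)
 *			perror("setrlimit");
 *		return (0);
 *	}
 */
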
int
kern_setrlimit(td, which, limp)
	struct thread *td;
	u_int which;
	struct rlimit *limp;
{
	struct plimit *newlim, *oldlim;
	struct proc *p;
	register struct rlimit *alimp;
	rlim_t oldssiz;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	oldssiz = 0;
	p = td->td_proc;
	newlim = lim_alloc();
	PROC_LOCK(p);
	oldlim = p->p_limit;
	alimp = &oldlim->pl_rlimit[which];
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = suser_cred(td->td_ucred, PRISON_ROOT))) {
			PROC_UNLOCK(p);
			lim_free(newlim);
			return (error);
		}
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	lim_copy(newlim, oldlim);
	alimp = &newlim->pl_rlimit[which];

	switch (which) {

	case RLIMIT_CPU:
		mtx_lock_spin(&sched_lock);
		p->p_cpulimit = limp->rlim_cur;
		mtx_unlock_spin(&sched_lock);
		break;

	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdsiz)
			limp->rlim_cur = maxdsiz;
		if (limp->rlim_max > maxdsiz)
			limp->rlim_max = maxdsiz;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxssiz)
			limp->rlim_cur = maxssiz;
		if (limp->rlim_max > maxssiz)
			limp->rlim_max = maxssiz;
		oldssiz = alimp->rlim_cur;
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	*alimp = *limp;
	p->p_limit = newlim;
	PROC_UNLOCK(p);
	lim_free(oldlim);

	if (which == RLIMIT_STACK) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If the stack limit is going
		 * up, make more bytes accessible; if it is going down, make
		 * the extra bytes inaccessible.
		 */
		if (limp->rlim_cur != oldssiz) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			mtx_lock(&Giant);
			if (limp->rlim_cur > oldssiz) {
				prot = p->p_sysent->sv_stackprot;
				size = limp->rlim_cur - oldssiz;
				addr = p->p_sysent->sv_usrstack -
				    limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = oldssiz - limp->rlim_cur;
				addr = p->p_sysent->sv_usrstack - oldssiz;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void) vm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr + size, prot, FALSE);
			mtx_unlock(&Giant);
		}
	}
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
getrlimit(td, uap)
	struct thread *td;
	register struct __getrlimit_args *uap;
{
	struct rlimit rlim;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rlim);
	PROC_UNLOCK(p);
	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
	return (error);
}

/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 */
void
calcru(p, up, sp, ip)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
	struct timeval *ip;
{
	struct bintime bt;
	struct timeval tv;
	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;

	mtx_assert(&sched_lock, MA_OWNED);
	/* XXX: why spl-protect ?  worst case is an off-by-one report */

	ut = p->p_uticks;
	st = p->p_sticks;
	it = p->p_iticks;

	tt = ut + st + it;
	if (tt == 0) {
		st = 1;
		tt = 1;
	}
	if (p == curthread->td_proc) {
		/*
		 * Adjust for the current time slice.  This is actually
		 * fairly important since the error here is on the order of
		 * a time quantum, which is much greater than the sampling
		 * error.
		 * XXXKSE use a different test due to threads on other
		 * processors also being 'current'.
		 */
		binuptime(&bt);
		bintime_sub(&bt, PCPU_PTR(switchtime));
		bintime_add(&bt, &p->p_runtime);
	} else
		bt = p->p_runtime;
	bintime2timeval(&bt, &tv);
	tu = (u_int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
	ptu = p->p_uu + p->p_su + p->p_iu;
	if (tu < ptu || (int64_t)tu < 0) {
		printf("calcru: negative time of %jd usec for pid %d (%s)\n",
		    (intmax_t)tu, p->p_pid, p->p_comm);
		tu = ptu;
	}

	/* Subdivide tu. */
	uu = (tu * ut) / tt;
	su = (tu * st) / tt;
	iu = tu - uu - su;

	/* Enforce monotonicity. */
	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
		if (uu < p->p_uu)
			uu = p->p_uu;
		else if (uu + p->p_su + p->p_iu > tu)
			uu = tu - p->p_su - p->p_iu;
		if (st == 0)
			su = p->p_su;
		else {
			su = ((tu - uu) * st) / (st + it);
			if (su < p->p_su)
				su = p->p_su;
			else if (uu + su + p->p_iu > tu)
				su = tu - uu - p->p_iu;
		}
		KASSERT(uu + su + p->p_iu <= tu,
		    ("calcru: monotonisation botch 1"));
		iu = tu - uu - su;
		KASSERT(iu >= p->p_iu,
		    ("calcru: monotonisation botch 2"));
	}
	p->p_uu = uu;
	p->p_su = su;
	p->p_iu = iu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
	if (ip != NULL) {
		ip->tv_sec = iu / 1000000;
		ip->tv_usec = iu % 1000000;
	}
}

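/*
 * Worked example (illustrative): calcru() splits the total runtime tu
 * across user/system/interrupt time in proportion to the sampled tick
 * counts.  With example values:
 *
 *	ut = 3, st = 1, it = 0		=>  tt = 4
 *	tu = 4000000 usec
 *	uu = (4000000 * 3) / 4		=   3000000 usec user time
 *	su = (4000000 * 1) / 4		=   1000000 usec system time
 *	iu = tu - uu - su		=         0 usec interrupt time
 *
 * The monotonicity fixup then ensures that none of the three values
 * ever decreases between successive calls for the same process.
 */
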
#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
getrusage(td, uap)
	register struct thread *td;
	register struct getrusage_args *uap;
{
	struct rusage ru;
	struct proc *p;

	p = td->td_proc;
	switch (uap->who) {

	case RUSAGE_SELF:
		mtx_lock(&Giant);
		mtx_lock_spin(&sched_lock);
		calcru(p, &p->p_stats->p_ru.ru_utime,
		    &p->p_stats->p_ru.ru_stime, NULL);
		mtx_unlock_spin(&sched_lock);
		ru = p->p_stats->p_ru;
		mtx_unlock(&Giant);
		break;

	case RUSAGE_CHILDREN:
		mtx_lock(&Giant);
		ru = p->p_stats->p_cru;
		mtx_unlock(&Giant);
		break;

	default:
		return (EINVAL);
	}
	return (copyout(&ru, uap->rusage, sizeof(struct rusage)));
}

void
ruadd(ru, ru2)
	register struct rusage *ru, *ru2;
{
	register long *ip, *ip2;
	register int i;

	timevaladd(&ru->ru_utime, &ru2->ru_utime);
	timevaladd(&ru->ru_stime, &ru2->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Allocate a new resource limits structure and initialize its
 * reference count and mutex pointer.
 */
struct plimit *
lim_alloc()
{
	struct plimit *limp;

	limp = (struct plimit *)malloc(sizeof(struct plimit), M_PLIMIT,
	    M_WAITOK);
	limp->pl_refcnt = 1;
	limp->pl_mtx = mtx_pool_alloc(mtxpool_sleep);
	return (limp);
}

struct plimit *
lim_hold(limp)
	struct plimit *limp;
{

	LIM_LOCK(limp);
	limp->pl_refcnt++;
	LIM_UNLOCK(limp);
	return (limp);
}

void
lim_free(limp)
	struct plimit *limp;
{

	LIM_LOCK(limp);
	KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
	if (--limp->pl_refcnt == 0) {
		LIM_UNLOCK(limp);
		free((void *)limp, M_PLIMIT);
		return;
	}
	LIM_UNLOCK(limp);
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork.
 */
void
lim_copy(dst, src)
	struct plimit *dst, *src;
{

	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
}

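/*
 * Illustrative summary of the copy-on-write pattern built from the
 * primitives above, as used by kern_setrlimit() earlier in this file:
 * a shared plimit is never modified in place; a private copy is
 * prepared and then swapped in.
 *
 *	newlim = lim_alloc();			// private, refcnt == 1
 *	PROC_LOCK(p);
 *	oldlim = p->p_limit;
 *	lim_copy(newlim, oldlim);		// duplicate the rlimit array
 *	newlim->pl_rlimit[which] = *limp;	// modify the private copy
 *	p->p_limit = newlim;			// publish the new structure
 *	PROC_UNLOCK(p);
 *	lim_free(oldlim);			// drop the old reference
 */
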
/*
 * Return the hard limit for a particular system resource.  The
 * which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_max(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_max);
}

/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_cur);
}

/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(which >= 0 && which < RLIM_NLIMITS,
	    ("request for invalid resource limit"));
	*rlp = p->p_limit->pl_rlimit[which];
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit()
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF);
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_mtx must be locked.
 */
static struct uidinfo *
uilookup(uid)
	uid_t uid;
{
	struct uihashhead *uipp;
	struct uidinfo *uip;

	mtx_assert(&uihashtbl_mtx, MA_OWNED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct uidinfo *old_uip, *uip;

	mtx_lock(&uihashtbl_mtx);
	uip = uilookup(uid);
	if (uip == NULL) {
		mtx_unlock(&uihashtbl_mtx);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		mtx_lock(&uihashtbl_mtx);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* Someone else beat us to it. */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			uip->ui_mtxp = mtx_pool_alloc(mtxpool_sleep);
			uip->ui_uid = uid;
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	mtx_unlock(&uihashtbl_mtx);
	return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	UIDINFO_LOCK(uip);
	uip->ui_ref++;
	UIDINFO_UNLOCK(uip);
}

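/*
 * Illustrative example: UIHASH() masks the uid with uihash, which
 * hashinit() set to the table size minus one (the size is a power of
 * two), and uifind()/uifree() bracket every use of a uidinfo:
 *
 *	uihash = 15 (16 buckets):  uid 1001 -> bucket 1001 & 15 = 9
 *
 *	struct uidinfo *uip;
 *
 *	uip = uifind(uid);	// look up or allocate; takes a reference
 *	// ... charge resources against uip ...
 *	uifree(uip);		// drop the reference when done
 */
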
/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in a need to free, bump the count
 *   back up, release the lock and acquire the locks in the proper
 *   order to try again.
 */
void
uifree(uip)
	struct uidinfo *uip;
{

	/* Prepare for optimal case. */
	UIDINFO_LOCK(uip);

	if (--uip->ui_ref != 0) {
		UIDINFO_UNLOCK(uip);
		return;
	}

	/* Prepare for suboptimal case. */
	uip->ui_ref++;
	UIDINFO_UNLOCK(uip);
	mtx_lock(&uihashtbl_mtx);
	UIDINFO_LOCK(uip);

	/*
	 * We must subtract one from the count again because we backed out
	 * our initial subtraction before dropping the lock.
	 * Since another thread may have added a reference after we dropped
	 * the initial lock we have to test for zero again.
	 */
	if (--uip->ui_ref == 0) {
		LIST_REMOVE(uip, ui_hash);
		mtx_unlock(&uihashtbl_mtx);
		if (uip->ui_sbsize != 0)
			printf("freeing uidinfo: uid = %d, sbsize = %jd\n",
			    uip->ui_uid, (intmax_t)uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		UIDINFO_UNLOCK(uip);
		FREE(uip, M_UIDINFO);
		return;
	}

	mtx_unlock(&uihashtbl_mtx);
	UIDINFO_UNLOCK(uip);
}

/*
 * Change the count associated with number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgproccnt(uip, diff, max)
	struct	uidinfo	*uip;
	int	diff;
	int	max;
{

	UIDINFO_LOCK(uip);
	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		UIDINFO_UNLOCK(uip);
		return (0);
	}
	uip->ui_proccnt += diff;
	if (uip->ui_proccnt < 0)
		printf("negative proccnt for uid = %d\n", uip->ui_uid);
	UIDINFO_UNLOCK(uip);
	return (1);
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(uip, hiwat, to, max)
	struct	uidinfo	*uip;
	u_int	*hiwat;
	u_int	to;
	rlim_t	max;
{
	rlim_t new;
	int s;

	s = splnet();
	UIDINFO_LOCK(uip);
	new = uip->ui_sbsize + to - *hiwat;
	/* Don't allow them to exceed max, but allow subtraction. */
	if (to > *hiwat && new > max) {
		splx(s);
		UIDINFO_UNLOCK(uip);
		return (0);
	}
	uip->ui_sbsize = new;
	*hiwat = to;
	if (uip->ui_sbsize < 0)
		printf("negative sbsize for uid = %d\n", uip->ui_uid);
	splx(s);
	UIDINFO_UNLOCK(uip);
	return (1);
}

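/*
 * Usage sketch (illustrative; the call site details are an assumption,
 * not a quote from the fork code): how a caller would typically use
 * chgproccnt() against the per-uid RLIMIT_NPROC limit.
 *
 *	// Try to charge one more process to the user; 0 means over limit.
 *	if (!chgproccnt(uip, 1, lim_cur(p, RLIMIT_NPROC)))
 *		error = EAGAIN;		// over the per-uid process limit
 *	...
 *	// On process exit, give the slot back (max 0: no limit enforced).
 *	(void)chgproccnt(uip, -1, 0);
 */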