/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_resource.c 128088 2004-04-10 11:08:16Z mux $");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/sysent.h>
#include <sys/time.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

static int donice(struct thread *td, struct proc *chgp, int n);

static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct mtx uihashtbl_mtx;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static struct uidinfo	*uilookup(uid_t uid);

/*
 * Resource controls and accounting.
 */

#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
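/*
 * Return the lowest (i.e. most favorable) nice value found among the
 * ksegrps of the processes selected by "which" and "who": a single
 * process, a process group, or all processes owned by a user.
 */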
/*
 * MPSAFE
 */
int
getpriority(td, uap)
	struct thread *td;
	register struct getpriority_args *uap;
{
	struct ksegrp *kg;
	struct proc *p;
	int error, low;

	error = 0;
	low = PRIO_MAX + 1;
	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			low = td->td_ksegrp->kg_nice;
		else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			if (p_cansee(td, p) == 0) {
				FOREACH_KSEGRP_IN_PROC(p, kg) {
					if (kg->kg_nice < low)
						low = kg->kg_nice;
				}
			}
			PROC_UNLOCK(p);
		}
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (!p_cansee(td, p)) {
				FOREACH_KSEGRP_IN_PROC(p, kg) {
					if (kg->kg_nice < low)
						low = kg->kg_nice;
				}
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		LIST_FOREACH(p, &allproc, p_list) {
			PROC_LOCK(p);
			if (!p_cansee(td, p) &&
			    p->p_ucred->cr_uid == uap->who) {
				FOREACH_KSEGRP_IN_PROC(p, kg) {
					if (kg->kg_nice < low)
						low = kg->kg_nice;
				}
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (low == PRIO_MAX + 1 && error == 0)
		error = ESRCH;
	td->td_retval[0] = low;
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
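/*
 * Set the nice value for every process selected by "which" and "who",
 * subject to the permission checks done in donice().  Returns ESRCH if
 * no process matched.
 */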
/*
 * MPSAFE
 */
int
setpriority(td, uap)
	struct thread *td;
	register struct setpriority_args *uap;
{
	struct proc *curp;
	register struct proc *p;
	int found = 0, error = 0;

	curp = td->td_proc;
	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			PROC_LOCK(curp);
			error = donice(td, curp, uap->prio);
			PROC_UNLOCK(curp);
		} else {
			p = pfind(uap->who);
			if (p == 0)
				break;
			if (p_cansee(td, p) == 0)
				error = donice(td, p, uap->prio);
			PROC_UNLOCK(p);
		}
		found++;
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = curp->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (!p_cansee(td, p)) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_ucred->cr_uid == uap->who &&
			    !p_cansee(td, p)) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (found == 0 && error == 0)
		error = ESRCH;
	return (error);
}

/*
 * Set "nice" for a process.  Doesn't really understand threaded processes
 * well, but does try.  Has the unfortunate side effect of making all of the
 * nice values for a process's ksegrps the same.  This suggests that nice
 * values should be stored as a per-process nice plus per-ksegrp deltas, but
 * that is not done yet.
 */
static int
donice(struct thread *td, struct proc *p, int n)
{
	struct ksegrp *kg;
	int error, low;

	low = PRIO_MAX + 1;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((error = p_cansched(td, p)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	/*
	 * Without superuser privilege, only allow the nice value to be set
	 * no lower than the lowest nice currently in use by one of the
	 * process's ksegrps.  E.g., for nice values of 4, 3 and 2, allow
	 * renicing to 3 but not to 1.
	 */
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		if (kg->kg_nice < low)
			low = kg->kg_nice;
	}
	if (n < low && suser(td) != 0)
		return (EACCES);
	mtx_lock_spin(&sched_lock);
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		sched_nice(kg, n);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Set realtime priority
 *
 * MPSAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif

int
rtprio(td, uap)
	struct thread *td;
	register struct rtprio_args *uap;
{
	struct proc *curp;
	register struct proc *p;
	struct rtprio rtp;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	curp = td->td_proc;
	if (uap->pid == 0) {
		p = curp;
		PROC_LOCK(p);
	} else {
		p = pfind(uap->pid);
		if (p == NULL)
			return (ESRCH);
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		mtx_lock_spin(&sched_lock);
		pri_to_rtp(FIRST_KSEGRP_IN_PROC(p), &rtp);
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;
		/* disallow setting rtprio in most cases if not superuser */
		if (suser(td) != 0) {
			/* can't set someone else's */
			if (uap->pid) {
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
#if 0
			if (RTP_PRIO_IS_REALTIME(rtp.type))
#endif
			if (rtp.type != RTP_PRIO_NORMAL) {
				error = EPERM;
				break;
			}
		}
		mtx_lock_spin(&sched_lock);
		error = rtp_to_pri(&rtp, FIRST_KSEGRP_IN_PROC(p));
		mtx_unlock_spin(&sched_lock);
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

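/*
 * Convert an rtprio request into a scheduling class and user priority
 * for the given ksegrp.  Expects sched_lock to be held.
 */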
int
rtp_to_pri(struct rtprio *rtp, struct ksegrp *kg)
{

	mtx_assert(&sched_lock, MA_OWNED);
	if (rtp->prio > RTP_PRIO_MAX)
		return (EINVAL);
	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		kg->kg_user_pri = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		kg->kg_user_pri = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		kg->kg_user_pri = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		return (EINVAL);
	}
	sched_class(kg, rtp->type);
	if (curthread->td_ksegrp == kg) {
		curthread->td_base_pri = kg->kg_user_pri;
		sched_prio(curthread, kg->kg_user_pri); /* XXX dubious */
	}
	return (0);
}

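/*
 * Convert the scheduling class and user priority of a ksegrp back into
 * rtprio form.  Expects sched_lock to be held.
 */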
void
pri_to_rtp(struct ksegrp *kg, struct rtprio *rtp)
{

	mtx_assert(&sched_lock, MA_OWNED);
	switch (PRI_BASE(kg->kg_pri_class)) {
	case PRI_REALTIME:
		rtp->prio = kg->kg_user_pri - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = kg->kg_user_pri - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = kg->kg_user_pri - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = kg->kg_pri_class;
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
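/*
 * Old-style setrlimit: copy the orlimit in from userland, widen it into
 * a struct rlimit and hand it to kern_setrlimit().
 */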
/*
 * MPSAFE
 */
int
osetrlimit(td, uap)
	struct thread *td;
	register struct osetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	error = kern_setrlimit(td, uap->which, &lim);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
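/*
 * Old-style getrlimit: fetch the current limit and clamp values that do
 * not fit into the old orlimit fields.
 */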
/*
 * MPSAFE
 */
int
ogetrlimit(td, uap)
	struct thread *td;
	register struct ogetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit rl;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rl);
	PROC_UNLOCK(p);

	/*
	 * XXX would be more correct to convert only RLIM_INFINITY to the
	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
	 * values.  Most 64->32 and 32->16 conversions, including not
	 * unimportant ones of uids are even more broken than what we
	 * do here (they blindly truncate).  We don't do this correctly
	 * here since we have little experience with EOVERFLOW yet.
	 * Elsewhere, getuid() can't fail...
	 */
	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
	error = copyout(&olim, uap->rlp, sizeof(olim));
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
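/*
 * Copy the new limit in from userland and let kern_setrlimit() do the
 * real work.
 */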
/*
 * MPSAFE
 */
int
setrlimit(td, uap)
	struct thread *td;
	register struct __setrlimit_args *uap;
{
	struct rlimit alim;
	int error;

	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
		return (error);
	error = kern_setrlimit(td, uap->which, &alim);
	return (error);
}

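/*
 * Set a resource limit for the current process.  Raising a limit beyond
 * the current hard limit requires superuser privilege.  The new values
 * are clamped against the global maxima, installed in a freshly
 * allocated plimit (leaving any shared copy untouched) and, for
 * RLIMIT_STACK, reflected in the protection of the stack mapping.
 */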
int
kern_setrlimit(td, which, limp)
	struct thread *td;
	u_int which;
	struct rlimit *limp;
{
	struct plimit *newlim, *oldlim;
	struct proc *p;
	register struct rlimit *alimp;
	rlim_t oldssiz;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	oldssiz = 0;
	p = td->td_proc;
	newlim = lim_alloc();
	PROC_LOCK(p);
	oldlim = p->p_limit;
	alimp = &oldlim->pl_rlimit[which];
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = suser_cred(td->td_ucred, PRISON_ROOT))) {
			PROC_UNLOCK(p);
			lim_free(newlim);
			return (error);
		}
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	lim_copy(newlim, oldlim);
	alimp = &newlim->pl_rlimit[which];

	switch (which) {

	case RLIMIT_CPU:
		mtx_lock_spin(&sched_lock);
		p->p_cpulimit = limp->rlim_cur;
		mtx_unlock_spin(&sched_lock);
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdsiz)
			limp->rlim_cur = maxdsiz;
		if (limp->rlim_max > maxdsiz)
			limp->rlim_max = maxdsiz;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxssiz)
			limp->rlim_cur = maxssiz;
		if (limp->rlim_max > maxssiz)
			limp->rlim_max = maxssiz;
		oldssiz = alimp->rlim_cur;
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	*alimp = *limp;
	p->p_limit = newlim;
	PROC_UNLOCK(p);
	lim_free(oldlim);

	if (which == RLIMIT_STACK) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != oldssiz) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			mtx_lock(&Giant);
			if (limp->rlim_cur > oldssiz) {
				prot = p->p_sysent->sv_stackprot;
				size = limp->rlim_cur - oldssiz;
				addr = p->p_sysent->sv_usrstack -
				    limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = oldssiz - limp->rlim_cur;
				addr = p->p_sysent->sv_usrstack -
				    oldssiz;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void) vm_map_protect(&p->p_vmspace->vm_map,
					      addr, addr+size, prot, FALSE);
			mtx_unlock(&Giant);
		}
	}
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
getrlimit(td, uap)
	struct thread *td;
	register struct __getrlimit_args *uap;
{
	struct rlimit rlim;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rlim);
	PROC_UNLOCK(p);
	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
	return (error);
}

/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 */
void
calcru(p, up, sp, ip)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
	struct timeval *ip;
{
	struct bintime bt;
	struct timeval tv;
	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;

	mtx_assert(&sched_lock, MA_OWNED);
	/* XXX: why spl-protect ?  worst case is an off-by-one report */

	ut = p->p_uticks;
	st = p->p_sticks;
	it = p->p_iticks;

	tt = ut + st + it;
	if (tt == 0) {
		st = 1;
		tt = 1;
	}
	if (p == curthread->td_proc) {
		/*
		 * Adjust for the current time slice.  This is actually fairly
		 * important since the error here is on the order of a time
		 * quantum, which is much greater than the sampling error.
		 * XXXKSE use a different test due to threads on other
		 * processors also being 'current'.
		 */
		binuptime(&bt);
		bintime_sub(&bt, PCPU_PTR(switchtime));
		bintime_add(&bt, &p->p_runtime);
	} else
		bt = p->p_runtime;
	bintime2timeval(&bt, &tv);
	tu = (u_int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
	ptu = p->p_uu + p->p_su + p->p_iu;
	if (tu < ptu || (int64_t)tu < 0) {
		printf("calcru: negative time of %jd usec for pid %d (%s)\n",
		    (intmax_t)tu, p->p_pid, p->p_comm);
		tu = ptu;
	}

	/* Subdivide tu. */
	uu = (tu * ut) / tt;
	su = (tu * st) / tt;
	iu = tu - uu - su;

	/* Enforce monotonicity. */
	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
		if (uu < p->p_uu)
			uu = p->p_uu;
		else if (uu + p->p_su + p->p_iu > tu)
			uu = tu - p->p_su - p->p_iu;
		if (st == 0)
			su = p->p_su;
		else {
			su = ((tu - uu) * st) / (st + it);
			if (su < p->p_su)
				su = p->p_su;
			else if (uu + su + p->p_iu > tu)
				su = tu - uu - p->p_iu;
		}
		KASSERT(uu + su + p->p_iu <= tu,
		    ("calcru: monotonisation botch 1"));
		iu = tu - uu - su;
		KASSERT(iu >= p->p_iu,
		    ("calcru: monotonisation botch 2"));
	}
	p->p_uu = uu;
	p->p_su = su;
	p->p_iu = iu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
	if (ip != NULL) {
		ip->tv_sec = iu / 1000000;
		ip->tv_usec = iu % 1000000;
	}
}

#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
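/*
 * Return resource usage for the calling process (RUSAGE_SELF) or for
 * its terminated and waited-for children (RUSAGE_CHILDREN).
 */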
/*
 * MPSAFE
 */
/* ARGSUSED */
int
getrusage(td, uap)
	register struct thread *td;
	register struct getrusage_args *uap;
{
	struct rusage ru;
	struct proc *p;

	p = td->td_proc;
	switch (uap->who) {

	case RUSAGE_SELF:
		mtx_lock(&Giant);
		mtx_lock_spin(&sched_lock);
		calcru(p, &p->p_stats->p_ru.ru_utime, &p->p_stats->p_ru.ru_stime,
		    NULL);
		mtx_unlock_spin(&sched_lock);
		ru = p->p_stats->p_ru;
		mtx_unlock(&Giant);
		break;

	case RUSAGE_CHILDREN:
		mtx_lock(&Giant);
		ru = p->p_stats->p_cru;
		mtx_unlock(&Giant);
		break;

	default:
		return (EINVAL);
		break;
	}
	return (copyout(&ru, uap->rusage, sizeof(struct rusage)));
}

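/*
 * Add the resource usage counters in ru2 into ru: the times are summed,
 * ru_maxrss takes the larger of the two, and the remaining fields are
 * added member by member.
 */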
void
ruadd(ru, ru2)
	register struct rusage *ru, *ru2;
{
	register long *ip, *ip2;
	register int i;

	timevaladd(&ru->ru_utime, &ru2->ru_utime);
	timevaladd(&ru->ru_stime, &ru2->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Allocate a new resource limits structure and initialize its
 * reference count and mutex pointer.
 */
struct plimit *
lim_alloc()
{
	struct plimit *limp;

	limp = (struct plimit *)malloc(sizeof(struct plimit), M_PLIMIT,
	    M_WAITOK);
	limp->pl_refcnt = 1;
	limp->pl_mtx = mtx_pool_alloc(mtxpool_sleep);
	return (limp);
}

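/*
 * Add a reference to a resource limits structure and return it.
 */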
struct plimit *
lim_hold(limp)
	struct plimit *limp;
{

	LIM_LOCK(limp);
	limp->pl_refcnt++;
	LIM_UNLOCK(limp);
	return (limp);
}

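/*
 * Drop a reference to a resource limits structure, freeing it when the
 * last reference goes away.
 */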
void
lim_free(limp)
	struct plimit *limp;
{

	LIM_LOCK(limp);
	KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
	if (--limp->pl_refcnt == 0) {
		LIM_UNLOCK(limp);
		free((void *)limp, M_PLIMIT);
		return;
	}
	LIM_UNLOCK(limp);
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork.
 */
void
lim_copy(dst, src)
	struct plimit *dst, *src;
{

	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
}

/*
 * Return the hard limit for a particular system resource.  The
 * which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_max(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_max);
}

/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_cur);
}

/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(which >= 0 && which < RLIM_NLIMITS,
	    ("request for invalid resource limit"));
	*rlp = p->p_limit->pl_rlimit[which];
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit()
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF);
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_mtx must be locked.
 */
static struct uidinfo *
uilookup(uid)
	uid_t uid;
{
	struct uihashhead *uipp;
	struct uidinfo *uip;

	mtx_assert(&uihashtbl_mtx, MA_OWNED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct uidinfo *old_uip, *uip;

	mtx_lock(&uihashtbl_mtx);
	uip = uilookup(uid);
	if (uip == NULL) {
		mtx_unlock(&uihashtbl_mtx);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		mtx_lock(&uihashtbl_mtx);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* Someone else beat us to it. */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			uip->ui_mtxp = mtx_pool_alloc(mtxpool_sleep);
			uip->ui_uid = uid;
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	mtx_unlock(&uihashtbl_mtx);
	return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	UIDINFO_LOCK(uip);
	uip->ui_ref++;
	UIDINFO_UNLOCK(uip);
}

/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in a need to free, bump the count
 *   back up, drop the lock and acquire the locks in the proper
 *   order to try again.
 */
void
uifree(uip)
	struct uidinfo *uip;
{

	/* Prepare for optimal case. */
	UIDINFO_LOCK(uip);

	if (--uip->ui_ref != 0) {
		UIDINFO_UNLOCK(uip);
		return;
	}

	/* Prepare for suboptimal case. */
	uip->ui_ref++;
	UIDINFO_UNLOCK(uip);
	mtx_lock(&uihashtbl_mtx);
	UIDINFO_LOCK(uip);

	/*
	 * We must subtract one from the count again because we backed out
	 * our initial subtraction before dropping the lock.
	 * Since another thread may have added a reference after we dropped the
	 * initial lock we have to test for zero again.
	 */
	if (--uip->ui_ref == 0) {
		LIST_REMOVE(uip, ui_hash);
		mtx_unlock(&uihashtbl_mtx);
		if (uip->ui_sbsize != 0)
			printf("freeing uidinfo: uid = %d, sbsize = %jd\n",
			    uip->ui_uid, (intmax_t)uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		UIDINFO_UNLOCK(uip);
		FREE(uip, M_UIDINFO);
		return;
	}

	mtx_unlock(&uihashtbl_mtx);
	UIDINFO_UNLOCK(uip);
}

/*
 * Change the count of processes a given user is using.  When 'max' is 0,
 * don't enforce a limit.
 */
int
chgproccnt(uip, diff, max)
	struct	uidinfo	*uip;
	int	diff;
	int	max;
{

	UIDINFO_LOCK(uip);
	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		UIDINFO_UNLOCK(uip);
		return (0);
	}
	uip->ui_proccnt += diff;
	if (uip->ui_proccnt < 0)
		printf("negative proccnt for uid = %d\n", uip->ui_uid);
	UIDINFO_UNLOCK(uip);
	return (1);
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(uip, hiwat, to, max)
	struct	uidinfo	*uip;
	u_int  *hiwat;
	u_int	to;
	rlim_t	max;
{
	rlim_t new;
	int s;

	s = splnet();
	UIDINFO_LOCK(uip);
	new = uip->ui_sbsize + to - *hiwat;
	/* Don't allow them to exceed max, but allow subtraction */
	if (to > *hiwat && new > max) {
		splx(s);
		UIDINFO_UNLOCK(uip);
		return (0);
	}
	uip->ui_sbsize = new;
	*hiwat = to;
	if (uip->ui_sbsize < 0)
		printf("negative sbsize for uid = %d\n", uip->ui_uid);
	splx(s);
	UIDINFO_UNLOCK(uip);
	return (1);
}