kern_resource.c revision 113921
1/*-
2 * Copyright (c) 1982, 1986, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
39 * $FreeBSD: head/sys/kern/kern_resource.c 113921 2003-04-23 18:48:55Z jhb $
40 */
41
42#include "opt_compat.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/sysproto.h>
47#include <sys/file.h>
48#include <sys/kernel.h>
49#include <sys/lock.h>
50#include <sys/malloc.h>
51#include <sys/mutex.h>
52#include <sys/proc.h>
53#include <sys/resourcevar.h>
54#include <sys/sched.h>
55#include <sys/sx.h>
56#include <sys/sysent.h>
57#include <sys/time.h>
58
59#include <vm/vm.h>
60#include <vm/vm_param.h>
61#include <vm/pmap.h>
62#include <vm/vm_map.h>
63
64static int donice(struct thread *td, struct proc *chgp, int n);
65
66static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
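/*
 * uihash is the hash table size minus one (a power-of-two mask from
 * hashinit()), so UIHASH() simply masks the uid into a bucket.
 */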
67#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
68static struct mtx uihashtbl_mtx;
69static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
70static u_long uihash;		/* size of hash table - 1 */
71
72static struct uidinfo	*uilookup(uid_t uid);
73
74/*
75 * Resource controls and accounting.
76 */
77
78#ifndef _SYS_SYSPROTO_H_
79struct getpriority_args {
80	int	which;
81	int	who;
82};
83#endif
84/*
85 * MPSAFE
86 */
87int
88getpriority(td, uap)
89	struct thread *td;
90	register struct getpriority_args *uap;
91{
92	struct proc *p;
93	int low = PRIO_MAX + 1;
94	int error = 0;
95	struct ksegrp *kg;
96
97	switch (uap->which) {
98	case PRIO_PROCESS:
99		if (uap->who == 0)
100			low = td->td_ksegrp->kg_nice;
101		else {
102			p = pfind(uap->who);
103			if (p == NULL)
104				break;
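			/* Note: pfind() returns the process locked. */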
105			if (p_cansee(td, p) == 0) {
106				FOREACH_KSEGRP_IN_PROC(p, kg) {
107					if (kg->kg_nice < low)
108						low = kg->kg_nice;
109				}
110			}
111			PROC_UNLOCK(p);
112		}
113		break;
114
115	case PRIO_PGRP: {
116		register struct pgrp *pg;
117
118		sx_slock(&proctree_lock);
119		if (uap->who == 0) {
120			pg = td->td_proc->p_pgrp;
121			PGRP_LOCK(pg);
122		} else {
123			pg = pgfind(uap->who);
124			if (pg == NULL) {
125				sx_sunlock(&proctree_lock);
126				break;
127			}
128		}
129		sx_sunlock(&proctree_lock);
130		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
131			PROC_LOCK(p);
132			if (!p_cansee(td, p)) {
133				FOREACH_KSEGRP_IN_PROC(p, kg) {
134					if (kg->kg_nice < low)
135						low = kg->kg_nice;
136				}
137			}
138			PROC_UNLOCK(p);
139		}
140		PGRP_UNLOCK(pg);
141		break;
142	}
143
144	case PRIO_USER:
145		if (uap->who == 0)
146			uap->who = td->td_ucred->cr_uid;
147		sx_slock(&allproc_lock);
148		LIST_FOREACH(p, &allproc, p_list) {
149			PROC_LOCK(p);
150			if (!p_cansee(td, p) &&
151			    p->p_ucred->cr_uid == uap->who) {
152				FOREACH_KSEGRP_IN_PROC(p, kg) {
153					if (kg->kg_nice < low)
154						low = kg->kg_nice;
155				}
156			}
157			PROC_UNLOCK(p);
158		}
159		sx_sunlock(&allproc_lock);
160		break;
161
162	default:
163		error = EINVAL;
164		break;
165	}
166	if (low == PRIO_MAX + 1 && error == 0)
167		error = ESRCH;
168	td->td_retval[0] = low;
169	return (error);
170}
171
172#ifndef _SYS_SYSPROTO_H_
173struct setpriority_args {
174	int	which;
175	int	who;
176	int	prio;
177};
178#endif
179/*
180 * MPSAFE
181 */
182/* ARGSUSED */
183int
184setpriority(td, uap)
185	struct thread *td;
186	register struct setpriority_args *uap;
187{
188	struct proc *curp = td->td_proc;
189	register struct proc *p;
190	int found = 0, error = 0;
191
192	switch (uap->which) {
193	case PRIO_PROCESS:
194		if (uap->who == 0) {
195			PROC_LOCK(curp);
196			error = donice(td, curp, uap->prio);
197			PROC_UNLOCK(curp);
198		} else {
199			p = pfind(uap->who);
200			if (p == NULL)
201				break;
202			if (p_cansee(td, p) == 0)
203				error = donice(td, p, uap->prio);
204			PROC_UNLOCK(p);
205		}
206		found++;
207		break;
208
209	case PRIO_PGRP: {
210		register struct pgrp *pg;
211
212		sx_slock(&proctree_lock);
213		if (uap->who == 0) {
214			pg = curp->p_pgrp;
215			PGRP_LOCK(pg);
216		} else {
217			pg = pgfind(uap->who);
218			if (pg == NULL) {
219				sx_sunlock(&proctree_lock);
220				break;
221			}
222		}
223		sx_sunlock(&proctree_lock);
224		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
225			PROC_LOCK(p);
226			if (!p_cansee(td, p)) {
227				error = donice(td, p, uap->prio);
228				found++;
229			}
230			PROC_UNLOCK(p);
231		}
232		PGRP_UNLOCK(pg);
233		break;
234	}
235
236	case PRIO_USER:
237		if (uap->who == 0)
238			uap->who = td->td_ucred->cr_uid;
239		sx_slock(&allproc_lock);
240		FOREACH_PROC_IN_SYSTEM(p) {
241			PROC_LOCK(p);
242			if (p->p_ucred->cr_uid == uap->who &&
243			    !p_cansee(td, p)) {
244				error = donice(td, p, uap->prio);
245				found++;
246			}
247			PROC_UNLOCK(p);
248		}
249		sx_sunlock(&allproc_lock);
250		break;
251
252	default:
253		error = EINVAL;
254		break;
255	}
256	if (found == 0 && error == 0)
257		error = ESRCH;
258	return (error);
259}
260
261/*
262 * Set "nice" for a process.  Doesn't really understand threaded processes
263 * well, but does try.  Has the unfortunate side effect of making all of a
264 * process's ksegrps take the same NICE value.  This suggests that NICE
265 * values should be stored as a process-wide nice plus per-ksegrp deltas
266 * (but not yet).
267 */
268static int
269donice(struct thread *td, struct proc *p, int n)
270{
271	int	error;
272	int low = PRIO_MAX + 1;
273	struct ksegrp *kg;
274
275	PROC_LOCK_ASSERT(p, MA_OWNED);
276	if ((error = p_cansched(td, p)))
277		return (error);
278	if (n > PRIO_MAX)
279		n = PRIO_MAX;
280	if (n < PRIO_MIN)
281		n = PRIO_MIN;
282	/*
283	 * Without privilege, only allow setting nice to no less than the lowest
284	 * nice among the process's ksegrps; e.g. nices of 4,3,2 allow 3 but not 1.
285	 */
286	FOREACH_KSEGRP_IN_PROC(p, kg) {
287		if (kg->kg_nice < low)
288			low = kg->kg_nice;
289	}
290 	if (n < low && suser(td))
291		return (EACCES);
292	mtx_lock_spin(&sched_lock);
293	FOREACH_KSEGRP_IN_PROC(p, kg) {
294		sched_nice(kg, n);
295	}
296	mtx_unlock_spin(&sched_lock);
297	return (0);
298}
299
300/* rtprio system call */
301#ifndef _SYS_SYSPROTO_H_
302struct rtprio_args {
303	int		function;
304	pid_t		pid;
305	struct rtprio	*rtp;
306};
307#endif
308
309/*
310 * Set or look up realtime priority
311 */
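/*
 * Userland usage sketch (illustrative, not part of this file): a process
 * could request idle priority for itself with something like
 *	struct rtprio rtp = { RTP_PRIO_IDLE, 0 };
 *	rtprio(RTP_SET, 0, &rtp);
 */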
312
313/*
314 * MPSAFE
315 */
316/* ARGSUSED */
317int
318rtprio(td, uap)
319	struct thread *td;
320	register struct rtprio_args *uap;
321{
322	struct proc *curp = td->td_proc;
323	register struct proc *p;
324	struct rtprio rtp;
325	int error, cierror = 0;
326
327	/* Perform copyin before acquiring locks if needed. */
328	if (uap->function == RTP_SET)
329		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
330
331	if (uap->pid == 0) {
332		p = curp;
333		PROC_LOCK(p);
334	} else {
335		p = pfind(uap->pid);
336		if (p == NULL)
337			return (ESRCH);
338	}
339
340	switch (uap->function) {
341	case RTP_LOOKUP:
342		if ((error = p_cansee(td, p)))
343			break;
344		mtx_lock_spin(&sched_lock);
345		pri_to_rtp(FIRST_KSEGRP_IN_PROC(p), &rtp);
346		mtx_unlock_spin(&sched_lock);
347		PROC_UNLOCK(p);
348		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
349	case RTP_SET:
350		if ((error = p_cansched(td, p)) || (error = cierror))
351			break;
352		/* disallow setting rtprio in most cases if not superuser */
353		if (suser(td) != 0) {
354			/* can't set someone else's */
355			if (uap->pid) {
356				error = EPERM;
357				break;
358			}
359			/* can't set realtime priority */
360/*
361 * Realtime priority has to be restricted for reasons which should be
362 * obvious. However, for idle priority, there is a potential for
363 * system deadlock if an idleprio process gains a lock on a resource
364 * that other processes need (and the idleprio process can't run
365 * due to a CPU-bound normal process). Fix me! XXX
366 */
367#if 0
368 			if (RTP_PRIO_IS_REALTIME(rtp.type))
369#endif
370			if (rtp.type != RTP_PRIO_NORMAL) {
371				error = EPERM;
372				break;
373			}
374		}
375		mtx_lock_spin(&sched_lock);
376		error = rtp_to_pri(&rtp, FIRST_KSEGRP_IN_PROC(p));
377		mtx_unlock_spin(&sched_lock);
378		break;
379	default:
380		error = EINVAL;
381		break;
382	}
383	PROC_UNLOCK(p);
384	return (error);
385}
386
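/*
 * Translate an rtprio (class and priority) into a user priority in the
 * corresponding kernel priority band and update the ksegrp's scheduling class.
 */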
387int
388rtp_to_pri(struct rtprio *rtp, struct ksegrp *kg)
389{
390
391	mtx_assert(&sched_lock, MA_OWNED);
392	if (rtp->prio > RTP_PRIO_MAX)
393		return (EINVAL);
394	switch (RTP_PRIO_BASE(rtp->type)) {
395	case RTP_PRIO_REALTIME:
396		kg->kg_user_pri = PRI_MIN_REALTIME + rtp->prio;
397		break;
398	case RTP_PRIO_NORMAL:
399		kg->kg_user_pri = PRI_MIN_TIMESHARE + rtp->prio;
400		break;
401	case RTP_PRIO_IDLE:
402		kg->kg_user_pri = PRI_MIN_IDLE + rtp->prio;
403		break;
404	default:
405		return (EINVAL);
406	}
407	sched_class(kg, rtp->type);
408	if (curthread->td_ksegrp == kg) {
409		curthread->td_base_pri = kg->kg_user_pri;
410		curthread->td_priority = kg->kg_user_pri; /* XXX dubious */
411	}
412	return (0);
413}
414
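/*
 * Inverse of rtp_to_pri: report a ksegrp's user priority as an rtprio
 * relative to the base priority of its class.
 */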
415void
416pri_to_rtp(struct ksegrp *kg, struct rtprio *rtp)
417{
418
419	mtx_assert(&sched_lock, MA_OWNED);
420	switch (PRI_BASE(kg->kg_pri_class)) {
421	case PRI_REALTIME:
422		rtp->prio = kg->kg_user_pri - PRI_MIN_REALTIME;
423		break;
424	case PRI_TIMESHARE:
425		rtp->prio = kg->kg_user_pri - PRI_MIN_TIMESHARE;
426		break;
427	case PRI_IDLE:
428		rtp->prio = kg->kg_user_pri - PRI_MIN_IDLE;
429		break;
430	default:
431		break;
432	}
433	rtp->type = kg->kg_pri_class;
434}
435
436#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
437#ifndef _SYS_SYSPROTO_H_
438struct osetrlimit_args {
439	u_int	which;
440	struct	orlimit *rlp;
441};
442#endif
443/*
444 * MPSAFE
445 */
446/* ARGSUSED */
447int
448osetrlimit(td, uap)
449	struct thread *td;
450	register struct osetrlimit_args *uap;
451{
452	struct orlimit olim;
453	struct rlimit lim;
454	int error;
455
456	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
457		return (error);
458	lim.rlim_cur = olim.rlim_cur;
459	lim.rlim_max = olim.rlim_max;
460	mtx_lock(&Giant);
461	error = dosetrlimit(td, uap->which, &lim);
462	mtx_unlock(&Giant);
463	return (error);
464}
465
466#ifndef _SYS_SYSPROTO_H_
467struct ogetrlimit_args {
468	u_int	which;
469	struct	orlimit *rlp;
470};
471#endif
472/*
473 * MPSAFE
474 */
475/* ARGSUSED */
476int
477ogetrlimit(td, uap)
478	struct thread *td;
479	register struct ogetrlimit_args *uap;
480{
481	struct proc *p = td->td_proc;
482	struct orlimit olim;
483	int error;
484
485	if (uap->which >= RLIM_NLIMITS)
486		return (EINVAL);
487	mtx_lock(&Giant);
488	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
489	if (olim.rlim_cur == -1)
490		olim.rlim_cur = 0x7fffffff;
491	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
492	if (olim.rlim_max == -1)
493		olim.rlim_max = 0x7fffffff;
494	error = copyout(&olim, uap->rlp, sizeof(olim));
495	mtx_unlock(&Giant);
496	return (error);
497}
498#endif /* COMPAT_43 || COMPAT_SUNOS */
499
500#ifndef _SYS_SYSPROTO_H_
501struct __setrlimit_args {
502	u_int	which;
503	struct	rlimit *rlp;
504};
505#endif
506/*
507 * MPSAFE
508 */
509/* ARGSUSED */
510int
511setrlimit(td, uap)
512	struct thread *td;
513	register struct __setrlimit_args *uap;
514{
515	struct rlimit alim;
516	int error;
517
518	if ((error = copyin(uap->rlp, &alim, sizeof (struct rlimit))))
519		return (error);
520	mtx_lock(&Giant);
521	error = dosetrlimit(td, uap->which, &alim);
522	mtx_unlock(&Giant);
523	return (error);
524}
525
526int
527dosetrlimit(td, which, limp)
528	struct thread *td;
529	u_int which;
530	struct rlimit *limp;
531{
532	struct proc *p = td->td_proc;
533	register struct rlimit *alimp;
534	int error;
535
536	GIANT_REQUIRED;
537
538	if (which >= RLIM_NLIMITS)
539		return (EINVAL);
540	alimp = &p->p_rlimit[which];
541
542	/*
543	 * Preserve historical bugs by treating negative limits as unsigned.
544	 */
545	if (limp->rlim_cur < 0)
546		limp->rlim_cur = RLIM_INFINITY;
547	if (limp->rlim_max < 0)
548		limp->rlim_max = RLIM_INFINITY;
549
550	if (limp->rlim_cur > alimp->rlim_max ||
551	    limp->rlim_max > alimp->rlim_max)
552		if ((error = suser_cred(td->td_ucred, PRISON_ROOT)))
553			return (error);
554	if (limp->rlim_cur > limp->rlim_max)
555		limp->rlim_cur = limp->rlim_max;
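	/*
	 * The plimit may be shared copy-on-write with other processes (see
	 * limcopy()); take a private copy before modifying our limits.
	 */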
556	if (p->p_limit->p_refcnt > 1) {
557		p->p_limit->p_refcnt--;
558		p->p_limit = limcopy(p->p_limit);
559		alimp = &p->p_rlimit[which];
560	}
561
562	switch (which) {
563
564	case RLIMIT_CPU:
565		mtx_lock_spin(&sched_lock);
566		p->p_cpulimit = limp->rlim_cur;
567		mtx_unlock_spin(&sched_lock);
568		break;
569	case RLIMIT_DATA:
570		if (limp->rlim_cur > maxdsiz)
571			limp->rlim_cur = maxdsiz;
572		if (limp->rlim_max > maxdsiz)
573			limp->rlim_max = maxdsiz;
574		break;
575
576	case RLIMIT_STACK:
577		if (limp->rlim_cur > maxssiz)
578			limp->rlim_cur = maxssiz;
579		if (limp->rlim_max > maxssiz)
580			limp->rlim_max = maxssiz;
581		/*
582		 * The stack is allocated to the maximum at exec time with only
583		 * "rlim_cur" bytes accessible.  If the stack limit is going up,
584		 * make more accessible; if going down, make it inaccessible.
585		 */
586		if (limp->rlim_cur != alimp->rlim_cur) {
587			vm_offset_t addr;
588			vm_size_t size;
589			vm_prot_t prot;
590
591			if (limp->rlim_cur > alimp->rlim_cur) {
592				prot = p->p_sysent->sv_stackprot;
593				size = limp->rlim_cur - alimp->rlim_cur;
594				addr = p->p_sysent->sv_usrstack -
595				    limp->rlim_cur;
596			} else {
597				prot = VM_PROT_NONE;
598				size = alimp->rlim_cur - limp->rlim_cur;
599				addr = p->p_sysent->sv_usrstack -
600				    alimp->rlim_cur;
601			}
602			addr = trunc_page(addr);
603			size = round_page(size);
604			(void) vm_map_protect(&p->p_vmspace->vm_map,
605					      addr, addr+size, prot, FALSE);
606		}
607		break;
608
609	case RLIMIT_NOFILE:
610		if (limp->rlim_cur > maxfilesperproc)
611			limp->rlim_cur = maxfilesperproc;
612		if (limp->rlim_max > maxfilesperproc)
613			limp->rlim_max = maxfilesperproc;
614		break;
615
616	case RLIMIT_NPROC:
617		if (limp->rlim_cur > maxprocperuid)
618			limp->rlim_cur = maxprocperuid;
619		if (limp->rlim_max > maxprocperuid)
620			limp->rlim_max = maxprocperuid;
621		if (limp->rlim_cur < 1)
622			limp->rlim_cur = 1;
623		if (limp->rlim_max < 1)
624			limp->rlim_max = 1;
625		break;
626	}
627	*alimp = *limp;
628	return (0);
629}
630
631#ifndef _SYS_SYSPROTO_H_
632struct __getrlimit_args {
633	u_int	which;
634	struct	rlimit *rlp;
635};
636#endif
637/*
638 * MPSAFE
639 */
640/* ARGSUSED */
641int
642getrlimit(td, uap)
643	struct thread *td;
644	register struct __getrlimit_args *uap;
645{
646	int error;
647	struct proc *p = td->td_proc;
648
649	if (uap->which >= RLIM_NLIMITS)
650		return (EINVAL);
651	mtx_lock(&Giant);
652	error = copyout(&p->p_rlimit[uap->which], uap->rlp,
653		    sizeof (struct rlimit));
654	mtx_unlock(&Giant);
655	return(error);
656}
657
658/*
659 * Transform the running time and tick information in proc p into user,
660 * system, and interrupt time usage.
661 */
662void
663calcru(p, up, sp, ip)
664	struct proc *p;
665	struct timeval *up;
666	struct timeval *sp;
667	struct timeval *ip;
668{
669	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
670	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
671	struct timeval tv;
672	struct bintime bt;
673
674	mtx_assert(&sched_lock, MA_OWNED);
675	/* XXX: why spl-protect ?  worst case is an off-by-one report */
676
677	ut = p->p_uticks;
678	st = p->p_sticks;
679	it = p->p_iticks;
680
681	tt = ut + st + it;
682	if (tt == 0) {
683		st = 1;
684		tt = 1;
685	}
686
687	if (curthread->td_proc == p) {
688		/*
689		 * Adjust for the current time slice.  This is actually fairly
690		 * important since the error here is on the order of a time
691		 * quantum, which is much greater than the sampling error.
692		 * XXXKSE use a different test due to threads on other
693		 * processors also being 'current'.
694		 */
695
696		binuptime(&bt);
697		bintime_sub(&bt, PCPU_PTR(switchtime));
698		bintime_add(&bt, &p->p_runtime);
699	} else {
700		bt = p->p_runtime;
701	}
702	bintime2timeval(&bt, &tv);
703	tu = (u_int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
704	ptu = p->p_uu + p->p_su + p->p_iu;
705	if (tu < ptu || (int64_t)tu < 0) {
706		/* XXX no %qd in kernel.  Truncate. */
707		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
708		       (long)tu, p->p_pid, p->p_comm);
709		tu = ptu;
710	}
711
712	/* Subdivide tu. */
713	uu = (tu * ut) / tt;
714	su = (tu * st) / tt;
715	iu = tu - uu - su;
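	/*
	 * For example, ut=2, st=1, it=1 ticks and tu=4000 usec give
	 * uu=2000, su=1000, iu=1000.
	 */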
716
717	/* Enforce monotonicity. */
718	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
719		if (uu < p->p_uu)
720			uu = p->p_uu;
721		else if (uu + p->p_su + p->p_iu > tu)
722			uu = tu - p->p_su - p->p_iu;
723		if (st == 0)
724			su = p->p_su;
725		else {
726			su = ((tu - uu) * st) / (st + it);
727			if (su < p->p_su)
728				su = p->p_su;
729			else if (uu + su + p->p_iu > tu)
730				su = tu - uu - p->p_iu;
731		}
732		KASSERT(uu + su + p->p_iu <= tu,
733		    	("calcru: monotonisation botch 1"));
734		iu = tu - uu - su;
735		KASSERT(iu >= p->p_iu,
736		    	("calcru: monotonisation botch 2"));
737	}
738	p->p_uu = uu;
739	p->p_su = su;
740	p->p_iu = iu;
741
742	up->tv_sec = uu / 1000000;
743	up->tv_usec = uu % 1000000;
744	sp->tv_sec = su / 1000000;
745	sp->tv_usec = su % 1000000;
746	if (ip != NULL) {
747		ip->tv_sec = iu / 1000000;
748		ip->tv_usec = iu % 1000000;
749	}
750}
751
752#ifndef _SYS_SYSPROTO_H_
753struct getrusage_args {
754	int	who;
755	struct	rusage *rusage;
756};
757#endif
758/*
759 * MPSAFE
760 */
761/* ARGSUSED */
762int
763getrusage(td, uap)
764	register struct thread *td;
765	register struct getrusage_args *uap;
766{
767	struct proc *p = td->td_proc;
768	register struct rusage *rup;
769	int error = 0;
770
771	mtx_lock(&Giant);
772
773	switch (uap->who) {
774	case RUSAGE_SELF:
775		rup = &p->p_stats->p_ru;
776		mtx_lock_spin(&sched_lock);
777		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
778		mtx_unlock_spin(&sched_lock);
779		break;
780
781	case RUSAGE_CHILDREN:
782		rup = &p->p_stats->p_cru;
783		break;
784
785	default:
786		rup = NULL;
787		error = EINVAL;
788		break;
789	}
790	mtx_unlock(&Giant);
791	if (error == 0) {
792		/* XXX Unlocked access to p_stats->p_ru or p_cru. */
793		error = copyout(rup, uap->rusage, sizeof (struct rusage));
794	}
795	return(error);
796}
797
798void
799ruadd(ru, ru2)
800	register struct rusage *ru, *ru2;
801{
802	register long *ip, *ip2;
803	register int i;
804
805	timevaladd(&ru->ru_utime, &ru2->ru_utime);
806	timevaladd(&ru->ru_stime, &ru2->ru_stime);
807	if (ru->ru_maxrss < ru2->ru_maxrss)
808		ru->ru_maxrss = ru2->ru_maxrss;
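	/*
	 * The fields from ru_first through ru_last are contiguous longs;
	 * sum them member by member.
	 */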
809	ip = &ru->ru_first; ip2 = &ru2->ru_first;
810	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
811		*ip++ += *ip2++;
812}
813
814/*
815 * Make a copy of the plimit structure.
816 * We share these structures copy-on-write after fork,
817 * and copy when a limit is changed.
818 */
819struct plimit *
820limcopy(lim)
821	struct plimit *lim;
822{
823	register struct plimit *copy;
824
825	MALLOC(copy, struct plimit *, sizeof(struct plimit),
826	    M_SUBPROC, M_WAITOK);
827	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
828	copy->p_refcnt = 1;
829	return (copy);
830}
831
832/*
833 * Find the uidinfo structure for a uid.  This structure is used to
834 * track the total resource consumption (process count, socket buffer
835 * size, etc.) for the uid and impose limits.
836 */
837void
838uihashinit()
839{
840
841	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
842	mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF);
843}
844
845/*
846 * Look up the uidinfo struct for the given uid.
847 * uihashtbl_mtx must be locked.
848 */
849static struct uidinfo *
850uilookup(uid)
851	uid_t uid;
852{
853	struct	uihashhead *uipp;
854	struct	uidinfo *uip;
855
856	mtx_assert(&uihashtbl_mtx, MA_OWNED);
857	uipp = UIHASH(uid);
858	LIST_FOREACH(uip, uipp, ui_hash)
859		if (uip->ui_uid == uid)
860			break;
861
862	return (uip);
863}
864
865/*
866 * Find or allocate a struct uidinfo for a particular uid.
867 * Increase refcount on uidinfo struct returned.
868 * uifree() should be called on a struct uidinfo when released.
869 */
870struct uidinfo *
871uifind(uid)
872	uid_t uid;
873{
874	struct	uidinfo *uip;
875
876	mtx_lock(&uihashtbl_mtx);
877	uip = uilookup(uid);
878	if (uip == NULL) {
879		struct  uidinfo *old_uip;
880
881		mtx_unlock(&uihashtbl_mtx);
882		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
883		mtx_lock(&uihashtbl_mtx);
884		/*
885		 * There's a chance someone created our uidinfo while we
886		 * were in malloc and not holding the lock, so we have to
887		 * make sure we don't insert a duplicate uidinfo.
888		 */
889		if ((old_uip = uilookup(uid)) != NULL) {
890			/* someone else beat us to it */
891			free(uip, M_UIDINFO);
892			uip = old_uip;
893		} else {
894			uip->ui_mtxp = mtx_pool_alloc();
895			uip->ui_uid = uid;
896			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
897		}
898	}
899	uihold(uip);
900	mtx_unlock(&uihashtbl_mtx);
901	return (uip);
902}
903
904/*
905 * Place another refcount on a uidinfo struct.
906 */
907void
908uihold(uip)
909	struct uidinfo *uip;
910{
911
912	UIDINFO_LOCK(uip);
913	uip->ui_ref++;
914	UIDINFO_UNLOCK(uip);
915}
916
917/*-
918 * Since uidinfo structs have a long lifetime, we use an
919 * opportunistic refcounting scheme to avoid locking the lookup hash
920 * for each release.
921 *
922 * If the refcount hits 0, we need to free the structure,
923 * which means we need to lock the hash.
924 * Optimal case:
925 *   After locking the struct and lowering the refcount, if we find
926 *   that we don't need to free, simply unlock and return.
927 * Suboptimal case:
928 *   If lowering the refcount means we need to free, bump the count
929 *   back up, lose the lock and acquire the locks in the proper
930 *   order to try again.
931 */
932void
933uifree(uip)
934	struct uidinfo *uip;
935{
936
937	/* Prepare for optimal case. */
938	UIDINFO_LOCK(uip);
939
940	if (--uip->ui_ref != 0) {
941		UIDINFO_UNLOCK(uip);
942		return;
943	}
944
945	/* Prepare for suboptimal case. */
946	uip->ui_ref++;
947	UIDINFO_UNLOCK(uip);
948	mtx_lock(&uihashtbl_mtx);
949	UIDINFO_LOCK(uip);
950
951	/*
952	 * We must subtract one from the count again because we backed out
953	 * our initial subtraction before dropping the lock.
954	 * Since another thread may have added a reference after we dropped the
955	 * initial lock we have to test for zero again.
956	 */
957	if (--uip->ui_ref == 0) {
958		LIST_REMOVE(uip, ui_hash);
959		mtx_unlock(&uihashtbl_mtx);
960		if (uip->ui_sbsize != 0)
961			/* XXX no %qd in kernel.  Truncate. */
962			printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
963			    uip->ui_uid, (long)uip->ui_sbsize);
964		if (uip->ui_proccnt != 0)
965			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
966			    uip->ui_uid, uip->ui_proccnt);
967		UIDINFO_UNLOCK(uip);
968		FREE(uip, M_UIDINFO);
969		return;
970	}
971
972	mtx_unlock(&uihashtbl_mtx);
973	UIDINFO_UNLOCK(uip);
974}
975
976/*
977 * Change the count of the number of processes a given user is
978 * using.  When 'max' is 0, don't enforce a limit.
979 */
980int
981chgproccnt(uip, diff, max)
982	struct	uidinfo	*uip;
983	int	diff;
984	int	max;
985{
986
987	UIDINFO_LOCK(uip);
988	/* don't allow them to exceed max, but allow subtraction */
989	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
990		UIDINFO_UNLOCK(uip);
991		return (0);
992	}
993	uip->ui_proccnt += diff;
994	if (uip->ui_proccnt < 0)
995		printf("negative proccnt for uid = %d\n", uip->ui_uid);
996	UIDINFO_UNLOCK(uip);
997	return (1);
998}
999
1000/*
1001 * Change the total socket buffer size a user has used.
1002 */
1003int
1004chgsbsize(uip, hiwat, to, max)
1005	struct	uidinfo	*uip;
1006	u_int  *hiwat;
1007	u_int	to;
1008	rlim_t	max;
1009{
1010	rlim_t new;
1011	int s;
1012
1013	s = splnet();
1014	UIDINFO_LOCK(uip);
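	/*
	 * Adjust the per-uid total by the difference between the new and the
	 * old high-water mark for this socket buffer.
	 */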
1015	new = uip->ui_sbsize + to - *hiwat;
1016	/* don't allow them to exceed max, but allow subtraction */
1017	if (to > *hiwat && new > max) {
1018		splx(s);
1019		UIDINFO_UNLOCK(uip);
1020		return (0);
1021	}
1022	uip->ui_sbsize = new;
1023	*hiwat = to;
1024	if (uip->ui_sbsize < 0)
1025		printf("negative sbsize for uid = %d\n", uip->ui_uid);
1026	splx(s);
1027	UIDINFO_UNLOCK(uip);
1028	return (1);
1029}
1030