kern_resource.c revision 79335
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: head/sys/kern/kern_resource.c 79335 2001-07-05 17:10:46Z rwatson $
 */

#include "opt_compat.h"
#include "opt_rlimit.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sx.h>
#include <sys/time.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

static int donice __P((struct proc *curp, struct proc *chgp, int n));
/* dosetrlimit non-static:  Needed by SysVR4 emulator */
int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp));

static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct mtx uihashtbl_mtx;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */
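
/*
 * Explanatory note: hashinit() sizes the table to a power of two and
 * stores that size minus one in uihash, so UIHASH() selects a bucket
 * by masking the low bits of the uid; e.g. with 64 buckets, uids 5
 * and 69 land on the same chain.
 */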

static struct uidinfo	*uilookup __P((uid_t uid));

/*
 * Resource controls and accounting.
 */

#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
int
getpriority(curp, uap)
	struct proc *curp;
	register struct getpriority_args *uap;
{
	register struct proc *p;
	register int low = PRIO_MAX + 1;

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			low = curp->p_nice;
		else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			if (p_cansee(curp, p) == 0)
				low = p->p_nice;
			PROC_UNLOCK(p);
		}
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (!p_cansee(curp, p) && p->p_nice < low)
				low = p->p_nice;
		}
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		sx_slock(&allproc_lock);
		LIST_FOREACH(p, &allproc, p_list)
			if (!p_cansee(curp, p) &&
			    p->p_ucred->cr_uid == uap->who &&
			    p->p_nice < low)
				low = p->p_nice;
		sx_sunlock(&allproc_lock);
		break;

	default:
		return (EINVAL);
	}
	if (low == PRIO_MAX + 1)
		return (ESRCH);
	curp->p_retval[0] = low;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
/* ARGSUSED */
int
setpriority(curp, uap)
	struct proc *curp;
	register struct setpriority_args *uap;
{
	register struct proc *p;
	int found = 0, error = 0;

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			error = donice(curp, curp, uap->prio);
		else {
			p = pfind(uap->who);
			if (p == 0)
				break;
			if (p_cansee(curp, p) == 0)
				error = donice(curp, p, uap->prio);
			PROC_UNLOCK(p);
		}
		found++;
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (!p_cansee(curp, p)) {
				error = donice(curp, p, uap->prio);
				found++;
			}
		}
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		sx_slock(&allproc_lock);
		LIST_FOREACH(p, &allproc, p_list)
			if (p->p_ucred->cr_uid == uap->who &&
			    !p_cansee(curp, p)) {
				error = donice(curp, p, uap->prio);
				found++;
			}
		sx_sunlock(&allproc_lock);
		break;

	default:
		return (EINVAL);
	}
	if (found == 0)
		return (ESRCH);
	return (error);
}

static int
donice(curp, chgp, n)
	register struct proc *curp, *chgp;
	register int n;
{
	int	error;

	if ((error = p_cansched(curp, chgp)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < chgp->p_nice && suser(curp))
		return (EACCES);
	chgp->p_nice = n;
	(void)resetpriority(chgp);
	return (0);
}

/* rtprio system call */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif

/*
 * Set realtime priority
 */

/* ARGSUSED */
int
rtprio(curp, uap)
	struct proc *curp;
	register struct rtprio_args *uap;
{
	register struct proc *p;
	struct rtprio rtp;
	int error;

	if (uap->pid == 0) {
		p = curp;
		PROC_LOCK(p);
	} else
		p = pfind(uap->pid);

	if (p == NULL)
		return (ESRCH);

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(curp, p)))
			break;
		pri_to_rtp(&p->p_pri, &rtp);
		error = copyout(&rtp, uap->rtp, sizeof(struct rtprio));
		break;
	case RTP_SET:
		if ((error = p_cansched(curp, p)) ||
		    (error = copyin(uap->rtp, &rtp, sizeof(struct rtprio))))
			break;
		/* disallow setting rtprio in most cases if not superuser */
		if (suser(curp) != 0) {
			/* can't set someone else's */
			if (uap->pid) {
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
#if 0
			if (RTP_PRIO_IS_REALTIME(rtp.type))
#endif
			if (rtp.type != RTP_PRIO_NORMAL) {
				error = EPERM;
				break;
			}
		}
		error = rtp_to_pri(&rtp, &p->p_pri);
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

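/*
 * Explanatory note: the two routines below map the user-visible rtprio
 * classes onto kernel priority levels and back again.  Each class
 * occupies a contiguous band; for example, {RTP_PRIO_REALTIME, prio 0}
 * maps to pri_level PRI_MIN_REALTIME, and pri_to_rtp() inverts the
 * mapping by subtracting the band's base.
 */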
int
rtp_to_pri(struct rtprio *rtp, struct priority *pri)
{

	if (rtp->prio > RTP_PRIO_MAX)
		return (EINVAL);
	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		pri->pri_level = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		pri->pri_level = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		pri->pri_level = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		return (EINVAL);
	}
	pri->pri_class = rtp->type;
	pri->pri_native = pri->pri_level;
	pri->pri_user = pri->pri_level;
	return (0);
}

void
pri_to_rtp(struct priority *pri, struct rtprio *rtp)
{

	switch (PRI_BASE(pri->pri_class)) {
	case PRI_REALTIME:
		rtp->prio = pri->pri_level - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = pri->pri_level - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = pri->pri_level - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = pri->pri_class;
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
/* ARGSUSED */
int
osetrlimit(p, uap)
	struct proc *p;
	register struct osetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error =
	    copyin((caddr_t)uap->rlp, (caddr_t)&olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	return (dosetrlimit(p, uap->which, &lim));
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
/* ARGSUSED */
int
ogetrlimit(p, uap)
	struct proc *p;
	register struct ogetrlimit_args *uap;
{
	struct orlimit olim;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
	if (olim.rlim_cur == -1)
		olim.rlim_cur = 0x7fffffff;
	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
	if (olim.rlim_max == -1)
		olim.rlim_max = 0x7fffffff;
	return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/* ARGSUSED */
int
setrlimit(p, uap)
	struct proc *p;
	register struct __setrlimit_args *uap;
{
	struct rlimit alim;
	int error;

	if ((error =
	    copyin((caddr_t)uap->rlp, (caddr_t)&alim, sizeof (struct rlimit))))
		return (error);
	return (dosetrlimit(p, uap->which, &alim));
}

int
dosetrlimit(p, which, limp)
	struct proc *p;
	u_int which;
	struct rlimit *limp;
{
	register struct rlimit *alimp;
	int error;

	GIANT_REQUIRED;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);
	alimp = &p->p_rlimit[which];

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = suser_xxx(0, p, PRISON_ROOT)))
			return (error);
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	if (p->p_limit->p_refcnt > 1 &&
	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
		p->p_limit->p_refcnt--;
		p->p_limit = limcopy(p->p_limit);
		alimp = &p->p_rlimit[which];
	}

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
			p->p_limit->p_cpulimit = RLIM_INFINITY;
		else
			p->p_limit->p_cpulimit =
			    (rlim_t)1000000 * limp->rlim_cur;
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > MAXDSIZ)
			limp->rlim_cur = MAXDSIZ;
		if (limp->rlim_max > MAXDSIZ)
			limp->rlim_max = MAXDSIZ;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > MAXSSIZ)
			limp->rlim_cur = MAXSSIZ;
		if (limp->rlim_max > MAXSSIZ)
			limp->rlim_max = MAXSSIZ;
		/*
		 * The stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If the stack limit is being
		 * raised, make more bytes accessible; if it is being
		 * lowered, make them inaccessible.
		 */
		if (limp->rlim_cur != alimp->rlim_cur) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > alimp->rlim_cur) {
				prot = VM_PROT_ALL;
				size = limp->rlim_cur - alimp->rlim_cur;
				addr = USRSTACK - limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				addr = USRSTACK - alimp->rlim_cur;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void) vm_map_protect(&p->p_vmspace->vm_map,
					      addr, addr+size, prot, FALSE);
		}
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	*alimp = *limp;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/* ARGSUSED */
int
getrlimit(p, uap)
	struct proc *p;
	register struct __getrlimit_args *uap;
{

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
	    sizeof (struct rlimit)));
}

/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 */
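/*
 * Explanatory note: the statclock ticks (p_uticks, p_sticks, p_iticks)
 * only sample where the process happened to be running; p_runtime is
 * the authoritative total, so it is prorated by the tick counts.  For
 * example, 2 seconds of runtime sampled as 30 user, 10 system and 0
 * interrupt ticks is reported as roughly 1.5s user and 0.5s system.
 */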
void
calcru(p, up, sp, ip)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
	struct timeval *ip;
{
	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
	int s;
	struct timeval tv;

	mtx_assert(&sched_lock, MA_OWNED);
	/* XXX: why spl-protect ?  worst case is an off-by-one report */
	s = splstatclock();
	ut = p->p_uticks;
	st = p->p_sticks;
	it = p->p_iticks;
	splx(s);

	tt = ut + st + it;
	if (tt == 0) {
		st = 1;
		tt = 1;
	}

	tu = p->p_runtime;
	if (p == curproc) {
		/*
		 * Adjust for the current time slice.  This is actually fairly
		 * important since the error here is on the order of a time
		 * quantum, which is much greater than the sampling error.
		 */
		microuptime(&tv);
		if (timevalcmp(&tv, PCPU_PTR(switchtime), <))
			printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
			    PCPU_GET(switchtime.tv_sec), PCPU_GET(switchtime.tv_usec),
			    tv.tv_sec, tv.tv_usec);
		else
			tu += (tv.tv_usec - PCPU_GET(switchtime.tv_usec)) +
			    (tv.tv_sec - PCPU_GET(switchtime.tv_sec)) *
			    (int64_t)1000000;
	}
	ptu = p->p_uu + p->p_su + p->p_iu;
	if (tu < ptu || (int64_t)tu < 0) {
		/* XXX no %qd in kernel.  Truncate. */
		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
		       (long)tu, p->p_pid, p->p_comm);
		tu = ptu;
	}

	/* Subdivide tu. */
	uu = (tu * ut) / tt;
	su = (tu * st) / tt;
	iu = tu - uu - su;

	/* Enforce monotonicity. */
	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
		if (uu < p->p_uu)
			uu = p->p_uu;
		else if (uu + p->p_su + p->p_iu > tu)
			uu = tu - p->p_su - p->p_iu;
		if (st == 0)
			su = p->p_su;
		else {
			su = ((tu - uu) * st) / (st + it);
			if (su < p->p_su)
				su = p->p_su;
			else if (uu + su + p->p_iu > tu)
				su = tu - uu - p->p_iu;
		}
		KASSERT(uu + su + p->p_iu <= tu,
		    ("calcru: monotonisation botch 1"));
		iu = tu - uu - su;
		KASSERT(iu >= p->p_iu,
		    ("calcru: monotonisation botch 2"));
	}
	p->p_uu = uu;
	p->p_su = su;
	p->p_iu = iu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
	if (ip != NULL) {
		ip->tv_sec = iu / 1000000;
		ip->tv_usec = iu % 1000000;
	}
}

#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
/* ARGSUSED */
int
getrusage(p, uap)
	register struct proc *p;
	register struct getrusage_args *uap;
{
	register struct rusage *rup;

	switch (uap->who) {

	case RUSAGE_SELF:
		rup = &p->p_stats->p_ru;
		mtx_lock_spin(&sched_lock);
		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
		mtx_unlock_spin(&sched_lock);
		break;

	case RUSAGE_CHILDREN:
		rup = &p->p_stats->p_cru;
		break;

	default:
		return (EINVAL);
	}
	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
	    sizeof (struct rusage)));
}

void
ruadd(ru, ru2)
	register struct rusage *ru, *ru2;
{
	register long *ip, *ip2;
	register int i;

	timevaladd(&ru->ru_utime, &ru2->ru_utime);
	timevaladd(&ru->ru_stime, &ru2->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 */
struct plimit *
limcopy(lim)
	struct plimit *lim;
{
	register struct plimit *copy;

	MALLOC(copy, struct plimit *, sizeof(struct plimit),
	    M_SUBPROC, M_WAITOK);
	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
	copy->p_lflags = 0;
	copy->p_refcnt = 1;
	return (copy);
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
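/*
 * A sketch of the intended lifecycle (a hypothetical caller, not
 * lifted from real code):
 *
 *	uip = uifind(uid);			(find or create; adds a ref)
 *	if (chgproccnt(uip, 1, maxprocperuid) == 0)
 *		the limit would be exceeded;
 *	...
 *	(void)chgproccnt(uip, -1, 0);		(subtraction always succeeds)
 *	uifree(uip);				(drop the uifind() reference)
 */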
void
uihashinit()
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	mtx_init(&uihashtbl_mtx, "uidinfo hash", MTX_DEF);
}

/*
 * Look up a uidinfo struct for the given uid.
 * uihashtbl_mtx must be locked.
 */
static struct uidinfo *
uilookup(uid)
	uid_t uid;
{
	struct	uihashhead *uipp;
	struct	uidinfo *uip;

	mtx_assert(&uihashtbl_mtx, MA_OWNED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct	uidinfo *uip;

	mtx_lock(&uihashtbl_mtx);
	uip = uilookup(uid);
	if (uip == NULL) {
		struct  uidinfo *old_uip;

		mtx_unlock(&uihashtbl_mtx);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		mtx_lock(&uihashtbl_mtx);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* someone else beat us to it */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			mtx_init(&uip->ui_mtx, "uidinfo struct", MTX_DEF);
			uip->ui_uid = uid;
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	mtx_unlock(&uihashtbl_mtx);
	return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	mtx_lock(&uip->ui_mtx);
	uip->ui_ref++;
	mtx_unlock(&uip->ui_mtx);
}

/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If lowering the refcount results in a need to free, bump the
 *   count back up, release the lock and acquire the locks in the
 *   proper order to try again.
 */
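/*
 * The proper order is uihashtbl_mtx first, then ui_mtx, matching
 * uifind(); taking the hash lock while still holding ui_mtx could
 * deadlock against a thread acquiring them in that order.
 */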
void
uifree(uip)
	struct uidinfo *uip;
{

	/* Prepare for optimal case. */
	mtx_lock(&uip->ui_mtx);

	if (--uip->ui_ref != 0) {
		mtx_unlock(&uip->ui_mtx);
		return;
	}

	/* Prepare for suboptimal case. */
	uip->ui_ref++;
	mtx_unlock(&uip->ui_mtx);
	mtx_lock(&uihashtbl_mtx);
	mtx_lock(&uip->ui_mtx);

	/*
	 * We must subtract one from the count again because we backed out
	 * our initial subtraction before dropping the lock.
	 * Since another thread may have added a reference after we dropped the
	 * initial lock we have to test for zero again.
	 */
	if (--uip->ui_ref == 0) {
		LIST_REMOVE(uip, ui_hash);
		mtx_unlock(&uihashtbl_mtx);
		if (uip->ui_sbsize != 0)
			/* XXX no %qd in kernel.  Truncate. */
			printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
			    uip->ui_uid, (long)uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		mtx_destroy(&uip->ui_mtx);
		FREE(uip, M_UIDINFO);
		return;
	}

	mtx_unlock(&uihashtbl_mtx);
	mtx_unlock(&uip->ui_mtx);
}

/*
 * Change the count of processes a given user is using.  When 'max'
 * is 0, don't enforce a limit.
 */
int
chgproccnt(uip, diff, max)
	struct	uidinfo	*uip;
	int	diff;
	int	max;
{

	mtx_lock(&uip->ui_mtx);
	/* don't allow them to exceed max, but allow subtraction */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		mtx_unlock(&uip->ui_mtx);
		return (0);
	}
	uip->ui_proccnt += diff;
	if (uip->ui_proccnt < 0)
		printf("negative proccnt for uid = %d\n", uip->ui_uid);
	mtx_unlock(&uip->ui_mtx);
	return (1);
}

/*
 * Change the total socket buffer size a user has used.
 */
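/*
 * Explanatory note: *hiwat remembers how much has already been charged
 * for this buffer, so only the delta (to - *hiwat) hits the per-uid
 * total; e.g. growing a buffer's reservation from 8K to 32K charges
 * another 24K against "max".
 */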
int
chgsbsize(uip, hiwat, to, max)
	struct	uidinfo	*uip;
	u_long *hiwat;
	u_long	to;
	rlim_t	max;
{
	rlim_t new;
	int s;

	s = splnet();
	mtx_lock(&uip->ui_mtx);
	new = uip->ui_sbsize + to - *hiwat;
	/* don't allow them to exceed max, but allow subtraction */
	if (to > *hiwat && new > max) {
		splx(s);
		mtx_unlock(&uip->ui_mtx);
		return (0);
	}
	uip->ui_sbsize = new;
	*hiwat = to;
	if (uip->ui_sbsize < 0)
		printf("negative sbsize for uid = %d\n", uip->ui_uid);
	splx(s);
	mtx_unlock(&uip->ui_mtx);
	return (1);
}