kern_resource.c revision 74914
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: head/sys/kern/kern_resource.c 74914 2001-03-28 09:17:56Z jhb $
 */

#include "opt_compat.h"
#include "opt_rlimit.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/time.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

static int donice __P((struct proc *curp, struct proc *chgp, int n));
/* dosetrlimit non-static:  Needed by SysVR4 emulator */
int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp));

static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
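/*
 * The hash table size is a power of two, so masking a uid with
 * uihash (the table size minus one) selects its bucket.
 */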
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct mtx uihashtbl_mtx;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static struct uidinfo	*uilookup __P((uid_t uid));

/*
 * Resource controls and accounting.
 */

#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
int
getpriority(curp, uap)
	struct proc *curp;
	register struct getpriority_args *uap;
{
	register struct proc *p;
	register int low = PRIO_MAX + 1;
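	/* "low" starts above PRIO_MAX; if it is unchanged below, no visible process matched. */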

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == 0)
			break;
		if (p_can(curp, p, P_CAN_SEE, NULL))
			break;
		low = p->p_nice;
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (!p_can(curp, p, P_CAN_SEE, NULL) && p->p_nice < low)
				low = p->p_nice;
		}
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		ALLPROC_LOCK(AP_SHARED);
		LIST_FOREACH(p, &allproc, p_list)
			if (!p_can(curp, p, P_CAN_SEE, NULL) &&
			    p->p_ucred->cr_uid == uap->who &&
			    p->p_nice < low)
				low = p->p_nice;
		ALLPROC_LOCK(AP_RELEASE);
		break;

	default:
		return (EINVAL);
	}
	if (low == PRIO_MAX + 1)
		return (ESRCH);
	curp->p_retval[0] = low;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
/* ARGSUSED */
int
setpriority(curp, uap)
	struct proc *curp;
	register struct setpriority_args *uap;
{
	register struct proc *p;
	int found = 0, error = 0;

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == 0)
			break;
		if (p_can(curp, p, P_CAN_SEE, NULL))
			break;
		error = donice(curp, p, uap->prio);
		found++;
		break;

	case PRIO_PGRP: {
		register struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (!p_can(curp, p, P_CAN_SEE, NULL)) {
				error = donice(curp, p, uap->prio);
				found++;
			}
		}
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		ALLPROC_LOCK(AP_SHARED);
		LIST_FOREACH(p, &allproc, p_list)
			if (p->p_ucred->cr_uid == uap->who &&
			    !p_can(curp, p, P_CAN_SEE, NULL)) {
				error = donice(curp, p, uap->prio);
				found++;
			}
		ALLPROC_LOCK(AP_RELEASE);
		break;

	default:
		return (EINVAL);
	}
	if (found == 0)
		return (ESRCH);
	return (error);
}

static int
donice(curp, chgp, n)
	register struct proc *curp, *chgp;
	register int n;
{
	int	error;

	if ((error = p_can(curp, chgp, P_CAN_SCHED, NULL)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
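	/* Lowering the nice value (raising priority) requires superuser privilege. */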
	if (n < chgp->p_nice && suser(curp))
		return (EACCES);
	chgp->p_nice = n;
	(void)resetpriority(chgp);
	return (0);
}

/* rtprio system call */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif

/*
 * Set realtime priority
 */

/* ARGSUSED */
int
rtprio(curp, uap)
	struct proc *curp;
	register struct rtprio_args *uap;
{
	register struct proc *p;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return (error);

	if (uap->pid == 0)
		p = curp;
	else
		p = pfind(uap->pid);

	if (p == 0)
		return (ESRCH);

	switch (uap->function) {
	case RTP_LOOKUP:
		pri_to_rtp(&p->p_pri, &rtp);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_can(curp, p, P_CAN_SCHED, NULL)))
		        return (error);
		/* disallow setting rtprio in most cases if not superuser */
		if (suser(curp) != 0) {
			/* can't set someone else's */
			if (uap->pid)
				return (EPERM);
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
#if 0
 			if (RTP_PRIO_IS_REALTIME(rtp.type))
#endif
			if (rtp.type != RTP_PRIO_NORMAL)
				return (EPERM);
		}
		if (rtp_to_pri(&rtp, &p->p_pri) == 0)
			return (0);
		return (EINVAL);
	default:
		return (EINVAL);
	}
}

int
rtp_to_pri(struct rtprio *rtp, struct priority *pri)
{

	if (rtp->prio > RTP_PRIO_MAX)
		return (-1);
	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		pri->pri_level = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		pri->pri_level = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		pri->pri_level = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		return (-1);
	}
	pri->pri_class = rtp->type;
	pri->pri_native = pri->pri_level;
	pri->pri_user = pri->pri_level;
	return (0);
}

void
pri_to_rtp(struct priority *pri, struct rtprio *rtp)
{

	switch (PRI_BASE(pri->pri_class)) {
	case PRI_REALTIME:
		rtp->prio = pri->pri_level - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = pri->pri_level - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = pri->pri_level - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = pri->pri_class;
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
/* ARGSUSED */
int
osetrlimit(p, uap)
	struct proc *p;
	register struct osetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error =
	    copyin((caddr_t)uap->rlp, (caddr_t)&olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	return (dosetrlimit(p, uap->which, &lim));
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
/* ARGSUSED */
int
ogetrlimit(p, uap)
	struct proc *p;
	register struct ogetrlimit_args *uap;
{
	struct orlimit olim;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
	if (olim.rlim_cur == -1)
		olim.rlim_cur = 0x7fffffff;
	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
	if (olim.rlim_max == -1)
		olim.rlim_max = 0x7fffffff;
	return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/* ARGSUSED */
int
setrlimit(p, uap)
	struct proc *p;
	register struct __setrlimit_args *uap;
{
	struct rlimit alim;
	int error;

	if ((error =
	    copyin((caddr_t)uap->rlp, (caddr_t)&alim, sizeof (struct rlimit))))
		return (error);
	return (dosetrlimit(p, uap->which, &alim));
}

int
dosetrlimit(p, which, limp)
	struct proc *p;
	u_int which;
	struct rlimit *limp;
{
	register struct rlimit *alimp;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);
	alimp = &p->p_rlimit[which];

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = suser_xxx(0, p, PRISON_ROOT)))
			return (error);
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
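	/*
	 * The plimit structure is shared copy-on-write after fork; take a
	 * private copy before modifying it unless sharing was requested.
	 */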
	if (p->p_limit->p_refcnt > 1 &&
	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
		p->p_limit->p_refcnt--;
		p->p_limit = limcopy(p->p_limit);
		alimp = &p->p_rlimit[which];
	}

	switch (which) {

	case RLIMIT_CPU:
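		/*
		 * p_cpulimit is kept in microseconds; clamp the conversion
		 * from seconds so it cannot overflow.
		 */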
		if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
			p->p_limit->p_cpulimit = RLIM_INFINITY;
		else
			p->p_limit->p_cpulimit =
			    (rlim_t)1000000 * limp->rlim_cur;
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > MAXDSIZ)
			limp->rlim_cur = MAXDSIZ;
		if (limp->rlim_max > MAXDSIZ)
			limp->rlim_max = MAXDSIZ;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > MAXSSIZ)
			limp->rlim_cur = MAXSSIZ;
		if (limp->rlim_max > MAXSSIZ)
			limp->rlim_max = MAXSSIZ;
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != alimp->rlim_cur) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > alimp->rlim_cur) {
				prot = VM_PROT_ALL;
				size = limp->rlim_cur - alimp->rlim_cur;
				addr = USRSTACK - limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				addr = USRSTACK - alimp->rlim_cur;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void) vm_map_protect(&p->p_vmspace->vm_map,
					      addr, addr+size, prot, FALSE);
		}
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	*alimp = *limp;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/* ARGSUSED */
int
getrlimit(p, uap)
	struct proc *p;
	register struct __getrlimit_args *uap;
{

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
	    sizeof (struct rlimit)));
}

/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 */
void
calcru(p, up, sp, ip)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
	struct timeval *ip;
{
	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
	int s;
	struct timeval tv;

	mtx_assert(&sched_lock, MA_OWNED);
	/* XXX: why spl-protect ?  worst case is an off-by-one report */
	s = splstatclock();
	ut = p->p_uticks;
	st = p->p_sticks;
	it = p->p_iticks;
	splx(s);

	tt = ut + st + it;
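	/*
	 * If no ticks were recorded, charge the time to the system to
	 * avoid dividing by zero below.
	 */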
	if (tt == 0) {
		st = 1;
		tt = 1;
	}

	tu = p->p_runtime;
	if (p == curproc) {
		/*
		 * Adjust for the current time slice.  This is actually fairly
		 * important since the error here is on the order of a time
		 * quantum, which is much greater than the sampling error.
		 */
		microuptime(&tv);
		if (timevalcmp(&tv, PCPU_PTR(switchtime), <))
			printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
			    PCPU_GET(switchtime.tv_sec), PCPU_GET(switchtime.tv_usec),
			    tv.tv_sec, tv.tv_usec);
		else
			tu += (tv.tv_usec - PCPU_GET(switchtime.tv_usec)) +
			    (tv.tv_sec - PCPU_GET(switchtime.tv_sec)) *
			    (int64_t)1000000;
	}
	ptu = p->p_uu + p->p_su + p->p_iu;
	if (tu < ptu || (int64_t)tu < 0) {
		/* XXX no %qd in kernel.  Truncate. */
		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
		       (long)tu, p->p_pid, p->p_comm);
		tu = ptu;
	}

	/* Subdivide tu. */
	uu = (tu * ut) / tt;
	su = (tu * st) / tt;
	iu = tu - uu - su;

	/* Enforce monotonicity. */
	if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
		if (uu < p->p_uu)
			uu = p->p_uu;
		else if (uu + p->p_su + p->p_iu > tu)
			uu = tu - p->p_su - p->p_iu;
		if (st == 0)
			su = p->p_su;
		else {
			su = ((tu - uu) * st) / (st + it);
			if (su < p->p_su)
				su = p->p_su;
			else if (uu + su + p->p_iu > tu)
				su = tu - uu - p->p_iu;
		}
		KASSERT(uu + su + p->p_iu <= tu,
		    ("calcru: monotonisation botch 1"));
		iu = tu - uu - su;
		KASSERT(iu >= p->p_iu,
		    ("calcru: monotonisation botch 2"));
	}
	p->p_uu = uu;
	p->p_su = su;
	p->p_iu = iu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
	if (ip != NULL) {
		ip->tv_sec = iu / 1000000;
		ip->tv_usec = iu % 1000000;
	}
}

#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
/* ARGSUSED */
int
getrusage(p, uap)
	register struct proc *p;
	register struct getrusage_args *uap;
{
	register struct rusage *rup;

	switch (uap->who) {

	case RUSAGE_SELF:
		rup = &p->p_stats->p_ru;
		mtx_lock_spin(&sched_lock);
		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
		mtx_unlock_spin(&sched_lock);
		break;

	case RUSAGE_CHILDREN:
		rup = &p->p_stats->p_cru;
		break;

	default:
		return (EINVAL);
	}
	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
	    sizeof (struct rusage)));
}

void
ruadd(ru, ru2)
	register struct rusage *ru, *ru2;
{
	register long *ip, *ip2;
	register int i;

	timevaladd(&ru->ru_utime, &ru2->ru_utime);
	timevaladd(&ru->ru_stime, &ru2->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
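	/*
	 * Sum the remaining fields pairwise; ru_first and ru_last bracket
	 * the block of long counters in struct rusage.
	 */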
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 */
struct plimit *
limcopy(lim)
	struct plimit *lim;
{
	register struct plimit *copy;

	MALLOC(copy, struct plimit *, sizeof(struct plimit),
	    M_SUBPROC, M_WAITOK);
	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
	copy->p_lflags = 0;
	copy->p_refcnt = 1;
	return (copy);
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit()
{

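	/* hashinit() sizes the table to a power of two and stores size - 1 in uihash. */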
	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	mtx_init(&uihashtbl_mtx, "uidinfo hash", MTX_DEF);
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_mtx must be locked.
 */
static struct uidinfo *
uilookup(uid)
	uid_t uid;
{
	struct	uihashhead *uipp;
	struct	uidinfo *uip;

	mtx_assert(&uihashtbl_mtx, MA_OWNED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct	uidinfo *uip;

	mtx_lock(&uihashtbl_mtx);
	uip = uilookup(uid);
	if (uip == NULL) {
		struct  uidinfo *old_uip;

		mtx_unlock(&uihashtbl_mtx);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		mtx_lock(&uihashtbl_mtx);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* someone else beat us to it */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			mtx_init(&uip->ui_mtx, "uidinfo struct", MTX_DEF);
			uip->ui_uid = uid;
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	mtx_unlock(&uihashtbl_mtx);
	return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	mtx_lock(&uip->ui_mtx);
	uip->ui_ref++;
	mtx_unlock(&uip->ui_mtx);
}

/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If lowering the refcount means we need to free the struct, bump
 *   the count back up, lose the lock and acquire the locks in the
 *   proper order to try again.
 */
void
uifree(uip)
	struct uidinfo *uip;
{

	/* Prepare for optimal case. */
	mtx_lock(&uip->ui_mtx);

	if (--uip->ui_ref != 0) {
		mtx_unlock(&uip->ui_mtx);
		return;
	}

	/* Prepare for suboptimal case. */
	uip->ui_ref++;
	mtx_unlock(&uip->ui_mtx);
	mtx_lock(&uihashtbl_mtx);
	mtx_lock(&uip->ui_mtx);

	/*
	 * We must subtract one from the count again because we backed out
	 * our initial subtraction before dropping the lock.
	 * Since another thread may have added a reference after we dropped the
	 * initial lock we have to test for zero again.
	 */
	if (--uip->ui_ref == 0) {
		LIST_REMOVE(uip, ui_hash);
		mtx_unlock(&uihashtbl_mtx);
		if (uip->ui_sbsize != 0)
			/* XXX no %qd in kernel.  Truncate. */
			printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
			    uip->ui_uid, (long)uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		mtx_destroy(&uip->ui_mtx);
		FREE(uip, M_UIDINFO);
		return;
	}

	mtx_unlock(&uihashtbl_mtx);
	mtx_unlock(&uip->ui_mtx);
}

844
845/*
846 * Change the count associated with number of processes
847 * a given user is using.  When 'max' is 0, don't enforce a limit
848 */
int
chgproccnt(uip, diff, max)
	struct	uidinfo	*uip;
	int	diff;
	int	max;
{

	mtx_lock(&uip->ui_mtx);
	/* don't allow them to exceed max, but allow subtraction */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		mtx_unlock(&uip->ui_mtx);
		return (0);
	}
	uip->ui_proccnt += diff;
	if (uip->ui_proccnt < 0)
		printf("negative proccnt for uid = %d\n", uip->ui_uid);
	mtx_unlock(&uip->ui_mtx);
	return (1);
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(uip, hiwat, to, max)
	struct	uidinfo	*uip;
	u_long *hiwat;
	u_long	to;
	rlim_t	max;
{
	rlim_t new;
	int s;

	s = splnet();
	mtx_lock(&uip->ui_mtx);
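	/* Replace this buffer's old reservation (*hiwat) with the requested one (to). */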
	new = uip->ui_sbsize + to - *hiwat;
	/* don't allow them to exceed max, but allow subtraction */
	if (to > *hiwat && new > max) {
		splx(s);
		mtx_unlock(&uip->ui_mtx);
		return (0);
	}
	uip->ui_sbsize = new;
	*hiwat = to;
	if (uip->ui_sbsize < 0)
		printf("negative sbsize for uid = %d\n", uip->ui_uid);
	splx(s);
	mtx_unlock(&uip->ui_mtx);
	return (1);
}