kern_resource.c revision 44327
1/*-
2 * Copyright (c) 1982, 1986, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
39 * $Id: kern_resource.c,v 1.41 1999/02/25 11:03:08 bde Exp $
40 */
41
42#include "opt_compat.h"
43#include "opt_rlimit.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/sysproto.h>
48#include <sys/kernel.h>
49#include <sys/file.h>
50#include <sys/resourcevar.h>
51#include <sys/malloc.h>
52#include <sys/proc.h>
53
54#include <vm/vm.h>
55#include <vm/vm_param.h>
56#include <vm/vm_prot.h>
57#include <sys/lock.h>
58#include <vm/pmap.h>
59#include <vm/vm_map.h>
60
61static int donice __P((struct proc *curp, struct proc *chgp, int n));
62/* dosetrlimit non-static:  Needed by SysVR4 emulator */
63int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp));
64
65/*
66 * Resource controls and accounting.
67 */
68
69#ifndef _SYS_SYSPROTO_H_
70struct getpriority_args {
71	int	which;
72	int	who;
73};
74#endif
75int
76getpriority(curp, uap)
77	struct proc *curp;
78	register struct getpriority_args *uap;
79{
80	register struct proc *p;
81	register int low = PRIO_MAX + 1;
82
83	switch (uap->which) {
84
85	case PRIO_PROCESS:
86		if (uap->who == 0)
87			p = curp;
88		else
89			p = pfind(uap->who);
90		if (p == 0)
91			break;
92		low = p->p_nice;
93		break;
94
95	case PRIO_PGRP: {
96		register struct pgrp *pg;
97
98		if (uap->who == 0)
99			pg = curp->p_pgrp;
100		else if ((pg = pgfind(uap->who)) == NULL)
101			break;
102		for (p = pg->pg_members.lh_first; p != 0;
103		     p = p->p_pglist.le_next) {
104			if (p->p_nice < low)
105				low = p->p_nice;
106		}
107		break;
108	}
109
110	case PRIO_USER:
111		if (uap->who == 0)
112			uap->who = curp->p_ucred->cr_uid;
113		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next)
114			if (p->p_ucred->cr_uid == uap->who &&
115			    p->p_nice < low)
116				low = p->p_nice;
117		break;
118
119	default:
120		return (EINVAL);
121	}
122	if (low == PRIO_MAX + 1)
123		return (ESRCH);
124	curp->p_retval[0] = low;
125	return (0);
126}
127
128#ifndef _SYS_SYSPROTO_H_
129struct setpriority_args {
130	int	which;
131	int	who;
132	int	prio;
133};
134#endif
135/* ARGSUSED */
136int
137setpriority(curp, uap)
138	struct proc *curp;
139	register struct setpriority_args *uap;
140{
141	register struct proc *p;
142	int found = 0, error = 0;
143
144	switch (uap->which) {
145
146	case PRIO_PROCESS:
147		if (uap->who == 0)
148			p = curp;
149		else
150			p = pfind(uap->who);
151		if (p == 0)
152			break;
153		error = donice(curp, p, uap->prio);
154		found++;
155		break;
156
157	case PRIO_PGRP: {
158		register struct pgrp *pg;
159
160		if (uap->who == 0)
161			pg = curp->p_pgrp;
162		else if ((pg = pgfind(uap->who)) == NULL)
163			break;
164		for (p = pg->pg_members.lh_first; p != 0;
165		    p = p->p_pglist.le_next) {
166			error = donice(curp, p, uap->prio);
167			found++;
168		}
169		break;
170	}
171
172	case PRIO_USER:
173		if (uap->who == 0)
174			uap->who = curp->p_ucred->cr_uid;
175		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next)
176			if (p->p_ucred->cr_uid == uap->who) {
177				error = donice(curp, p, uap->prio);
178				found++;
179			}
180		break;
181
182	default:
183		return (EINVAL);
184	}
185	if (found == 0)
186		return (ESRCH);
187	return (error);
188}
189
190static int
191donice(curp, chgp, n)
192	register struct proc *curp, *chgp;
193	register int n;
194{
195	register struct pcred *pcred = curp->p_cred;
196
197	if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
198	    pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid &&
199	    pcred->p_ruid != chgp->p_ucred->cr_uid)
200		return (EPERM);
201	if (n > PRIO_MAX)
202		n = PRIO_MAX;
203	if (n < PRIO_MIN)
204		n = PRIO_MIN;
205	if (n < chgp->p_nice && suser(pcred->pc_ucred, &curp->p_acflag))
206		return (EACCES);
207	chgp->p_nice = n;
208	(void)resetpriority(chgp);
209	return (0);
210}
211
212/* rtprio system call */
213#ifndef _SYS_SYSPROTO_H_
214struct rtprio_args {
215	int		function;
216	pid_t		pid;
217	struct rtprio	*rtp;
218};
219#endif
220
221/*
222 * Set realtime priority
223 */
224
225/* ARGSUSED */
226int
227rtprio(curp, uap)
228	struct proc *curp;
229	register struct rtprio_args *uap;
230{
231	register struct proc *p;
232	register struct pcred *pcred = curp->p_cred;
233	struct rtprio rtp;
234	int error;
235
236	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
237	if (error)
238		return (error);
239
240	if (uap->pid == 0)
241		p = curp;
242	else
243		p = pfind(uap->pid);
244
245	if (p == 0)
246		return (ESRCH);
247
248	switch (uap->function) {
249	case RTP_LOOKUP:
250		return (copyout(&p->p_rtprio, uap->rtp, sizeof(struct rtprio)));
251	case RTP_SET:
252		if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
253		    pcred->pc_ucred->cr_uid != p->p_ucred->cr_uid &&
254		    pcred->p_ruid != p->p_ucred->cr_uid)
255		        return (EPERM);
256		/* disallow setting rtprio in most cases if not superuser */
257		if (suser(pcred->pc_ucred, &curp->p_acflag)) {
258			/* can't set someone else's */
259			if (uap->pid)
260				return (EPERM);
261			/* can't set realtime priority */
262/*
263 * Realtime priority has to be restricted for reasons which should be
264 * obvious. However, for idle priority, there is a potential for
265 * system deadlock if an idleprio process gains a lock on a resource
266 * that other processes need (and the idleprio process can't run
267 * due to a CPU-bound normal process). Fix me! XXX
268 */
269#if 0
270 			if (RTP_PRIO_IS_REALTIME(rtp.type))
271#endif
272			if (rtp.type != RTP_PRIO_NORMAL)
273				return (EPERM);
274		}
275		switch (rtp.type) {
276#ifdef RTP_PRIO_FIFO
277		case RTP_PRIO_FIFO:
278#endif
279		case RTP_PRIO_REALTIME:
280		case RTP_PRIO_NORMAL:
281		case RTP_PRIO_IDLE:
282			if (rtp.prio > RTP_PRIO_MAX)
283				return (EINVAL);
284			p->p_rtprio = rtp;
285			return (0);
286		default:
287			return (EINVAL);
288		}
289
290	default:
291		return (EINVAL);
292	}
293}
294
295#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
296#ifndef _SYS_SYSPROTO_H_
297struct osetrlimit_args {
298	u_int	which;
299	struct	orlimit *rlp;
300};
301#endif
302/* ARGSUSED */
303int
304osetrlimit(p, uap)
305	struct proc *p;
306	register struct osetrlimit_args *uap;
307{
308	struct orlimit olim;
309	struct rlimit lim;
310	int error;
311
312	if ((error =
313	    copyin((caddr_t)uap->rlp, (caddr_t)&olim, sizeof(struct orlimit))))
314		return (error);
315	lim.rlim_cur = olim.rlim_cur;
316	lim.rlim_max = olim.rlim_max;
317	return (dosetrlimit(p, uap->which, &lim));
318}
319
320#ifndef _SYS_SYSPROTO_H_
321struct ogetrlimit_args {
322	u_int	which;
323	struct	orlimit *rlp;
324};
325#endif
326/* ARGSUSED */
327int
328ogetrlimit(p, uap)
329	struct proc *p;
330	register struct ogetrlimit_args *uap;
331{
332	struct orlimit olim;
333
334	if (uap->which >= RLIM_NLIMITS)
335		return (EINVAL);
336	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
337	if (olim.rlim_cur == -1)
338		olim.rlim_cur = 0x7fffffff;
339	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
340	if (olim.rlim_max == -1)
341		olim.rlim_max = 0x7fffffff;
342	return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
343}
344#endif /* COMPAT_43 || COMPAT_SUNOS */
345
346#ifndef _SYS_SYSPROTO_H_
347struct __setrlimit_args {
348	u_int	which;
349	struct	rlimit *rlp;
350};
351#endif
352/* ARGSUSED */
353int
354setrlimit(p, uap)
355	struct proc *p;
356	register struct __setrlimit_args *uap;
357{
358	struct rlimit alim;
359	int error;
360
361	if ((error =
362	    copyin((caddr_t)uap->rlp, (caddr_t)&alim, sizeof (struct rlimit))))
363		return (error);
364	return (dosetrlimit(p, uap->which, &alim));
365}
366
367int
368dosetrlimit(p, which, limp)
369	struct proc *p;
370	u_int which;
371	struct rlimit *limp;
372{
373	register struct rlimit *alimp;
374	int error;
375
376	if (which >= RLIM_NLIMITS)
377		return (EINVAL);
378	alimp = &p->p_rlimit[which];
379
380	/*
381	 * Preserve historical bugs by treating negative limits as unsigned.
382	 */
383	if (limp->rlim_cur < 0)
384		limp->rlim_cur = RLIM_INFINITY;
385	if (limp->rlim_max < 0)
386		limp->rlim_max = RLIM_INFINITY;
387
388	if (limp->rlim_cur > alimp->rlim_max ||
389	    limp->rlim_max > alimp->rlim_max)
390		if ((error = suser(p->p_ucred, &p->p_acflag)))
391			return (error);
392	if (limp->rlim_cur > limp->rlim_max)
393		limp->rlim_cur = limp->rlim_max;
394	if (p->p_limit->p_refcnt > 1 &&
395	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
396		p->p_limit->p_refcnt--;
397		p->p_limit = limcopy(p->p_limit);
398		alimp = &p->p_rlimit[which];
399	}
400
401	switch (which) {
402
403	case RLIMIT_CPU:
404		if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
405			p->p_limit->p_cpulimit = RLIM_INFINITY;
406		else
407			p->p_limit->p_cpulimit =
408			    (rlim_t)1000000 * limp->rlim_cur;
409		break;
410	case RLIMIT_DATA:
411		if (limp->rlim_cur > MAXDSIZ)
412			limp->rlim_cur = MAXDSIZ;
413		if (limp->rlim_max > MAXDSIZ)
414			limp->rlim_max = MAXDSIZ;
415		break;
416
417	case RLIMIT_STACK:
418		if (limp->rlim_cur > MAXSSIZ)
419			limp->rlim_cur = MAXSSIZ;
420		if (limp->rlim_max > MAXSSIZ)
421			limp->rlim_max = MAXSSIZ;
422		/*
423		 * Stack is allocated to the max at exec time with only
424		 * "rlim_cur" bytes accessible.  If stack limit is going
425		 * up make more accessible, if going down make inaccessible.
426		 */
427		if (limp->rlim_cur != alimp->rlim_cur) {
428			vm_offset_t addr;
429			vm_size_t size;
430			vm_prot_t prot;
431
432			if (limp->rlim_cur > alimp->rlim_cur) {
433				prot = VM_PROT_ALL;
434				size = limp->rlim_cur - alimp->rlim_cur;
435				addr = USRSTACK - limp->rlim_cur;
436			} else {
437				prot = VM_PROT_NONE;
438				size = alimp->rlim_cur - limp->rlim_cur;
439				addr = USRSTACK - alimp->rlim_cur;
440			}
441			addr = trunc_page(addr);
442			size = round_page(size);
443			(void) vm_map_protect(&p->p_vmspace->vm_map,
444					      addr, addr+size, prot, FALSE);
445		}
446		break;
447
448	case RLIMIT_NOFILE:
449		if (limp->rlim_cur > maxfilesperproc)
450			limp->rlim_cur = maxfilesperproc;
451		if (limp->rlim_max > maxfilesperproc)
452			limp->rlim_max = maxfilesperproc;
453		break;
454
455	case RLIMIT_NPROC:
456		if (limp->rlim_cur > maxprocperuid)
457			limp->rlim_cur = maxprocperuid;
458		if (limp->rlim_max > maxprocperuid)
459			limp->rlim_max = maxprocperuid;
460		break;
461	}
462	*alimp = *limp;
463	return (0);
464}
465
466#ifndef _SYS_SYSPROTO_H_
467struct __getrlimit_args {
468	u_int	which;
469	struct	rlimit *rlp;
470};
471#endif
472/* ARGSUSED */
473int
474getrlimit(p, uap)
475	struct proc *p;
476	register struct __getrlimit_args *uap;
477{
478
479	if (uap->which >= RLIM_NLIMITS)
480		return (EINVAL);
481	return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
482	    sizeof (struct rlimit)));
483}
484
485/*
486 * Transform the running time and tick information in proc p into user,
487 * system, and interrupt time usage.
488 */
489void
490calcru(p, up, sp, ip)
491	struct proc *p;
492	struct timeval *up;
493	struct timeval *sp;
494	struct timeval *ip;
495{
496	int64_t totusec;
497	u_int64_t u, st, ut, it, tot;
498	int s;
499	struct timeval tv;
500
501	/* XXX: why spl-protect ?  worst case is an off-by-one report */
502	s = splstatclock();
503	st = p->p_sticks;
504	ut = p->p_uticks;
505	it = p->p_iticks;
506	splx(s);
507
508	tot = st + ut + it;
509	if (tot == 0) {
510		st = 1;
511		tot = 1;
512	}
513
514	totusec = p->p_runtime;
515#ifdef SMP
516	if (p->p_oncpu != (char)0xff) {
517#else
518	if (p == curproc) {
519#endif
520		/*
521		 * Adjust for the current time slice.  This is actually fairly
522		 * important since the error here is on the order of a time
523		 * quantum, which is much greater than the sampling error.
524		 */
525		microuptime(&tv);
526		totusec += (tv.tv_usec - switchtime.tv_usec) +
527		    (tv.tv_sec - switchtime.tv_sec) * (int64_t)1000000;
528
529		/*
530		 * Copy the time that was just read to `switchtime' in case
531		 * we are being called from exit1().  Exits don't go through
532		 * mi_switch(), so `switchtime' doesn't get set in the normal
533		 * way.  We set it here instead of more cleanly in exit1()
534		 * to avoid losing track of the time between the calls to
535		 * microuptime().  Similarly for `switchticks'.
536		 */
537		switchtime = tv;
538		switchticks = ticks;
539	}
540	if (totusec < 0) {
541		/* XXX no %qd in kernel.  Truncate. */
542		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
543		       (long)totusec, p->p_pid, p->p_comm);
544		totusec = 0;
545	}
546	u = totusec;
547	st = (u * st) / tot;
548	sp->tv_sec = st / 1000000;
549	sp->tv_usec = st % 1000000;
550	ut = (u * ut) / tot;
551	up->tv_sec = ut / 1000000;
552	up->tv_usec = ut % 1000000;
553	if (ip != NULL) {
554		it = (u * it) / tot;
555		ip->tv_sec = it / 1000000;
556		ip->tv_usec = it % 1000000;
557	}
558}
559
560#ifndef _SYS_SYSPROTO_H_
561struct getrusage_args {
562	int	who;
563	struct	rusage *rusage;
564};
565#endif
566/* ARGSUSED */
567int
568getrusage(p, uap)
569	register struct proc *p;
570	register struct getrusage_args *uap;
571{
572	register struct rusage *rup;
573
574	switch (uap->who) {
575
576	case RUSAGE_SELF:
577		rup = &p->p_stats->p_ru;
578		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
579		break;
580
581	case RUSAGE_CHILDREN:
582		rup = &p->p_stats->p_cru;
583		break;
584
585	default:
586		return (EINVAL);
587	}
588	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
589	    sizeof (struct rusage)));
590}
591
592void
593ruadd(ru, ru2)
594	register struct rusage *ru, *ru2;
595{
596	register long *ip, *ip2;
597	register int i;
598
599	timevaladd(&ru->ru_utime, &ru2->ru_utime);
600	timevaladd(&ru->ru_stime, &ru2->ru_stime);
601	if (ru->ru_maxrss < ru2->ru_maxrss)
602		ru->ru_maxrss = ru2->ru_maxrss;
603	ip = &ru->ru_first; ip2 = &ru2->ru_first;
604	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
605		*ip++ += *ip2++;
606}
607
608/*
609 * Make a copy of the plimit structure.
610 * We share these structures copy-on-write after fork,
611 * and copy when a limit is changed.
612 */
613struct plimit *
614limcopy(lim)
615	struct plimit *lim;
616{
617	register struct plimit *copy;
618
619	MALLOC(copy, struct plimit *, sizeof(struct plimit),
620	    M_SUBPROC, M_WAITOK);
621	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
622	copy->p_lflags = 0;
623	copy->p_refcnt = 1;
624	return (copy);
625}
626