kern_resource.c revision 35029
1/*-
2 * Copyright (c) 1982, 1986, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
39 * $Id: kern_resource.c,v 1.33 1998/03/04 10:25:52 dufault Exp $
40 */
41
42#include "opt_compat.h"
43#include "opt_rlimit.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/sysproto.h>
48#include <sys/kernel.h>
49#include <sys/file.h>
50#include <sys/resourcevar.h>
51#include <sys/malloc.h>
52#include <sys/proc.h>
53
54#include <vm/vm.h>
55#include <vm/vm_param.h>
56#include <vm/vm_prot.h>
57#include <sys/lock.h>
58#include <vm/pmap.h>
59#include <vm/vm_map.h>
60
61static int donice __P((struct proc *curp, struct proc *chgp, int n));
62static int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp));
63
64/*
65 * Resource controls and accounting.
66 */
67
68#ifndef _SYS_SYSPROTO_H_
69struct getpriority_args {
70	int	which;
71	int	who;
72};
73#endif
74int
75getpriority(curp, uap)
76	struct proc *curp;
77	register struct getpriority_args *uap;
78{
79	register struct proc *p;
80	register int low = PRIO_MAX + 1;
81
82	switch (uap->which) {
83
84	case PRIO_PROCESS:
85		if (uap->who == 0)
86			p = curp;
87		else
88			p = pfind(uap->who);
89		if (p == 0)
90			break;
91		low = p->p_nice;
92		break;
93
94	case PRIO_PGRP: {
95		register struct pgrp *pg;
96
97		if (uap->who == 0)
98			pg = curp->p_pgrp;
99		else if ((pg = pgfind(uap->who)) == NULL)
100			break;
101		for (p = pg->pg_members.lh_first; p != 0;
102		     p = p->p_pglist.le_next) {
103			if (p->p_nice < low)
104				low = p->p_nice;
105		}
106		break;
107	}
108
109	case PRIO_USER:
110		if (uap->who == 0)
111			uap->who = curp->p_ucred->cr_uid;
112		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next)
113			if (p->p_ucred->cr_uid == uap->who &&
114			    p->p_nice < low)
115				low = p->p_nice;
116		break;
117
118	default:
119		return (EINVAL);
120	}
121	if (low == PRIO_MAX + 1)
122		return (ESRCH);
123	curp->p_retval[0] = low;
124	return (0);
125}
126
127#ifndef _SYS_SYSPROTO_H_
128struct setpriority_args {
129	int	which;
130	int	who;
131	int	prio;
132};
133#endif
134/* ARGSUSED */
135int
136setpriority(curp, uap)
137	struct proc *curp;
138	register struct setpriority_args *uap;
139{
140	register struct proc *p;
141	int found = 0, error = 0;
142
143	switch (uap->which) {
144
145	case PRIO_PROCESS:
146		if (uap->who == 0)
147			p = curp;
148		else
149			p = pfind(uap->who);
150		if (p == 0)
151			break;
152		error = donice(curp, p, uap->prio);
153		found++;
154		break;
155
156	case PRIO_PGRP: {
157		register struct pgrp *pg;
158
159		if (uap->who == 0)
160			pg = curp->p_pgrp;
161		else if ((pg = pgfind(uap->who)) == NULL)
162			break;
163		for (p = pg->pg_members.lh_first; p != 0;
164		    p = p->p_pglist.le_next) {
165			error = donice(curp, p, uap->prio);
166			found++;
167		}
168		break;
169	}
170
171	case PRIO_USER:
172		if (uap->who == 0)
173			uap->who = curp->p_ucred->cr_uid;
174		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next)
175			if (p->p_ucred->cr_uid == uap->who) {
176				error = donice(curp, p, uap->prio);
177				found++;
178			}
179		break;
180
181	default:
182		return (EINVAL);
183	}
184	if (found == 0)
185		return (ESRCH);
186	return (error);
187}
188
189static int
190donice(curp, chgp, n)
191	register struct proc *curp, *chgp;
192	register int n;
193{
194	register struct pcred *pcred = curp->p_cred;
195
196	if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
197	    pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid &&
198	    pcred->p_ruid != chgp->p_ucred->cr_uid)
199		return (EPERM);
200	if (n > PRIO_MAX)
201		n = PRIO_MAX;
202	if (n < PRIO_MIN)
203		n = PRIO_MIN;
204	if (n < chgp->p_nice && suser(pcred->pc_ucred, &curp->p_acflag))
205		return (EACCES);
206	chgp->p_nice = n;
207	(void)resetpriority(chgp);
208	return (0);
209}
210
211/* rtprio system call */
212#ifndef _SYS_SYSPROTO_H_
213struct rtprio_args {
214	int		function;
215	pid_t		pid;
216	struct rtprio	*rtp;
217};
218#endif
219
220/*
221 * Set realtime priority
222 */
223
224/* ARGSUSED */
225int
226rtprio(curp, uap)
227	struct proc *curp;
228	register struct rtprio_args *uap;
229{
230	register struct proc *p;
231	register struct pcred *pcred = curp->p_cred;
232	struct rtprio rtp;
233	int error;
234
235	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
236	if (error)
237		return (error);
238
239	if (uap->pid == 0)
240		p = curp;
241	else
242		p = pfind(uap->pid);
243
244	if (p == 0)
245		return (ESRCH);
246
247	switch (uap->function) {
248	case RTP_LOOKUP:
249		return (copyout(&p->p_rtprio, uap->rtp, sizeof(struct rtprio)));
250	case RTP_SET:
251		if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
252		    pcred->pc_ucred->cr_uid != p->p_ucred->cr_uid &&
253		    pcred->p_ruid != p->p_ucred->cr_uid)
254		        return (EPERM);
255		/* disallow setting rtprio in most cases if not superuser */
256		if (suser(pcred->pc_ucred, &curp->p_acflag)) {
257			/* can't set someone else's */
258			if (uap->pid)
259				return (EPERM);
260			/* can't set realtime priority */
261/*
262 * Realtime priority has to be restricted for reasons which should be
263 * obvious. However, for idle priority, there is a potential for
264 * system deadlock if an idleprio process gains a lock on a resource
265 * that other processes need (and the idleprio process can't run
266 * due to a CPU-bound normal process). Fix me! XXX
267 */
268#if 0
269 			if (RTP_PRIO_IS_REALTIME(rtp.type))
270#endif
271			if (rtp.type != RTP_PRIO_NORMAL)
272				return (EPERM);
273		}
274		switch (rtp.type) {
275#ifdef RTP_PRIO_FIFO
276		case RTP_PRIO_FIFO:
277#endif
278		case RTP_PRIO_REALTIME:
279		case RTP_PRIO_NORMAL:
280		case RTP_PRIO_IDLE:
281			if (rtp.prio > RTP_PRIO_MAX)
282				return (EINVAL);
283			p->p_rtprio = rtp;
284			return (0);
285		default:
286			return (EINVAL);
287		}
288
289	default:
290		return (EINVAL);
291	}
292}
293
294#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
295#ifndef _SYS_SYSPROTO_H_
296struct osetrlimit_args {
297	u_int	which;
298	struct	orlimit *rlp;
299};
300#endif
301/* ARGSUSED */
302int
303osetrlimit(p, uap)
304	struct proc *p;
305	register struct osetrlimit_args *uap;
306{
307	struct orlimit olim;
308	struct rlimit lim;
309	int error;
310
311	if ((error =
312	    copyin((caddr_t)uap->rlp, (caddr_t)&olim, sizeof(struct orlimit))))
313		return (error);
314	lim.rlim_cur = olim.rlim_cur;
315	lim.rlim_max = olim.rlim_max;
316	return (dosetrlimit(p, uap->which, &lim));
317}
318
319#ifndef _SYS_SYSPROTO_H_
320struct ogetrlimit_args {
321	u_int	which;
322	struct	orlimit *rlp;
323};
324#endif
325/* ARGSUSED */
326int
327ogetrlimit(p, uap)
328	struct proc *p;
329	register struct ogetrlimit_args *uap;
330{
331	struct orlimit olim;
332
333	if (uap->which >= RLIM_NLIMITS)
334		return (EINVAL);
335	olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
336	if (olim.rlim_cur == -1)
337		olim.rlim_cur = 0x7fffffff;
338	olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
339	if (olim.rlim_max == -1)
340		olim.rlim_max = 0x7fffffff;
341	return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
342}
343#endif /* COMPAT_43 || COMPAT_SUNOS */
344
345#ifndef _SYS_SYSPROTO_H_
346struct __setrlimit_args {
347	u_int	which;
348	struct	rlimit *rlp;
349};
350#endif
351/* ARGSUSED */
352int
353setrlimit(p, uap)
354	struct proc *p;
355	register struct __setrlimit_args *uap;
356{
357	struct rlimit alim;
358	int error;
359
360	if ((error =
361	    copyin((caddr_t)uap->rlp, (caddr_t)&alim, sizeof (struct rlimit))))
362		return (error);
363	return (dosetrlimit(p, uap->which, &alim));
364}
365
366static int
367dosetrlimit(p, which, limp)
368	struct proc *p;
369	u_int which;
370	struct rlimit *limp;
371{
372	register struct rlimit *alimp;
373	int error;
374
375	if (which >= RLIM_NLIMITS)
376		return (EINVAL);
377	alimp = &p->p_rlimit[which];
378
379	/*
380	 * Preserve historical bugs by treating negative limits as unsigned.
381	 */
382	if (limp->rlim_cur < 0)
383		limp->rlim_cur = RLIM_INFINITY;
384	if (limp->rlim_max < 0)
385		limp->rlim_max = RLIM_INFINITY;
386
387	if (limp->rlim_cur > alimp->rlim_max ||
388	    limp->rlim_max > alimp->rlim_max)
389		if ((error = suser(p->p_ucred, &p->p_acflag)))
390			return (error);
391	if (limp->rlim_cur > limp->rlim_max)
392		limp->rlim_cur = limp->rlim_max;
393	if (p->p_limit->p_refcnt > 1 &&
394	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
395		p->p_limit->p_refcnt--;
396		p->p_limit = limcopy(p->p_limit);
397		alimp = &p->p_rlimit[which];
398	}
399
400	switch (which) {
401
402	case RLIMIT_DATA:
403		if (limp->rlim_cur > MAXDSIZ)
404			limp->rlim_cur = MAXDSIZ;
405		if (limp->rlim_max > MAXDSIZ)
406			limp->rlim_max = MAXDSIZ;
407		break;
408
409	case RLIMIT_STACK:
410		if (limp->rlim_cur > MAXSSIZ)
411			limp->rlim_cur = MAXSSIZ;
412		if (limp->rlim_max > MAXSSIZ)
413			limp->rlim_max = MAXSSIZ;
414		/*
415		 * Stack is allocated to the max at exec time with only
416		 * "rlim_cur" bytes accessible.  If stack limit is going
417		 * up make more accessible, if going down make inaccessible.
418		 */
419		if (limp->rlim_cur != alimp->rlim_cur) {
420			vm_offset_t addr;
421			vm_size_t size;
422			vm_prot_t prot;
423
424			if (limp->rlim_cur > alimp->rlim_cur) {
425				prot = VM_PROT_ALL;
426				size = limp->rlim_cur - alimp->rlim_cur;
427				addr = USRSTACK - limp->rlim_cur;
428			} else {
429				prot = VM_PROT_NONE;
430				size = alimp->rlim_cur - limp->rlim_cur;
431				addr = USRSTACK - alimp->rlim_cur;
432			}
433			addr = trunc_page(addr);
434			size = round_page(size);
435			(void) vm_map_protect(&p->p_vmspace->vm_map,
436					      addr, addr+size, prot, FALSE);
437		}
438		break;
439
440	case RLIMIT_NOFILE:
441		if (limp->rlim_cur > maxfilesperproc)
442			limp->rlim_cur = maxfilesperproc;
443		if (limp->rlim_max > maxfilesperproc)
444			limp->rlim_max = maxfilesperproc;
445		break;
446
447	case RLIMIT_NPROC:
448		if (limp->rlim_cur > maxprocperuid)
449			limp->rlim_cur = maxprocperuid;
450		if (limp->rlim_max > maxprocperuid)
451			limp->rlim_max = maxprocperuid;
452		break;
453	}
454	*alimp = *limp;
455	return (0);
456}
457
458#ifndef _SYS_SYSPROTO_H_
459struct __getrlimit_args {
460	u_int	which;
461	struct	rlimit *rlp;
462};
463#endif
464/* ARGSUSED */
465int
466getrlimit(p, uap)
467	struct proc *p;
468	register struct __getrlimit_args *uap;
469{
470
471	if (uap->which >= RLIM_NLIMITS)
472		return (EINVAL);
473	return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
474	    sizeof (struct rlimit)));
475}
476
477/*
478 * Transform the running time and tick information in proc p into user,
479 * system, and interrupt time usage.
480 */
481void
482calcru(p, up, sp, ip)
483	struct proc *p;
484	struct timeval *up;
485	struct timeval *sp;
486	struct timeval *ip;
487{
488	quad_t totusec;
489	u_quad_t u, st, ut, it, tot;
490	long sec, usec;
491	int s;
492	struct timeval tv;
493
494	/* XXX: why spl-protect ?  worst case is an off-by-one report */
495	s = splstatclock();
496	st = p->p_sticks;
497	ut = p->p_uticks;
498	it = p->p_iticks;
499	splx(s);
500
501	tot = st + ut + it;
502	if (tot == 0) {
503		st = 1;
504		tot = 1;
505	}
506
507	sec = p->p_rtime.tv_sec;
508	usec = p->p_rtime.tv_usec;
509#ifdef SMP
510	if (p->p_oncpu != 0xff) {
511#else
512	if (p == curproc) {
513#endif
514		/*
515		 * Adjust for the current time slice.  This is actually fairly
516		 * important since the error here is on the order of a time
517		 * quantum, which is much greater than the sampling error.
518		 */
519		microruntime(&tv);
520		sec += tv.tv_sec - p->p_runtime.tv_sec;
521		usec += tv.tv_usec - p->p_runtime.tv_usec;
522	}
523	totusec = (quad_t)sec * 1000000 + usec;
524	if (totusec < 0) {
525		/* XXX no %qd in kernel.  Truncate. */
526		printf("calcru: negative time of %ld usec for pid %d (%s)\n",
527		       (long)totusec, p->p_pid, p->p_comm);
528		totusec = 0;
529	}
530	u = totusec;
531	st = (u * st) / tot;
532	sp->tv_sec = st / 1000000;
533	sp->tv_usec = st % 1000000;
534	ut = (u * ut) / tot;
535	up->tv_sec = ut / 1000000;
536	up->tv_usec = ut % 1000000;
537	if (ip != NULL) {
538		it = (u * it) / tot;
539		ip->tv_sec = it / 1000000;
540		ip->tv_usec = it % 1000000;
541	}
542}
543
544#ifndef _SYS_SYSPROTO_H_
545struct getrusage_args {
546	int	who;
547	struct	rusage *rusage;
548};
549#endif
550/* ARGSUSED */
551int
552getrusage(p, uap)
553	register struct proc *p;
554	register struct getrusage_args *uap;
555{
556	register struct rusage *rup;
557
558	switch (uap->who) {
559
560	case RUSAGE_SELF:
561		rup = &p->p_stats->p_ru;
562		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
563		break;
564
565	case RUSAGE_CHILDREN:
566		rup = &p->p_stats->p_cru;
567		break;
568
569	default:
570		return (EINVAL);
571	}
572	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
573	    sizeof (struct rusage)));
574}
575
576void
577ruadd(ru, ru2)
578	register struct rusage *ru, *ru2;
579{
580	register long *ip, *ip2;
581	register int i;
582
583	timevaladd(&ru->ru_utime, &ru2->ru_utime);
584	timevaladd(&ru->ru_stime, &ru2->ru_stime);
585	if (ru->ru_maxrss < ru2->ru_maxrss)
586		ru->ru_maxrss = ru2->ru_maxrss;
587	ip = &ru->ru_first; ip2 = &ru2->ru_first;
588	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
589		*ip++ += *ip2++;
590}
591
592/*
593 * Make a copy of the plimit structure.
594 * We share these structures copy-on-write after fork,
595 * and copy when a limit is changed.
596 */
597struct plimit *
598limcopy(lim)
599	struct plimit *lim;
600{
601	register struct plimit *copy;
602
603	MALLOC(copy, struct plimit *, sizeof(struct plimit),
604	    M_SUBPROC, M_WAITOK);
605	bcopy(lim->pl_rlimit, copy->pl_rlimit,
606	    sizeof(struct rlimit) * RLIM_NLIMITS);
607	copy->p_lflags = 0;
608	copy->p_refcnt = 1;
609	return (copy);
610}
611