kern_rctl.c revision 234383
1/*-
2 * Copyright (c) 2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/kern/kern_rctl.c 234383 2012-04-17 14:31:02Z trasz $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/kern/kern_rctl.c 234383 2012-04-17 14:31:02Z trasz $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/malloc.h>
38#include <sys/queue.h>
39#include <sys/refcount.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/limits.h>
43#include <sys/loginclass.h>
44#include <sys/priv.h>
45#include <sys/proc.h>
46#include <sys/racct.h>
47#include <sys/rctl.h>
48#include <sys/resourcevar.h>
49#include <sys/sx.h>
50#include <sys/sysent.h>
51#include <sys/sysproto.h>
52#include <sys/systm.h>
53#include <sys/types.h>
54#include <sys/eventhandler.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#include <sys/rwlock.h>
58#include <sys/sbuf.h>
59#include <sys/taskqueue.h>
60#include <sys/tree.h>
61#include <vm/uma.h>
62
63#ifdef RCTL
64#ifndef RACCT
65#error "The RCTL option requires the RACCT option"
66#endif
67
68FEATURE(rctl, "Resource Limits");
69
/*
 * HRF_* flag values.
 * NOTE(review): nothing in the visible part of this file references
 * these -- confirm whether they are still needed.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/* Default buffer size for rctl_get_rules(2). */
#define	RCTL_DEFAULT_BUFSIZE	4096
/* Largest input string, in bytes, accepted by rctl_read_inbuf(). */
#define	RCTL_MAX_INBUFLEN	4096
/* Size of the fixed buffer rctl_enforce() formats log/devctl messages in. */
#define	RCTL_LOG_BUFSIZE	128
78
/*
 * 'rctl_rule_link' connects a rule with every racct it's related to.
 * For example, rule 'user:X:openfiles:deny=N/process' is linked
 * with uidinfo for user X, and to each process of that user.
 */
struct rctl_rule_link {
	/* Linkage on the owning racct's r_rule_links list. */
	LIST_ENTRY(rctl_rule_link)	rrl_next;
	/* The rule; the link holds one reference on it. */
	struct rctl_rule		*rrl_rule;
	/*
	 * Set by rctl_enforce() once a log/devctl/signal action has fired;
	 * cleared when the request no longer exceeds the rule.
	 */
	int				rrl_exceeded;
};
89
/*
 * One entry of a name <-> value translation table.  Tables are arrays
 * terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name used in rule strings */
	int		d_value;	/* corresponding numeric constant */
};
94
/*
 * Names of the subject types; used both for a rule's subject and for
 * its "per" field.
 */
static struct dict subjectnames[] = {
	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
	{ "user", RCTL_SUBJECT_TYPE_USER },
	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
	{ NULL, -1 }};
101
/* Names of the resources, mapped to their RACCT_* identifiers. */
static struct dict resourcenames[] = {
	{ "cputime", RACCT_CPU },
	{ "datasize", RACCT_DATA },
	{ "stacksize", RACCT_STACK },
	{ "coredumpsize", RACCT_CORE },
	{ "memoryuse", RACCT_RSS },
	{ "memorylocked", RACCT_MEMLOCK },
	{ "maxproc", RACCT_NPROC },
	{ "openfiles", RACCT_NOFILE },
	{ "vmemoryuse", RACCT_VMEM },
	{ "pseudoterminals", RACCT_NPTS },
	{ "swapuse", RACCT_SWAP },
	{ "nthr", RACCT_NTHR },
	{ "msgqqueued", RACCT_MSGQQUEUED },
	{ "msgqsize", RACCT_MSGQSIZE },
	{ "nmsgq", RACCT_NMSGQ },
	{ "nsem", RACCT_NSEM },
	{ "nsemop", RACCT_NSEMOP },
	{ "nshm", RACCT_NSHM },
	{ "shmsize", RACCT_SHMSIZE },
	{ "wallclock", RACCT_WALLCLOCK },
	{ NULL, -1 }};
124
/*
 * Names of the actions, mapped to their RCTL_ACTION_* identifiers.
 * rctl_enforce() passes the signal actions straight to kern_psignal(),
 * relying on RCTL_ACTION_SIG* being equal to the matching signal numbers.
 */
static struct dict actionnames[] = {
	{ "sighup", RCTL_ACTION_SIGHUP },
	{ "sigint", RCTL_ACTION_SIGINT },
	{ "sigquit", RCTL_ACTION_SIGQUIT },
	{ "sigill", RCTL_ACTION_SIGILL },
	{ "sigtrap", RCTL_ACTION_SIGTRAP },
	{ "sigabrt", RCTL_ACTION_SIGABRT },
	{ "sigemt", RCTL_ACTION_SIGEMT },
	{ "sigfpe", RCTL_ACTION_SIGFPE },
	{ "sigkill", RCTL_ACTION_SIGKILL },
	{ "sigbus", RCTL_ACTION_SIGBUS },
	{ "sigsegv", RCTL_ACTION_SIGSEGV },
	{ "sigsys", RCTL_ACTION_SIGSYS },
	{ "sigpipe", RCTL_ACTION_SIGPIPE },
	{ "sigalrm", RCTL_ACTION_SIGALRM },
	{ "sigterm", RCTL_ACTION_SIGTERM },
	{ "sigurg", RCTL_ACTION_SIGURG },
	{ "sigstop", RCTL_ACTION_SIGSTOP },
	{ "sigtstp", RCTL_ACTION_SIGTSTP },
	{ "sigchld", RCTL_ACTION_SIGCHLD },
	{ "sigttin", RCTL_ACTION_SIGTTIN },
	{ "sigttou", RCTL_ACTION_SIGTTOU },
	{ "sigio", RCTL_ACTION_SIGIO },
	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
	{ "sigprof", RCTL_ACTION_SIGPROF },
	{ "sigwinch", RCTL_ACTION_SIGWINCH },
	{ "siginfo", RCTL_ACTION_SIGINFO },
	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
	{ "sigthr", RCTL_ACTION_SIGTHR },
	{ "deny", RCTL_ACTION_DENY },
	{ "log", RCTL_ACTION_LOG },
	{ "devctl", RCTL_ACTION_DEVCTL },
	{ NULL, -1 }};
161
/* Subsystem initialization routine, registered to run via SYSINIT. */
static void rctl_init(void);
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);

/* UMA zones that struct rctl_rule_link and struct rctl_rule come from. */
static uma_zone_t rctl_rule_link_zone;
static uma_zone_t rctl_rule_zone;
/* Protects the per-racct r_rule_links lists. */
static struct rwlock rctl_lock;
RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");

/* Forward declarations for helpers used before their definitions. */
static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);

/* Malloc type for temporary buffers, e.g. the one used by rctl_enforce(). */
static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
174
175static const char *
176rctl_subject_type_name(int subject)
177{
178	int i;
179
180	for (i = 0; subjectnames[i].d_name != NULL; i++) {
181		if (subjectnames[i].d_value == subject)
182			return (subjectnames[i].d_name);
183	}
184
185	panic("rctl_subject_type_name: unknown subject type %d", subject);
186}
187
188static const char *
189rctl_action_name(int action)
190{
191	int i;
192
193	for (i = 0; actionnames[i].d_name != NULL; i++) {
194		if (actionnames[i].d_value == action)
195			return (actionnames[i].d_name);
196	}
197
198	panic("rctl_action_name: unknown action %d", action);
199}
200
201const char *
202rctl_resource_name(int resource)
203{
204	int i;
205
206	for (i = 0; resourcenames[i].d_name != NULL; i++) {
207		if (resourcenames[i].d_value == resource)
208			return (resourcenames[i].d_name);
209	}
210
211	panic("rctl_resource_name: unknown resource %d", resource);
212}
213
214/*
215 * Return the amount of resource that can be allocated by 'p' before
216 * hitting 'rule'.
217 */
218static int64_t
219rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
220{
221	int resource;
222	int64_t available = INT64_MAX;
223	struct ucred *cred = p->p_ucred;
224
225	rw_assert(&rctl_lock, RA_LOCKED);
226
227	resource = rule->rr_resource;
228	switch (rule->rr_per) {
229	case RCTL_SUBJECT_TYPE_PROCESS:
230		available = rule->rr_amount -
231		    p->p_racct->r_resources[resource];
232		break;
233	case RCTL_SUBJECT_TYPE_USER:
234		available = rule->rr_amount -
235		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
236		break;
237	case RCTL_SUBJECT_TYPE_LOGINCLASS:
238		available = rule->rr_amount -
239		    cred->cr_loginclass->lc_racct->r_resources[resource];
240		break;
241	case RCTL_SUBJECT_TYPE_JAIL:
242		available = rule->rr_amount -
243		    cred->cr_prison->pr_prison_racct->prr_racct->
244		        r_resources[resource];
245		break;
246	default:
247		panic("rctl_compute_available: unknown per %d",
248		    rule->rr_per);
249	}
250
251	return (available);
252}
253
254/*
255 * Return non-zero if allocating 'amount' by proc 'p' would exceed
256 * resource limit specified by 'rule'.
257 */
258static int
259rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
260    int64_t amount)
261{
262	int64_t available;
263
264	rw_assert(&rctl_lock, RA_LOCKED);
265
266	available = rctl_available_resource(p, rule);
267	if (available >= amount)
268		return (0);
269
270	return (1);
271}
272
/*
 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
 * to what it keeps allocated now.  Returns non-zero if the allocation should
 * be denied, 0 otherwise.
 *
 * Every rule linked to the process' racct that covers 'resource' and would
 * be exceeded has its action carried out: "log" prints a message, "devctl"
 * sends a devctl notification, and the remaining actions are delivered as
 * signals via kern_psignal().  Those three kinds of action fire once per
 * rule link and are re-armed (rrl_exceeded reset to 0) when a later call
 * finds the rule no longer exceeded.  A matching "deny" rule makes the
 * function return EDOOFUS, which the caller is expected to translate.
 */
int
rctl_enforce(struct proc *p, int resource, uint64_t amount)
{
	struct rctl_rule *rule;
	struct rctl_rule_link *link;
	struct sbuf sb;
	int should_deny = 0;
	char *buf;
	/* State for ppsratecheck(); static, so shared by all callers. */
	static int curtime = 0;
	static struct timeval lasttime;

	rw_rlock(&rctl_lock);

	/*
	 * There may be more than one matching rule; go through all of them.
	 * Denial should be done last, after logging and sending signals.
	 *
	 * Note that every 'continue' inside the switch below advances the
	 * LIST_FOREACH loop to the next rule link.
	 */
	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
		rule = link->rrl_rule;
		if (rule->rr_resource != resource)
			continue;
		if (!rctl_would_exceed(p, rule, amount)) {
			/* Back under the limit; re-arm one-shot actions. */
			link->rrl_exceeded = 0;
			continue;
		}

		switch (rule->rr_action) {
		case RCTL_ACTION_DENY:
			/* Only remember it; keep processing other rules. */
			should_deny = 1;
			continue;
		case RCTL_ACTION_LOG:
			/*
			 * If rrl_exceeded != 0, it means we've already
			 * logged a warning for this process.
			 */
			if (link->rrl_exceeded != 0)
				continue;

			/*
			 * If the process state is not fully initialized yet,
			 * we can't access most of the required fields, e.g.
			 * p->p_comm.  This happens when called from fork1().
			 * Ignore this rule for now; it will be processed just
			 * after fork, when called from racct_proc_fork_done().
			 */
			if (p->p_state != PRS_NORMAL)
				continue;

			/*
			 * Rate-limit the messages.  rrl_exceeded is left
			 * untouched here, so a suppressed message can still
			 * be logged by a later call.
			 */
			if (!ppsratecheck(&lasttime, &curtime, 10))
				continue;

			/*
			 * NOTE(review): M_NOWAIT presumably because
			 * rctl_lock is held at this point -- confirm.
			 */
			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
			if (buf == NULL) {
				printf("rctl_enforce: out of memory\n");
				continue;
			}
			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
			rctl_rule_to_sbuf(&sb, rule);
			sbuf_finish(&sb);
			printf("rctl: rule \"%s\" matched by pid %d "
			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
			sbuf_delete(&sb);
			free(buf, M_RCTL);
			link->rrl_exceeded = 1;
			continue;
		case RCTL_ACTION_DEVCTL:
			/* Already notified for this link. */
			if (link->rrl_exceeded != 0)
				continue;

			/* Same not-yet-initialized-process caveat as above. */
			if (p->p_state != PRS_NORMAL)
				continue;

			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
			if (buf == NULL) {
				printf("rctl_enforce: out of memory\n");
				continue;
			}
			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
			sbuf_printf(&sb, "rule=");
			rctl_rule_to_sbuf(&sb, rule);
			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
			    p->p_pid, p->p_ucred->cr_ruid,
			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
			sbuf_finish(&sb);
			devctl_notify_f("RCTL", "rule", "matched",
			    sbuf_data(&sb), M_NOWAIT);
			sbuf_delete(&sb);
			free(buf, M_RCTL);
			link->rrl_exceeded = 1;
			continue;
		default:
			/* Everything else is expected to be a signal action. */
			if (link->rrl_exceeded != 0)
				continue;

			if (p->p_state != PRS_NORMAL)
				continue;

			KASSERT(rule->rr_action > 0 &&
			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
			    ("rctl_enforce: unknown action %d",
			     rule->rr_action));

			/*
			 * We're using the fact that RCTL_ACTION_SIG* values
			 * are equal to their counterparts from sys/signal.h.
			 */
			kern_psignal(p, rule->rr_action);
			link->rrl_exceeded = 1;
			continue;
		}
	}

	rw_runlock(&rctl_lock);

	if (should_deny) {
		/*
		 * Return fake error code; the caller should change it
		 * into one proper for the situation - EFSIZ, ENOMEM etc.
		 */
		return (EDOOFUS);
	}

	return (0);
}
404
405uint64_t
406rctl_get_limit(struct proc *p, int resource)
407{
408	struct rctl_rule *rule;
409	struct rctl_rule_link *link;
410	uint64_t amount = UINT64_MAX;
411
412	rw_rlock(&rctl_lock);
413
414	/*
415	 * There may be more than one matching rule; go through all of them.
416	 * Denial should be done last, after logging and sending signals.
417	 */
418	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
419		rule = link->rrl_rule;
420		if (rule->rr_resource != resource)
421			continue;
422		if (rule->rr_action != RCTL_ACTION_DENY)
423			continue;
424		if (rule->rr_amount < amount)
425			amount = rule->rr_amount;
426	}
427
428	rw_runlock(&rctl_lock);
429
430	return (amount);
431}
432
433uint64_t
434rctl_get_available(struct proc *p, int resource)
435{
436	struct rctl_rule *rule;
437	struct rctl_rule_link *link;
438	int64_t available, minavailable, allocated;
439
440	minavailable = INT64_MAX;
441
442	rw_rlock(&rctl_lock);
443
444	/*
445	 * There may be more than one matching rule; go through all of them.
446	 * Denial should be done last, after logging and sending signals.
447	 */
448	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
449		rule = link->rrl_rule;
450		if (rule->rr_resource != resource)
451			continue;
452		if (rule->rr_action != RCTL_ACTION_DENY)
453			continue;
454		available = rctl_available_resource(p, rule);
455		if (available < minavailable)
456			minavailable = available;
457	}
458
459	rw_runlock(&rctl_lock);
460
461	/*
462	 * XXX: Think about this _hard_.
463	 */
464	allocated = p->p_racct->r_resources[resource];
465	if (minavailable < INT64_MAX - allocated)
466		minavailable += allocated;
467	if (minavailable < 0)
468		minavailable = 0;
469	return (minavailable);
470}
471
472static int
473rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
474{
475
476	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
477		if (rule->rr_subject_type != filter->rr_subject_type)
478			return (0);
479
480		switch (filter->rr_subject_type) {
481		case RCTL_SUBJECT_TYPE_PROCESS:
482			if (filter->rr_subject.rs_proc != NULL &&
483			    rule->rr_subject.rs_proc !=
484			    filter->rr_subject.rs_proc)
485				return (0);
486			break;
487		case RCTL_SUBJECT_TYPE_USER:
488			if (filter->rr_subject.rs_uip != NULL &&
489			    rule->rr_subject.rs_uip !=
490			    filter->rr_subject.rs_uip)
491				return (0);
492			break;
493		case RCTL_SUBJECT_TYPE_LOGINCLASS:
494			if (filter->rr_subject.rs_loginclass != NULL &&
495			    rule->rr_subject.rs_loginclass !=
496			    filter->rr_subject.rs_loginclass)
497				return (0);
498			break;
499		case RCTL_SUBJECT_TYPE_JAIL:
500			if (filter->rr_subject.rs_prison_racct != NULL &&
501			    rule->rr_subject.rs_prison_racct !=
502			    filter->rr_subject.rs_prison_racct)
503				return (0);
504			break;
505		default:
506			panic("rctl_rule_matches: unknown subject type %d",
507			    filter->rr_subject_type);
508		}
509	}
510
511	if (filter->rr_resource != RACCT_UNDEFINED) {
512		if (rule->rr_resource != filter->rr_resource)
513			return (0);
514	}
515
516	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
517		if (rule->rr_action != filter->rr_action)
518			return (0);
519	}
520
521	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
522		if (rule->rr_amount != filter->rr_amount)
523			return (0);
524	}
525
526	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
527		if (rule->rr_per != filter->rr_per)
528			return (0);
529	}
530
531	return (1);
532}
533
534static int
535str2value(const char *str, int *value, struct dict *table)
536{
537	int i;
538
539	if (value == NULL)
540		return (EINVAL);
541
542	for (i = 0; table[i].d_name != NULL; i++) {
543		if (strcasecmp(table[i].d_name, str) == 0) {
544			*value =  table[i].d_value;
545			return (0);
546		}
547	}
548
549	return (EINVAL);
550}
551
/*
 * Parse 'str' as a decimal identifier and store it in '*value'.
 * Returns 0 on success, EINVAL when 'str' is NULL or anything follows
 * the number.  '*value' is written even when EINVAL is returned for
 * trailing characters.
 */
static int
str2id(const char *str, id_t *value)
{
	char *endp;

	if (str == NULL)
		return (EINVAL);

	*value = strtoul(str, &endp, 10);

	/* The parse must have consumed the string up to its terminator. */
	return (*endp == '\0' ? 0 : EINVAL);
}
566
567static int
568str2int64(const char *str, int64_t *value)
569{
570	char *end;
571
572	if (str == NULL)
573		return (EINVAL);
574
575	*value = strtoul(str, &end, 10);
576	if ((size_t)(end - str) != strlen(str))
577		return (EINVAL);
578
579	return (0);
580}
581
582/*
583 * Connect the rule to the racct, increasing refcount for the rule.
584 */
585static void
586rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
587{
588	struct rctl_rule_link *link;
589
590	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
591
592	rctl_rule_acquire(rule);
593	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
594	link->rrl_rule = rule;
595	link->rrl_exceeded = 0;
596
597	rw_wlock(&rctl_lock);
598	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
599	rw_wunlock(&rctl_lock);
600}
601
602static int
603rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
604{
605	struct rctl_rule_link *link;
606
607	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
608	rw_assert(&rctl_lock, RA_WLOCKED);
609
610	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
611	if (link == NULL)
612		return (ENOMEM);
613	rctl_rule_acquire(rule);
614	link->rrl_rule = rule;
615	link->rrl_exceeded = 0;
616
617	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
618	return (0);
619}
620
621/*
622 * Remove limits for a rules matching the filter and release
623 * the refcounts for the rules, possibly freeing them.  Returns
624 * the number of limit structures removed.
625 */
626static int
627rctl_racct_remove_rules(struct racct *racct,
628    const struct rctl_rule *filter)
629{
630	int removed = 0;
631	struct rctl_rule_link *link, *linktmp;
632
633	rw_assert(&rctl_lock, RA_WLOCKED);
634
635	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
636		if (!rctl_rule_matches(link->rrl_rule, filter))
637			continue;
638
639		LIST_REMOVE(link, rrl_next);
640		rctl_rule_release(link->rrl_rule);
641		uma_zfree(rctl_rule_link_zone, link);
642		removed++;
643	}
644	return (removed);
645}
646
647static void
648rctl_rule_acquire_subject(struct rctl_rule *rule)
649{
650
651	switch (rule->rr_subject_type) {
652	case RCTL_SUBJECT_TYPE_UNDEFINED:
653	case RCTL_SUBJECT_TYPE_PROCESS:
654		break;
655	case RCTL_SUBJECT_TYPE_JAIL:
656		if (rule->rr_subject.rs_prison_racct != NULL)
657			prison_racct_hold(rule->rr_subject.rs_prison_racct);
658		break;
659	case RCTL_SUBJECT_TYPE_USER:
660		if (rule->rr_subject.rs_uip != NULL)
661			uihold(rule->rr_subject.rs_uip);
662		break;
663	case RCTL_SUBJECT_TYPE_LOGINCLASS:
664		if (rule->rr_subject.rs_loginclass != NULL)
665			loginclass_hold(rule->rr_subject.rs_loginclass);
666		break;
667	default:
668		panic("rctl_rule_acquire_subject: unknown subject type %d",
669		    rule->rr_subject_type);
670	}
671}
672
673static void
674rctl_rule_release_subject(struct rctl_rule *rule)
675{
676
677	switch (rule->rr_subject_type) {
678	case RCTL_SUBJECT_TYPE_UNDEFINED:
679	case RCTL_SUBJECT_TYPE_PROCESS:
680		break;
681	case RCTL_SUBJECT_TYPE_JAIL:
682		if (rule->rr_subject.rs_prison_racct != NULL)
683			prison_racct_free(rule->rr_subject.rs_prison_racct);
684		break;
685	case RCTL_SUBJECT_TYPE_USER:
686		if (rule->rr_subject.rs_uip != NULL)
687			uifree(rule->rr_subject.rs_uip);
688		break;
689	case RCTL_SUBJECT_TYPE_LOGINCLASS:
690		if (rule->rr_subject.rs_loginclass != NULL)
691			loginclass_free(rule->rr_subject.rs_loginclass);
692		break;
693	default:
694		panic("rctl_rule_release_subject: unknown subject type %d",
695		    rule->rr_subject_type);
696	}
697}
698
699struct rctl_rule *
700rctl_rule_alloc(int flags)
701{
702	struct rctl_rule *rule;
703
704	rule = uma_zalloc(rctl_rule_zone, flags);
705	if (rule == NULL)
706		return (NULL);
707	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
708	rule->rr_subject.rs_proc = NULL;
709	rule->rr_subject.rs_uip = NULL;
710	rule->rr_subject.rs_loginclass = NULL;
711	rule->rr_subject.rs_prison_racct = NULL;
712	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
713	rule->rr_resource = RACCT_UNDEFINED;
714	rule->rr_action = RCTL_ACTION_UNDEFINED;
715	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
716	refcount_init(&rule->rr_refcount, 1);
717
718	return (rule);
719}
720
721struct rctl_rule *
722rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
723{
724	struct rctl_rule *copy;
725
726	copy = uma_zalloc(rctl_rule_zone, flags);
727	if (copy == NULL)
728		return (NULL);
729	copy->rr_subject_type = rule->rr_subject_type;
730	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
731	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
732	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
733	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
734	copy->rr_per = rule->rr_per;
735	copy->rr_resource = rule->rr_resource;
736	copy->rr_action = rule->rr_action;
737	copy->rr_amount = rule->rr_amount;
738	refcount_init(&copy->rr_refcount, 1);
739	rctl_rule_acquire_subject(copy);
740
741	return (copy);
742}
743
/*
 * Take an additional reference on 'rule'.  The rule must already have
 * at least one reference (asserted).
 */
void
rctl_rule_acquire(struct rctl_rule *rule)
{

	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));

	refcount_acquire(&rule->rr_refcount);
}
752
753static void
754rctl_rule_free(void *context, int pending)
755{
756	struct rctl_rule *rule;
757
758	rule = (struct rctl_rule *)context;
759
760	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
761
762	/*
763	 * We don't need locking here; rule is guaranteed to be inaccessible.
764	 */
765
766	rctl_rule_release_subject(rule);
767	uma_zfree(rctl_rule_zone, rule);
768}
769
770void
771rctl_rule_release(struct rctl_rule *rule)
772{
773
774	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
775
776	if (refcount_release(&rule->rr_refcount)) {
777		/*
778		 * rctl_rule_release() is often called when iterating
779		 * over all the uidinfo structures in the system,
780		 * holding uihashtbl_lock.  Since rctl_rule_free()
781		 * might end up calling uifree(), this would lead
782		 * to lock recursion.  Use taskqueue to avoid this.
783		 */
784		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
785		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
786	}
787}
788
789static int
790rctl_rule_fully_specified(const struct rctl_rule *rule)
791{
792
793	switch (rule->rr_subject_type) {
794	case RCTL_SUBJECT_TYPE_UNDEFINED:
795		return (0);
796	case RCTL_SUBJECT_TYPE_PROCESS:
797		if (rule->rr_subject.rs_proc == NULL)
798			return (0);
799		break;
800	case RCTL_SUBJECT_TYPE_USER:
801		if (rule->rr_subject.rs_uip == NULL)
802			return (0);
803		break;
804	case RCTL_SUBJECT_TYPE_LOGINCLASS:
805		if (rule->rr_subject.rs_loginclass == NULL)
806			return (0);
807		break;
808	case RCTL_SUBJECT_TYPE_JAIL:
809		if (rule->rr_subject.rs_prison_racct == NULL)
810			return (0);
811		break;
812	default:
813		panic("rctl_rule_fully_specified: unknown subject type %d",
814		    rule->rr_subject_type);
815	}
816	if (rule->rr_resource == RACCT_UNDEFINED)
817		return (0);
818	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
819		return (0);
820	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
821		return (0);
822	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
823		return (0);
824
825	return (1);
826}
827
828static int
829rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
830{
831	int error = 0;
832	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
833	     *amountstr, *perstr;
834	struct rctl_rule *rule;
835	id_t id;
836
837	rule = rctl_rule_alloc(M_WAITOK);
838
839	subjectstr = strsep(&rulestr, ":");
840	subject_idstr = strsep(&rulestr, ":");
841	resourcestr = strsep(&rulestr, ":");
842	actionstr = strsep(&rulestr, "=/");
843	amountstr = strsep(&rulestr, "/");
844	perstr = rulestr;
845
846	if (subjectstr == NULL || subjectstr[0] == '\0')
847		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
848	else {
849		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
850		if (error != 0)
851			goto out;
852	}
853
854	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
855		rule->rr_subject.rs_proc = NULL;
856		rule->rr_subject.rs_uip = NULL;
857		rule->rr_subject.rs_loginclass = NULL;
858		rule->rr_subject.rs_prison_racct = NULL;
859	} else {
860		switch (rule->rr_subject_type) {
861		case RCTL_SUBJECT_TYPE_UNDEFINED:
862			error = EINVAL;
863			goto out;
864		case RCTL_SUBJECT_TYPE_PROCESS:
865			error = str2id(subject_idstr, &id);
866			if (error != 0)
867				goto out;
868			sx_assert(&allproc_lock, SA_LOCKED);
869			rule->rr_subject.rs_proc = pfind(id);
870			if (rule->rr_subject.rs_proc == NULL) {
871				error = ESRCH;
872				goto out;
873			}
874			PROC_UNLOCK(rule->rr_subject.rs_proc);
875			break;
876		case RCTL_SUBJECT_TYPE_USER:
877			error = str2id(subject_idstr, &id);
878			if (error != 0)
879				goto out;
880			rule->rr_subject.rs_uip = uifind(id);
881			break;
882		case RCTL_SUBJECT_TYPE_LOGINCLASS:
883			rule->rr_subject.rs_loginclass =
884			    loginclass_find(subject_idstr);
885			if (rule->rr_subject.rs_loginclass == NULL) {
886				error = ENAMETOOLONG;
887				goto out;
888			}
889			break;
890		case RCTL_SUBJECT_TYPE_JAIL:
891			rule->rr_subject.rs_prison_racct =
892			    prison_racct_find(subject_idstr);
893			if (rule->rr_subject.rs_prison_racct == NULL) {
894				error = ENAMETOOLONG;
895				goto out;
896			}
897			break;
898               default:
899                       panic("rctl_string_to_rule: unknown subject type %d",
900                           rule->rr_subject_type);
901               }
902	}
903
904	if (resourcestr == NULL || resourcestr[0] == '\0')
905		rule->rr_resource = RACCT_UNDEFINED;
906	else {
907		error = str2value(resourcestr, &rule->rr_resource,
908		    resourcenames);
909		if (error != 0)
910			goto out;
911	}
912
913	if (actionstr == NULL || actionstr[0] == '\0')
914		rule->rr_action = RCTL_ACTION_UNDEFINED;
915	else {
916		error = str2value(actionstr, &rule->rr_action, actionnames);
917		if (error != 0)
918			goto out;
919	}
920
921	if (amountstr == NULL || amountstr[0] == '\0')
922		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
923	else {
924		error = str2int64(amountstr, &rule->rr_amount);
925		if (error != 0)
926			goto out;
927		if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
928			rule->rr_amount *= 1000000;
929	}
930
931	if (perstr == NULL || perstr[0] == '\0')
932		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
933	else {
934		error = str2value(perstr, &rule->rr_per, subjectnames);
935		if (error != 0)
936			goto out;
937	}
938
939out:
940	if (error == 0)
941		*rulep = rule;
942	else
943		rctl_rule_release(rule);
944
945	return (error);
946}
947
/*
 * Link a rule with all the subjects it applies to.
 *
 * 'rule' must be fully specified (asserted).  Existing rules that the
 * new one supersedes are removed first.  The rule is then linked to the
 * racct of its subject and, for user, login class and jail subjects, to
 * the racct of every process that belongs to that subject; allproc_lock
 * must be held for that walk (asserted).
 *
 * Returns 0 on success, or EOPNOTSUPP for combinations that are
 * rejected: "deny" on RACCT_CPU or RACCT_WALLCLOCK, and per-process
 * rules on resources for which RACCT_IS_SLOPPY() is true.
 */
int
rctl_rule_add(struct rctl_rule *rule)
{
	struct proc *p;
	struct ucred *cred;
	struct uidinfo *uip;
	struct prison *pr;
	struct prison_racct *prr;
	struct loginclass *lc;
	struct rctl_rule *rule2;
	int match;

	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));

	/*
	 * Some rules just don't make sense.  Note that the one below
	 * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
	 * for example, is not deniable in the racct sense, but the
	 * limit is enforced in a different way, so "deny" rules for %CPU
	 * do make sense.
	 */
	if (rule->rr_action == RCTL_ACTION_DENY &&
	    (rule->rr_resource == RACCT_CPU ||
	    rule->rr_resource == RACCT_WALLCLOCK))
		return (EOPNOTSUPP);

	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
	    RACCT_IS_SLOPPY(rule->rr_resource))
		return (EOPNOTSUPP);

	/*
	 * Make sure there are no duplicated rules.  Also, for the "deny"
	 * rules, remove ones differing only by "amount".
	 */
	if (rule->rr_action == RCTL_ACTION_DENY) {
		/* A copy with the amount wildcarded serves as the filter. */
		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
		rctl_rule_remove(rule2);
		rctl_rule_release(rule2);
	} else
		rctl_rule_remove(rule);

	/* First link the rule to the racct of the subject itself. */
	switch (rule->rr_subject_type) {
	case RCTL_SUBJECT_TYPE_PROCESS:
		p = rule->rr_subject.rs_proc;
		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));

		rctl_racct_add_rule(p->p_racct, rule);
		/*
		 * In case of per-process rule, we don't have anything more
		 * to do.
		 */
		return (0);

	case RCTL_SUBJECT_TYPE_USER:
		uip = rule->rr_subject.rs_uip;
		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
		rctl_racct_add_rule(uip->ui_racct, rule);
		break;

	case RCTL_SUBJECT_TYPE_LOGINCLASS:
		lc = rule->rr_subject.rs_loginclass;
		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
		rctl_racct_add_rule(lc->lc_racct, rule);
		break;

	case RCTL_SUBJECT_TYPE_JAIL:
		prr = rule->rr_subject.rs_prison_racct;
		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
		rctl_racct_add_rule(prr->prr_racct, rule);
		break;

	default:
		panic("rctl_rule_add: unknown subject type %d",
		    rule->rr_subject_type);
	}

	/*
	 * Now go through all the processes and add the new rule to the ones
	 * it applies to.
	 *
	 * In the switch below, 'break' means the process belongs to the
	 * subject and falls through to the rctl_racct_add_rule() call;
	 * 'continue' skips to the next process.
	 */
	sx_assert(&allproc_lock, SA_LOCKED);
	FOREACH_PROC_IN_SYSTEM(p) {
		cred = p->p_ucred;
		switch (rule->rr_subject_type) {
		case RCTL_SUBJECT_TYPE_USER:
			/* Either the effective or the real uid may match. */
			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
				break;
			continue;
		case RCTL_SUBJECT_TYPE_LOGINCLASS:
			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
				break;
			continue;
		case RCTL_SUBJECT_TYPE_JAIL:
			/* The process' jail or any of its parents may match. */
			match = 0;
			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
					match = 1;
					break;
				}
			}
			if (match)
				break;
			continue;
		default:
			panic("rctl_rule_add: unknown subject type %d",
			    rule->rr_subject_type);
		}

		rctl_racct_add_rule(p->p_racct, rule);
	}

	return (0);
}
1066
1067static void
1068rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1069{
1070	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1071	int found = 0;
1072
1073	rw_wlock(&rctl_lock);
1074	found += rctl_racct_remove_rules(racct, filter);
1075	rw_wunlock(&rctl_lock);
1076
1077	*((int *)arg3) += found;
1078}
1079
1080/*
1081 * Remove all rules that match the filter.
1082 */
1083int
1084rctl_rule_remove(struct rctl_rule *filter)
1085{
1086	int found = 0;
1087	struct proc *p;
1088
1089	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1090	    filter->rr_subject.rs_proc != NULL) {
1091		p = filter->rr_subject.rs_proc;
1092		rw_wlock(&rctl_lock);
1093		found = rctl_racct_remove_rules(p->p_racct, filter);
1094		rw_wunlock(&rctl_lock);
1095		if (found)
1096			return (0);
1097		return (ESRCH);
1098	}
1099
1100	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1101	    (void *)&found);
1102	ui_racct_foreach(rctl_rule_remove_callback, filter,
1103	    (void *)&found);
1104	prison_racct_foreach(rctl_rule_remove_callback, filter,
1105	    (void *)&found);
1106
1107	sx_assert(&allproc_lock, SA_LOCKED);
1108	rw_wlock(&rctl_lock);
1109	FOREACH_PROC_IN_SYSTEM(p) {
1110		found += rctl_racct_remove_rules(p->p_racct, filter);
1111	}
1112	rw_wunlock(&rctl_lock);
1113
1114	if (found)
1115		return (0);
1116	return (ESRCH);
1117}
1118
1119/*
1120 * Appends a rule to the sbuf.
1121 */
1122static void
1123rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1124{
1125	int64_t amount;
1126
1127	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1128
1129	switch (rule->rr_subject_type) {
1130	case RCTL_SUBJECT_TYPE_PROCESS:
1131		if (rule->rr_subject.rs_proc == NULL)
1132			sbuf_printf(sb, ":");
1133		else
1134			sbuf_printf(sb, "%d:",
1135			    rule->rr_subject.rs_proc->p_pid);
1136		break;
1137	case RCTL_SUBJECT_TYPE_USER:
1138		if (rule->rr_subject.rs_uip == NULL)
1139			sbuf_printf(sb, ":");
1140		else
1141			sbuf_printf(sb, "%d:",
1142			    rule->rr_subject.rs_uip->ui_uid);
1143		break;
1144	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1145		if (rule->rr_subject.rs_loginclass == NULL)
1146			sbuf_printf(sb, ":");
1147		else
1148			sbuf_printf(sb, "%s:",
1149			    rule->rr_subject.rs_loginclass->lc_name);
1150		break;
1151	case RCTL_SUBJECT_TYPE_JAIL:
1152		if (rule->rr_subject.rs_prison_racct == NULL)
1153			sbuf_printf(sb, ":");
1154		else
1155			sbuf_printf(sb, "%s:",
1156			    rule->rr_subject.rs_prison_racct->prr_name);
1157		break;
1158	default:
1159		panic("rctl_rule_to_sbuf: unknown subject type %d",
1160		    rule->rr_subject_type);
1161	}
1162
1163	amount = rule->rr_amount;
1164	if (amount != RCTL_AMOUNT_UNDEFINED &&
1165	    RACCT_IS_IN_MILLIONS(rule->rr_resource))
1166		amount /= 1000000;
1167
1168	sbuf_printf(sb, "%s:%s=%jd",
1169	    rctl_resource_name(rule->rr_resource),
1170	    rctl_action_name(rule->rr_action),
1171	    amount);
1172
1173	if (rule->rr_per != rule->rr_subject_type)
1174		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1175}
1176
1177/*
1178 * Routine used by RCTL syscalls to read in input string.
1179 */
1180static int
1181rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1182{
1183	int error;
1184	char *str;
1185
1186	if (inbuflen <= 0)
1187		return (EINVAL);
1188	if (inbuflen > RCTL_MAX_INBUFLEN)
1189		return (E2BIG);
1190
1191	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1192	error = copyinstr(inbufp, str, inbuflen, NULL);
1193	if (error != 0) {
1194		free(str, M_RCTL);
1195		return (error);
1196	}
1197
1198	*inputstr = str;
1199
1200	return (0);
1201}
1202
1203/*
1204 * Routine used by RCTL syscalls to write out output string.
1205 */
1206static int
1207rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1208{
1209	int error;
1210
1211	if (outputsbuf == NULL)
1212		return (0);
1213
1214	sbuf_finish(outputsbuf);
1215	if (outbuflen < sbuf_len(outputsbuf) + 1) {
1216		sbuf_delete(outputsbuf);
1217		return (ERANGE);
1218	}
1219	error = copyout(sbuf_data(outputsbuf), outbufp,
1220	    sbuf_len(outputsbuf) + 1);
1221	sbuf_delete(outputsbuf);
1222	return (error);
1223}
1224
1225static struct sbuf *
1226rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1227{
1228	int i;
1229	int64_t amount;
1230	struct sbuf *sb;
1231
1232	sb = sbuf_new_auto();
1233	for (i = 0; i <= RACCT_MAX; i++) {
1234		if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1235			continue;
1236		amount = racct->r_resources[i];
1237		if (RACCT_IS_IN_MILLIONS(i))
1238			amount /= 1000000;
1239		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1240	}
1241	sbuf_setpos(sb, sbuf_len(sb) - 1);
1242	return (sb);
1243}
1244
1245int
1246sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1247{
1248	int error;
1249	char *inputstr;
1250	struct rctl_rule *filter;
1251	struct sbuf *outputsbuf = NULL;
1252	struct proc *p;
1253	struct uidinfo *uip;
1254	struct loginclass *lc;
1255	struct prison_racct *prr;
1256
1257	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1258	if (error != 0)
1259		return (error);
1260
1261	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1262	if (error != 0)
1263		return (error);
1264
1265	sx_slock(&allproc_lock);
1266	error = rctl_string_to_rule(inputstr, &filter);
1267	free(inputstr, M_RCTL);
1268	if (error != 0) {
1269		sx_sunlock(&allproc_lock);
1270		return (error);
1271	}
1272
1273	switch (filter->rr_subject_type) {
1274	case RCTL_SUBJECT_TYPE_PROCESS:
1275		p = filter->rr_subject.rs_proc;
1276		if (p == NULL) {
1277			error = EINVAL;
1278			goto out;
1279		}
1280		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1281		break;
1282	case RCTL_SUBJECT_TYPE_USER:
1283		uip = filter->rr_subject.rs_uip;
1284		if (uip == NULL) {
1285			error = EINVAL;
1286			goto out;
1287		}
1288		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1289		break;
1290	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1291		lc = filter->rr_subject.rs_loginclass;
1292		if (lc == NULL) {
1293			error = EINVAL;
1294			goto out;
1295		}
1296		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1297		break;
1298	case RCTL_SUBJECT_TYPE_JAIL:
1299		prr = filter->rr_subject.rs_prison_racct;
1300		if (prr == NULL) {
1301			error = EINVAL;
1302			goto out;
1303		}
1304		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1305		break;
1306	default:
1307		error = EINVAL;
1308	}
1309out:
1310	rctl_rule_release(filter);
1311	sx_sunlock(&allproc_lock);
1312	if (error != 0)
1313		return (error);
1314
1315	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1316
1317	return (error);
1318}
1319
1320static void
1321rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1322{
1323	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1324	struct rctl_rule_link *link;
1325	struct sbuf *sb = (struct sbuf *)arg3;
1326
1327	rw_rlock(&rctl_lock);
1328	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1329		if (!rctl_rule_matches(link->rrl_rule, filter))
1330			continue;
1331		rctl_rule_to_sbuf(sb, link->rrl_rule);
1332		sbuf_printf(sb, ",");
1333	}
1334	rw_runlock(&rctl_lock);
1335}
1336
1337int
1338sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1339{
1340	int error;
1341	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1342	char *inputstr, *buf;
1343	struct sbuf *sb;
1344	struct rctl_rule *filter;
1345	struct rctl_rule_link *link;
1346	struct proc *p;
1347
1348	error = priv_check(td, PRIV_RCTL_GET_RULES);
1349	if (error != 0)
1350		return (error);
1351
1352	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1353	if (error != 0)
1354		return (error);
1355
1356	sx_slock(&allproc_lock);
1357	error = rctl_string_to_rule(inputstr, &filter);
1358	free(inputstr, M_RCTL);
1359	if (error != 0) {
1360		sx_sunlock(&allproc_lock);
1361		return (error);
1362	}
1363
1364again:
1365	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1366	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1367	KASSERT(sb != NULL, ("sbuf_new failed"));
1368
1369	sx_assert(&allproc_lock, SA_LOCKED);
1370	FOREACH_PROC_IN_SYSTEM(p) {
1371		rw_rlock(&rctl_lock);
1372		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1373			/*
1374			 * Non-process rules will be added to the buffer later.
1375			 * Adding them here would result in duplicated output.
1376			 */
1377			if (link->rrl_rule->rr_subject_type !=
1378			    RCTL_SUBJECT_TYPE_PROCESS)
1379				continue;
1380			if (!rctl_rule_matches(link->rrl_rule, filter))
1381				continue;
1382			rctl_rule_to_sbuf(sb, link->rrl_rule);
1383			sbuf_printf(sb, ",");
1384		}
1385		rw_runlock(&rctl_lock);
1386	}
1387
1388	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1389	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1390	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1391	if (sbuf_error(sb) == ENOMEM) {
1392		sbuf_delete(sb);
1393		free(buf, M_RCTL);
1394		bufsize *= 4;
1395		goto again;
1396	}
1397
1398	/*
1399	 * Remove trailing ",".
1400	 */
1401	if (sbuf_len(sb) > 0)
1402		sbuf_setpos(sb, sbuf_len(sb) - 1);
1403
1404	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1405
1406	rctl_rule_release(filter);
1407	sx_sunlock(&allproc_lock);
1408	free(buf, M_RCTL);
1409	return (error);
1410}
1411
1412int
1413sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1414{
1415	int error;
1416	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1417	char *inputstr, *buf;
1418	struct sbuf *sb;
1419	struct rctl_rule *filter;
1420	struct rctl_rule_link *link;
1421
1422	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1423	if (error != 0)
1424		return (error);
1425
1426	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1427	if (error != 0)
1428		return (error);
1429
1430	sx_slock(&allproc_lock);
1431	error = rctl_string_to_rule(inputstr, &filter);
1432	free(inputstr, M_RCTL);
1433	if (error != 0) {
1434		sx_sunlock(&allproc_lock);
1435		return (error);
1436	}
1437
1438	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1439		rctl_rule_release(filter);
1440		sx_sunlock(&allproc_lock);
1441		return (EINVAL);
1442	}
1443	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1444		rctl_rule_release(filter);
1445		sx_sunlock(&allproc_lock);
1446		return (EOPNOTSUPP);
1447	}
1448	if (filter->rr_subject.rs_proc == NULL) {
1449		rctl_rule_release(filter);
1450		sx_sunlock(&allproc_lock);
1451		return (EINVAL);
1452	}
1453
1454again:
1455	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1456	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1457	KASSERT(sb != NULL, ("sbuf_new failed"));
1458
1459	rw_rlock(&rctl_lock);
1460	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1461	    rrl_next) {
1462		rctl_rule_to_sbuf(sb, link->rrl_rule);
1463		sbuf_printf(sb, ",");
1464	}
1465	rw_runlock(&rctl_lock);
1466	if (sbuf_error(sb) == ENOMEM) {
1467		sbuf_delete(sb);
1468		free(buf, M_RCTL);
1469		bufsize *= 4;
1470		goto again;
1471	}
1472
1473	/*
1474	 * Remove trailing ",".
1475	 */
1476	if (sbuf_len(sb) > 0)
1477		sbuf_setpos(sb, sbuf_len(sb) - 1);
1478
1479	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1480	rctl_rule_release(filter);
1481	sx_sunlock(&allproc_lock);
1482	free(buf, M_RCTL);
1483	return (error);
1484}
1485
1486int
1487sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1488{
1489	int error;
1490	struct rctl_rule *rule;
1491	char *inputstr;
1492
1493	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1494	if (error != 0)
1495		return (error);
1496
1497	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1498	if (error != 0)
1499		return (error);
1500
1501	sx_slock(&allproc_lock);
1502	error = rctl_string_to_rule(inputstr, &rule);
1503	free(inputstr, M_RCTL);
1504	if (error != 0) {
1505		sx_sunlock(&allproc_lock);
1506		return (error);
1507	}
1508	/*
1509	 * The 'per' part of a rule is optional.
1510	 */
1511	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1512	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1513		rule->rr_per = rule->rr_subject_type;
1514
1515	if (!rctl_rule_fully_specified(rule)) {
1516		error = EINVAL;
1517		goto out;
1518	}
1519
1520	error = rctl_rule_add(rule);
1521
1522out:
1523	rctl_rule_release(rule);
1524	sx_sunlock(&allproc_lock);
1525	return (error);
1526}
1527
1528int
1529sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1530{
1531	int error;
1532	struct rctl_rule *filter;
1533	char *inputstr;
1534
1535	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1536	if (error != 0)
1537		return (error);
1538
1539	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1540	if (error != 0)
1541		return (error);
1542
1543	sx_slock(&allproc_lock);
1544	error = rctl_string_to_rule(inputstr, &filter);
1545	free(inputstr, M_RCTL);
1546	if (error != 0) {
1547		sx_sunlock(&allproc_lock);
1548		return (error);
1549	}
1550
1551	error = rctl_rule_remove(filter);
1552	rctl_rule_release(filter);
1553	sx_sunlock(&allproc_lock);
1554
1555	return (error);
1556}
1557
1558/*
1559 * Update RCTL rule list after credential change.
1560 */
1561void
1562rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1563{
1564	int rulecnt, i;
1565	struct rctl_rule_link *link, *newlink;
1566	struct uidinfo *newuip;
1567	struct loginclass *newlc;
1568	struct prison_racct *newprr;
1569	LIST_HEAD(, rctl_rule_link) newrules;
1570
1571	newuip = newcred->cr_ruidinfo;
1572	newlc = newcred->cr_loginclass;
1573	newprr = newcred->cr_prison->pr_prison_racct;
1574
1575	LIST_INIT(&newrules);
1576
1577again:
1578	/*
1579	 * First, count the rules that apply to the process with new
1580	 * credentials.
1581	 */
1582	rulecnt = 0;
1583	rw_rlock(&rctl_lock);
1584	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1585		if (link->rrl_rule->rr_subject_type ==
1586		    RCTL_SUBJECT_TYPE_PROCESS)
1587			rulecnt++;
1588	}
1589	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1590		rulecnt++;
1591	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1592		rulecnt++;
1593	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1594		rulecnt++;
1595	rw_runlock(&rctl_lock);
1596
1597	/*
1598	 * Create temporary list.  We've dropped the rctl_lock in order
1599	 * to use M_WAITOK.
1600	 */
1601	for (i = 0; i < rulecnt; i++) {
1602		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1603		newlink->rrl_rule = NULL;
1604		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1605	}
1606
1607	newlink = LIST_FIRST(&newrules);
1608
1609	/*
1610	 * Assign rules to the newly allocated list entries.
1611	 */
1612	rw_wlock(&rctl_lock);
1613	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1614		if (link->rrl_rule->rr_subject_type ==
1615		    RCTL_SUBJECT_TYPE_PROCESS) {
1616			if (newlink == NULL)
1617				goto goaround;
1618			rctl_rule_acquire(link->rrl_rule);
1619			newlink->rrl_rule = link->rrl_rule;
1620			newlink = LIST_NEXT(newlink, rrl_next);
1621			rulecnt--;
1622		}
1623	}
1624
1625	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1626		if (newlink == NULL)
1627			goto goaround;
1628		rctl_rule_acquire(link->rrl_rule);
1629		newlink->rrl_rule = link->rrl_rule;
1630		newlink = LIST_NEXT(newlink, rrl_next);
1631		rulecnt--;
1632	}
1633
1634	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1635		if (newlink == NULL)
1636			goto goaround;
1637		rctl_rule_acquire(link->rrl_rule);
1638		newlink->rrl_rule = link->rrl_rule;
1639		newlink = LIST_NEXT(newlink, rrl_next);
1640		rulecnt--;
1641	}
1642
1643	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1644		if (newlink == NULL)
1645			goto goaround;
1646		rctl_rule_acquire(link->rrl_rule);
1647		newlink->rrl_rule = link->rrl_rule;
1648		newlink = LIST_NEXT(newlink, rrl_next);
1649		rulecnt--;
1650	}
1651
1652	if (rulecnt == 0) {
1653		/*
1654		 * Free the old rule list.
1655		 */
1656		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1657			link = LIST_FIRST(&p->p_racct->r_rule_links);
1658			LIST_REMOVE(link, rrl_next);
1659			rctl_rule_release(link->rrl_rule);
1660			uma_zfree(rctl_rule_link_zone, link);
1661		}
1662
1663		/*
1664		 * Replace lists and we're done.
1665		 *
1666		 * XXX: Is there any way to switch list heads instead
1667		 *      of iterating here?
1668		 */
1669		while (!LIST_EMPTY(&newrules)) {
1670			newlink = LIST_FIRST(&newrules);
1671			LIST_REMOVE(newlink, rrl_next);
1672			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1673			    newlink, rrl_next);
1674		}
1675
1676		rw_wunlock(&rctl_lock);
1677
1678		return;
1679	}
1680
1681goaround:
1682	rw_wunlock(&rctl_lock);
1683
1684	/*
1685	 * Rule list changed while we were not holding the rctl_lock.
1686	 * Free the new list and try again.
1687	 */
1688	while (!LIST_EMPTY(&newrules)) {
1689		newlink = LIST_FIRST(&newrules);
1690		LIST_REMOVE(newlink, rrl_next);
1691		if (newlink->rrl_rule != NULL)
1692			rctl_rule_release(newlink->rrl_rule);
1693		uma_zfree(rctl_rule_link_zone, newlink);
1694	}
1695
1696	goto again;
1697}
1698
1699/*
1700 * Assign RCTL rules to the newly created process.
1701 */
1702int
1703rctl_proc_fork(struct proc *parent, struct proc *child)
1704{
1705	int error;
1706	struct rctl_rule_link *link;
1707	struct rctl_rule *rule;
1708
1709	LIST_INIT(&child->p_racct->r_rule_links);
1710
1711	KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
1712
1713	rw_wlock(&rctl_lock);
1714
1715	/*
1716	 * Go through limits applicable to the parent and assign them
1717	 * to the child.  Rules with 'process' subject have to be duplicated
1718	 * in order to make their rr_subject point to the new process.
1719	 */
1720	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1721		if (link->rrl_rule->rr_subject_type ==
1722		    RCTL_SUBJECT_TYPE_PROCESS) {
1723			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1724			if (rule == NULL)
1725				goto fail;
1726			KASSERT(rule->rr_subject.rs_proc == parent,
1727			    ("rule->rr_subject.rs_proc != parent"));
1728			rule->rr_subject.rs_proc = child;
1729			error = rctl_racct_add_rule_locked(child->p_racct,
1730			    rule);
1731			rctl_rule_release(rule);
1732			if (error != 0)
1733				goto fail;
1734		} else {
1735			error = rctl_racct_add_rule_locked(child->p_racct,
1736			    link->rrl_rule);
1737			if (error != 0)
1738				goto fail;
1739		}
1740	}
1741
1742	rw_wunlock(&rctl_lock);
1743	return (0);
1744
1745fail:
1746	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1747		link = LIST_FIRST(&child->p_racct->r_rule_links);
1748		LIST_REMOVE(link, rrl_next);
1749		rctl_rule_release(link->rrl_rule);
1750		uma_zfree(rctl_rule_link_zone, link);
1751	}
1752	rw_wunlock(&rctl_lock);
1753	return (EAGAIN);
1754}
1755
1756/*
1757 * Release rules attached to the racct.
1758 */
1759void
1760rctl_racct_release(struct racct *racct)
1761{
1762	struct rctl_rule_link *link;
1763
1764	rw_wlock(&rctl_lock);
1765	while (!LIST_EMPTY(&racct->r_rule_links)) {
1766		link = LIST_FIRST(&racct->r_rule_links);
1767		LIST_REMOVE(link, rrl_next);
1768		rctl_rule_release(link->rrl_rule);
1769		uma_zfree(rctl_rule_link_zone, link);
1770	}
1771	rw_wunlock(&rctl_lock);
1772}
1773
1774static void
1775rctl_init(void)
1776{
1777
1778	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1779	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1780	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1781	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1782	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1783}
1784
1785#else /* !RCTL */
1786
1787int
1788sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1789{
1790
1791	return (ENOSYS);
1792}
1793
1794int
1795sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1796{
1797
1798	return (ENOSYS);
1799}
1800
1801int
1802sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1803{
1804
1805	return (ENOSYS);
1806}
1807
1808int
1809sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1810{
1811
1812	return (ENOSYS);
1813}
1814
1815int
1816sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1817{
1818
1819	return (ENOSYS);
1820}
1821
1822#endif /* !RCTL */
1823