kern_rctl.c revision 290857
1/*-
2 * Copyright (c) 2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/kern/kern_rctl.c 290857 2015-11-15 12:10:51Z trasz $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/kern/kern_rctl.c 290857 2015-11-15 12:10:51Z trasz $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/malloc.h>
38#include <sys/queue.h>
39#include <sys/refcount.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/limits.h>
43#include <sys/loginclass.h>
44#include <sys/priv.h>
45#include <sys/proc.h>
46#include <sys/racct.h>
47#include <sys/rctl.h>
48#include <sys/resourcevar.h>
49#include <sys/sx.h>
50#include <sys/sysent.h>
51#include <sys/sysproto.h>
52#include <sys/systm.h>
53#include <sys/types.h>
54#include <sys/eventhandler.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#include <sys/rwlock.h>
58#include <sys/sbuf.h>
59#include <sys/taskqueue.h>
60#include <sys/tree.h>
61#include <vm/uma.h>
62
63#ifdef RCTL
64#ifndef RACCT
65#error "The RCTL option requires the RACCT option"
66#endif
67
68FEATURE(rctl, "Resource Limits");
69
/*
 * Flag values.  NOTE(review): their users are not visible in this part
 * of the file.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/*
 * Buffer sizes, in bytes.  The expansions are parenthesized so that the
 * macros behave as single values inside larger expressions, e.g.
 * 'len % RCTL_MAX_INBUFSIZE' or 'x / RCTL_MAX_OUTBUFSIZE'.
 * RCTL_MAX_OUTBUFSIZE is the initial value of rctl_maxbufsize;
 * RCTL_LOG_BUFSIZE is the size of the scratch buffer used when a matched
 * rule is logged or reported through devctl.
 */
#define	RCTL_MAX_INBUFSIZE	(4 * 1024)
#define	RCTL_MAX_OUTBUFSIZE	(16 * 1024 * 1024)
#define	RCTL_LOG_BUFSIZE	128

/*
 * Upper bound on the safety margin subtracted from the available %cpu
 * by rctl_pcpu_available().
 */
#define	RCTL_PCPU_SHIFT		(10 * 1000000)
79
80unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
81
82SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits");
83SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
84    &rctl_maxbufsize, 0, "Maximum output buffer size");
85
86/*
87 * 'rctl_rule_link' connects a rule with every racct it's related to.
88 * For example, rule 'user:X:openfiles:deny=N/process' is linked
89 * with uidinfo for user X, and to each process of that user.
90 */
91struct rctl_rule_link {
92	LIST_ENTRY(rctl_rule_link)	rrl_next;
93	struct rctl_rule		*rrl_rule;
94	int				rrl_exceeded;
95};
96
/*
 * One entry of a name <-> value lookup table.  Tables are arrays of
 * these, terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		d_value;	/* corresponding numeric value */
};
101
102static struct dict subjectnames[] = {
103	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
104	{ "user", RCTL_SUBJECT_TYPE_USER },
105	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
106	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
107	{ NULL, -1 }};
108
109static struct dict resourcenames[] = {
110	{ "cputime", RACCT_CPU },
111	{ "datasize", RACCT_DATA },
112	{ "stacksize", RACCT_STACK },
113	{ "coredumpsize", RACCT_CORE },
114	{ "memoryuse", RACCT_RSS },
115	{ "memorylocked", RACCT_MEMLOCK },
116	{ "maxproc", RACCT_NPROC },
117	{ "openfiles", RACCT_NOFILE },
118	{ "vmemoryuse", RACCT_VMEM },
119	{ "pseudoterminals", RACCT_NPTS },
120	{ "swapuse", RACCT_SWAP },
121	{ "nthr", RACCT_NTHR },
122	{ "msgqqueued", RACCT_MSGQQUEUED },
123	{ "msgqsize", RACCT_MSGQSIZE },
124	{ "nmsgq", RACCT_NMSGQ },
125	{ "nsem", RACCT_NSEM },
126	{ "nsemop", RACCT_NSEMOP },
127	{ "nshm", RACCT_NSHM },
128	{ "shmsize", RACCT_SHMSIZE },
129	{ "wallclock", RACCT_WALLCLOCK },
130	{ "pcpu", RACCT_PCTCPU },
131	{ NULL, -1 }};
132
133static struct dict actionnames[] = {
134	{ "sighup", RCTL_ACTION_SIGHUP },
135	{ "sigint", RCTL_ACTION_SIGINT },
136	{ "sigquit", RCTL_ACTION_SIGQUIT },
137	{ "sigill", RCTL_ACTION_SIGILL },
138	{ "sigtrap", RCTL_ACTION_SIGTRAP },
139	{ "sigabrt", RCTL_ACTION_SIGABRT },
140	{ "sigemt", RCTL_ACTION_SIGEMT },
141	{ "sigfpe", RCTL_ACTION_SIGFPE },
142	{ "sigkill", RCTL_ACTION_SIGKILL },
143	{ "sigbus", RCTL_ACTION_SIGBUS },
144	{ "sigsegv", RCTL_ACTION_SIGSEGV },
145	{ "sigsys", RCTL_ACTION_SIGSYS },
146	{ "sigpipe", RCTL_ACTION_SIGPIPE },
147	{ "sigalrm", RCTL_ACTION_SIGALRM },
148	{ "sigterm", RCTL_ACTION_SIGTERM },
149	{ "sigurg", RCTL_ACTION_SIGURG },
150	{ "sigstop", RCTL_ACTION_SIGSTOP },
151	{ "sigtstp", RCTL_ACTION_SIGTSTP },
152	{ "sigchld", RCTL_ACTION_SIGCHLD },
153	{ "sigttin", RCTL_ACTION_SIGTTIN },
154	{ "sigttou", RCTL_ACTION_SIGTTOU },
155	{ "sigio", RCTL_ACTION_SIGIO },
156	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
157	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
158	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
159	{ "sigprof", RCTL_ACTION_SIGPROF },
160	{ "sigwinch", RCTL_ACTION_SIGWINCH },
161	{ "siginfo", RCTL_ACTION_SIGINFO },
162	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
163	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
164	{ "sigthr", RCTL_ACTION_SIGTHR },
165	{ "deny", RCTL_ACTION_DENY },
166	{ "log", RCTL_ACTION_LOG },
167	{ "devctl", RCTL_ACTION_DEVCTL },
168	{ NULL, -1 }};
169
170static void rctl_init(void);
171SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
172
173static uma_zone_t rctl_rule_link_zone;
174static uma_zone_t rctl_rule_zone;
175static struct rwlock rctl_lock;
176RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
177
178static int rctl_rule_fully_specified(const struct rctl_rule *rule);
179static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
180
181static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
182
183static const char *
184rctl_subject_type_name(int subject)
185{
186	int i;
187
188	for (i = 0; subjectnames[i].d_name != NULL; i++) {
189		if (subjectnames[i].d_value == subject)
190			return (subjectnames[i].d_name);
191	}
192
193	panic("rctl_subject_type_name: unknown subject type %d", subject);
194}
195
196static const char *
197rctl_action_name(int action)
198{
199	int i;
200
201	for (i = 0; actionnames[i].d_name != NULL; i++) {
202		if (actionnames[i].d_value == action)
203			return (actionnames[i].d_name);
204	}
205
206	panic("rctl_action_name: unknown action %d", action);
207}
208
209const char *
210rctl_resource_name(int resource)
211{
212	int i;
213
214	for (i = 0; resourcenames[i].d_name != NULL; i++) {
215		if (resourcenames[i].d_value == resource)
216			return (resourcenames[i].d_name);
217	}
218
219	panic("rctl_resource_name: unknown resource %d", resource);
220}
221
222/*
223 * Return the amount of resource that can be allocated by 'p' before
224 * hitting 'rule'.
225 */
226static int64_t
227rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
228{
229	int resource;
230	int64_t available = INT64_MAX;
231	struct ucred *cred = p->p_ucred;
232
233	ASSERT_RACCT_ENABLED();
234	rw_assert(&rctl_lock, RA_LOCKED);
235
236	resource = rule->rr_resource;
237	switch (rule->rr_per) {
238	case RCTL_SUBJECT_TYPE_PROCESS:
239		available = rule->rr_amount -
240		    p->p_racct->r_resources[resource];
241		break;
242	case RCTL_SUBJECT_TYPE_USER:
243		available = rule->rr_amount -
244		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
245		break;
246	case RCTL_SUBJECT_TYPE_LOGINCLASS:
247		available = rule->rr_amount -
248		    cred->cr_loginclass->lc_racct->r_resources[resource];
249		break;
250	case RCTL_SUBJECT_TYPE_JAIL:
251		available = rule->rr_amount -
252		    cred->cr_prison->pr_prison_racct->prr_racct->
253		        r_resources[resource];
254		break;
255	default:
256		panic("rctl_compute_available: unknown per %d",
257		    rule->rr_per);
258	}
259
260	return (available);
261}
262
263/*
264 * Return non-zero if allocating 'amount' by proc 'p' would exceed
265 * resource limit specified by 'rule'.
266 */
267static int
268rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
269    int64_t amount)
270{
271	int64_t available;
272
273	ASSERT_RACCT_ENABLED();
274
275	rw_assert(&rctl_lock, RA_LOCKED);
276
277	available = rctl_available_resource(p, rule);
278	if (available >= amount)
279		return (0);
280
281	return (1);
282}
283
284/*
285 * Special version of rctl_available() function for the %cpu resource.
286 * We slightly cheat here and return less than we normally would.
287 */
288int64_t
289rctl_pcpu_available(const struct proc *p) {
290	struct rctl_rule *rule;
291	struct rctl_rule_link *link;
292	int64_t available, minavailable, limit;
293
294	ASSERT_RACCT_ENABLED();
295
296	minavailable = INT64_MAX;
297	limit = 0;
298
299	rw_rlock(&rctl_lock);
300
301	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
302		rule = link->rrl_rule;
303		if (rule->rr_resource != RACCT_PCTCPU)
304			continue;
305		if (rule->rr_action != RCTL_ACTION_DENY)
306			continue;
307		available = rctl_available_resource(p, rule);
308		if (available < minavailable) {
309			minavailable = available;
310			limit = rule->rr_amount;
311		}
312	}
313
314	rw_runlock(&rctl_lock);
315
316	/*
317	 * Return slightly less than actual value of the available
318	 * %cpu resource.  This makes %cpu throttling more agressive
319	 * and lets us act sooner than the limits are already exceeded.
320	 */
321	if (limit != 0) {
322		if (limit > 2 * RCTL_PCPU_SHIFT)
323			minavailable -= RCTL_PCPU_SHIFT;
324		else
325			minavailable -= (limit / 2);
326	}
327
328	return (minavailable);
329}
330
331/*
332 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
333 * to what it keeps allocated now.  Returns non-zero if the allocation should
334 * be denied, 0 otherwise.
335 */
336int
337rctl_enforce(struct proc *p, int resource, uint64_t amount)
338{
339	struct rctl_rule *rule;
340	struct rctl_rule_link *link;
341	struct sbuf sb;
342	int should_deny = 0;
343	char *buf;
344	static int curtime = 0;
345	static struct timeval lasttime;
346
347	ASSERT_RACCT_ENABLED();
348
349	rw_rlock(&rctl_lock);
350
351	/*
352	 * There may be more than one matching rule; go through all of them.
353	 * Denial should be done last, after logging and sending signals.
354	 */
355	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
356		rule = link->rrl_rule;
357		if (rule->rr_resource != resource)
358			continue;
359		if (!rctl_would_exceed(p, rule, amount)) {
360			link->rrl_exceeded = 0;
361			continue;
362		}
363
364		switch (rule->rr_action) {
365		case RCTL_ACTION_DENY:
366			should_deny = 1;
367			continue;
368		case RCTL_ACTION_LOG:
369			/*
370			 * If rrl_exceeded != 0, it means we've already
371			 * logged a warning for this process.
372			 */
373			if (link->rrl_exceeded != 0)
374				continue;
375
376			/*
377			 * If the process state is not fully initialized yet,
378			 * we can't access most of the required fields, e.g.
379			 * p->p_comm.  This happens when called from fork1().
380			 * Ignore this rule for now; it will be processed just
381			 * after fork, when called from racct_proc_fork_done().
382			 */
383			if (p->p_state != PRS_NORMAL)
384				continue;
385
386			if (!ppsratecheck(&lasttime, &curtime, 10))
387				continue;
388
389			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
390			if (buf == NULL) {
391				printf("rctl_enforce: out of memory\n");
392				continue;
393			}
394			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
395			rctl_rule_to_sbuf(&sb, rule);
396			sbuf_finish(&sb);
397			printf("rctl: rule \"%s\" matched by pid %d "
398			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
399			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
400			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
401			sbuf_delete(&sb);
402			free(buf, M_RCTL);
403			link->rrl_exceeded = 1;
404			continue;
405		case RCTL_ACTION_DEVCTL:
406			if (link->rrl_exceeded != 0)
407				continue;
408
409			if (p->p_state != PRS_NORMAL)
410				continue;
411
412			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
413			if (buf == NULL) {
414				printf("rctl_enforce: out of memory\n");
415				continue;
416			}
417			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
418			sbuf_printf(&sb, "rule=");
419			rctl_rule_to_sbuf(&sb, rule);
420			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
421			    p->p_pid, p->p_ucred->cr_ruid,
422			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
423			sbuf_finish(&sb);
424			devctl_notify_f("RCTL", "rule", "matched",
425			    sbuf_data(&sb), M_NOWAIT);
426			sbuf_delete(&sb);
427			free(buf, M_RCTL);
428			link->rrl_exceeded = 1;
429			continue;
430		default:
431			if (link->rrl_exceeded != 0)
432				continue;
433
434			if (p->p_state != PRS_NORMAL)
435				continue;
436
437			KASSERT(rule->rr_action > 0 &&
438			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
439			    ("rctl_enforce: unknown action %d",
440			     rule->rr_action));
441
442			/*
443			 * We're using the fact that RCTL_ACTION_SIG* values
444			 * are equal to their counterparts from sys/signal.h.
445			 */
446			kern_psignal(p, rule->rr_action);
447			link->rrl_exceeded = 1;
448			continue;
449		}
450	}
451
452	rw_runlock(&rctl_lock);
453
454	if (should_deny) {
455		/*
456		 * Return fake error code; the caller should change it
457		 * into one proper for the situation - EFSIZ, ENOMEM etc.
458		 */
459		return (EDOOFUS);
460	}
461
462	return (0);
463}
464
465uint64_t
466rctl_get_limit(struct proc *p, int resource)
467{
468	struct rctl_rule *rule;
469	struct rctl_rule_link *link;
470	uint64_t amount = UINT64_MAX;
471
472	ASSERT_RACCT_ENABLED();
473
474	rw_rlock(&rctl_lock);
475
476	/*
477	 * There may be more than one matching rule; go through all of them.
478	 * Denial should be done last, after logging and sending signals.
479	 */
480	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
481		rule = link->rrl_rule;
482		if (rule->rr_resource != resource)
483			continue;
484		if (rule->rr_action != RCTL_ACTION_DENY)
485			continue;
486		if (rule->rr_amount < amount)
487			amount = rule->rr_amount;
488	}
489
490	rw_runlock(&rctl_lock);
491
492	return (amount);
493}
494
495uint64_t
496rctl_get_available(struct proc *p, int resource)
497{
498	struct rctl_rule *rule;
499	struct rctl_rule_link *link;
500	int64_t available, minavailable, allocated;
501
502	minavailable = INT64_MAX;
503
504	ASSERT_RACCT_ENABLED();
505
506	rw_rlock(&rctl_lock);
507
508	/*
509	 * There may be more than one matching rule; go through all of them.
510	 * Denial should be done last, after logging and sending signals.
511	 */
512	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
513		rule = link->rrl_rule;
514		if (rule->rr_resource != resource)
515			continue;
516		if (rule->rr_action != RCTL_ACTION_DENY)
517			continue;
518		available = rctl_available_resource(p, rule);
519		if (available < minavailable)
520			minavailable = available;
521	}
522
523	rw_runlock(&rctl_lock);
524
525	/*
526	 * XXX: Think about this _hard_.
527	 */
528	allocated = p->p_racct->r_resources[resource];
529	if (minavailable < INT64_MAX - allocated)
530		minavailable += allocated;
531	if (minavailable < 0)
532		minavailable = 0;
533	return (minavailable);
534}
535
536static int
537rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
538{
539
540	ASSERT_RACCT_ENABLED();
541
542	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
543		if (rule->rr_subject_type != filter->rr_subject_type)
544			return (0);
545
546		switch (filter->rr_subject_type) {
547		case RCTL_SUBJECT_TYPE_PROCESS:
548			if (filter->rr_subject.rs_proc != NULL &&
549			    rule->rr_subject.rs_proc !=
550			    filter->rr_subject.rs_proc)
551				return (0);
552			break;
553		case RCTL_SUBJECT_TYPE_USER:
554			if (filter->rr_subject.rs_uip != NULL &&
555			    rule->rr_subject.rs_uip !=
556			    filter->rr_subject.rs_uip)
557				return (0);
558			break;
559		case RCTL_SUBJECT_TYPE_LOGINCLASS:
560			if (filter->rr_subject.rs_loginclass != NULL &&
561			    rule->rr_subject.rs_loginclass !=
562			    filter->rr_subject.rs_loginclass)
563				return (0);
564			break;
565		case RCTL_SUBJECT_TYPE_JAIL:
566			if (filter->rr_subject.rs_prison_racct != NULL &&
567			    rule->rr_subject.rs_prison_racct !=
568			    filter->rr_subject.rs_prison_racct)
569				return (0);
570			break;
571		default:
572			panic("rctl_rule_matches: unknown subject type %d",
573			    filter->rr_subject_type);
574		}
575	}
576
577	if (filter->rr_resource != RACCT_UNDEFINED) {
578		if (rule->rr_resource != filter->rr_resource)
579			return (0);
580	}
581
582	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
583		if (rule->rr_action != filter->rr_action)
584			return (0);
585	}
586
587	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
588		if (rule->rr_amount != filter->rr_amount)
589			return (0);
590	}
591
592	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
593		if (rule->rr_per != filter->rr_per)
594			return (0);
595	}
596
597	return (1);
598}
599
600static int
601str2value(const char *str, int *value, struct dict *table)
602{
603	int i;
604
605	if (value == NULL)
606		return (EINVAL);
607
608	for (i = 0; table[i].d_name != NULL; i++) {
609		if (strcasecmp(table[i].d_name, str) == 0) {
610			*value =  table[i].d_value;
611			return (0);
612		}
613	}
614
615	return (EINVAL);
616}
617
618static int
619str2id(const char *str, id_t *value)
620{
621	char *end;
622
623	if (str == NULL)
624		return (EINVAL);
625
626	*value = strtoul(str, &end, 10);
627	if ((size_t)(end - str) != strlen(str))
628		return (EINVAL);
629
630	return (0);
631}
632
633static int
634str2int64(const char *str, int64_t *value)
635{
636	char *end;
637
638	if (str == NULL)
639		return (EINVAL);
640
641	*value = strtoul(str, &end, 10);
642	if ((size_t)(end - str) != strlen(str))
643		return (EINVAL);
644
645	return (0);
646}
647
648/*
649 * Connect the rule to the racct, increasing refcount for the rule.
650 */
651static void
652rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
653{
654	struct rctl_rule_link *link;
655
656	ASSERT_RACCT_ENABLED();
657	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
658
659	rctl_rule_acquire(rule);
660	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
661	link->rrl_rule = rule;
662	link->rrl_exceeded = 0;
663
664	rw_wlock(&rctl_lock);
665	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
666	rw_wunlock(&rctl_lock);
667}
668
669static int
670rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
671{
672	struct rctl_rule_link *link;
673
674	ASSERT_RACCT_ENABLED();
675	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
676	rw_assert(&rctl_lock, RA_WLOCKED);
677
678	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
679	if (link == NULL)
680		return (ENOMEM);
681	rctl_rule_acquire(rule);
682	link->rrl_rule = rule;
683	link->rrl_exceeded = 0;
684
685	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
686	return (0);
687}
688
689/*
690 * Remove limits for a rules matching the filter and release
691 * the refcounts for the rules, possibly freeing them.  Returns
692 * the number of limit structures removed.
693 */
694static int
695rctl_racct_remove_rules(struct racct *racct,
696    const struct rctl_rule *filter)
697{
698	int removed = 0;
699	struct rctl_rule_link *link, *linktmp;
700
701	ASSERT_RACCT_ENABLED();
702	rw_assert(&rctl_lock, RA_WLOCKED);
703
704	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
705		if (!rctl_rule_matches(link->rrl_rule, filter))
706			continue;
707
708		LIST_REMOVE(link, rrl_next);
709		rctl_rule_release(link->rrl_rule);
710		uma_zfree(rctl_rule_link_zone, link);
711		removed++;
712	}
713	return (removed);
714}
715
716static void
717rctl_rule_acquire_subject(struct rctl_rule *rule)
718{
719
720	ASSERT_RACCT_ENABLED();
721
722	switch (rule->rr_subject_type) {
723	case RCTL_SUBJECT_TYPE_UNDEFINED:
724	case RCTL_SUBJECT_TYPE_PROCESS:
725		break;
726	case RCTL_SUBJECT_TYPE_JAIL:
727		if (rule->rr_subject.rs_prison_racct != NULL)
728			prison_racct_hold(rule->rr_subject.rs_prison_racct);
729		break;
730	case RCTL_SUBJECT_TYPE_USER:
731		if (rule->rr_subject.rs_uip != NULL)
732			uihold(rule->rr_subject.rs_uip);
733		break;
734	case RCTL_SUBJECT_TYPE_LOGINCLASS:
735		if (rule->rr_subject.rs_loginclass != NULL)
736			loginclass_hold(rule->rr_subject.rs_loginclass);
737		break;
738	default:
739		panic("rctl_rule_acquire_subject: unknown subject type %d",
740		    rule->rr_subject_type);
741	}
742}
743
744static void
745rctl_rule_release_subject(struct rctl_rule *rule)
746{
747
748	ASSERT_RACCT_ENABLED();
749
750	switch (rule->rr_subject_type) {
751	case RCTL_SUBJECT_TYPE_UNDEFINED:
752	case RCTL_SUBJECT_TYPE_PROCESS:
753		break;
754	case RCTL_SUBJECT_TYPE_JAIL:
755		if (rule->rr_subject.rs_prison_racct != NULL)
756			prison_racct_free(rule->rr_subject.rs_prison_racct);
757		break;
758	case RCTL_SUBJECT_TYPE_USER:
759		if (rule->rr_subject.rs_uip != NULL)
760			uifree(rule->rr_subject.rs_uip);
761		break;
762	case RCTL_SUBJECT_TYPE_LOGINCLASS:
763		if (rule->rr_subject.rs_loginclass != NULL)
764			loginclass_free(rule->rr_subject.rs_loginclass);
765		break;
766	default:
767		panic("rctl_rule_release_subject: unknown subject type %d",
768		    rule->rr_subject_type);
769	}
770}
771
772struct rctl_rule *
773rctl_rule_alloc(int flags)
774{
775	struct rctl_rule *rule;
776
777	ASSERT_RACCT_ENABLED();
778
779	rule = uma_zalloc(rctl_rule_zone, flags);
780	if (rule == NULL)
781		return (NULL);
782	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
783	rule->rr_subject.rs_proc = NULL;
784	rule->rr_subject.rs_uip = NULL;
785	rule->rr_subject.rs_loginclass = NULL;
786	rule->rr_subject.rs_prison_racct = NULL;
787	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
788	rule->rr_resource = RACCT_UNDEFINED;
789	rule->rr_action = RCTL_ACTION_UNDEFINED;
790	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
791	refcount_init(&rule->rr_refcount, 1);
792
793	return (rule);
794}
795
796struct rctl_rule *
797rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
798{
799	struct rctl_rule *copy;
800
801	ASSERT_RACCT_ENABLED();
802
803	copy = uma_zalloc(rctl_rule_zone, flags);
804	if (copy == NULL)
805		return (NULL);
806	copy->rr_subject_type = rule->rr_subject_type;
807	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
808	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
809	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
810	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
811	copy->rr_per = rule->rr_per;
812	copy->rr_resource = rule->rr_resource;
813	copy->rr_action = rule->rr_action;
814	copy->rr_amount = rule->rr_amount;
815	refcount_init(&copy->rr_refcount, 1);
816	rctl_rule_acquire_subject(copy);
817
818	return (copy);
819}
820
821void
822rctl_rule_acquire(struct rctl_rule *rule)
823{
824
825	ASSERT_RACCT_ENABLED();
826	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
827
828	refcount_acquire(&rule->rr_refcount);
829}
830
831static void
832rctl_rule_free(void *context, int pending)
833{
834	struct rctl_rule *rule;
835
836	rule = (struct rctl_rule *)context;
837
838	ASSERT_RACCT_ENABLED();
839	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
840
841	/*
842	 * We don't need locking here; rule is guaranteed to be inaccessible.
843	 */
844
845	rctl_rule_release_subject(rule);
846	uma_zfree(rctl_rule_zone, rule);
847}
848
849void
850rctl_rule_release(struct rctl_rule *rule)
851{
852
853	ASSERT_RACCT_ENABLED();
854	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
855
856	if (refcount_release(&rule->rr_refcount)) {
857		/*
858		 * rctl_rule_release() is often called when iterating
859		 * over all the uidinfo structures in the system,
860		 * holding uihashtbl_lock.  Since rctl_rule_free()
861		 * might end up calling uifree(), this would lead
862		 * to lock recursion.  Use taskqueue to avoid this.
863		 */
864		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
865		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
866	}
867}
868
869static int
870rctl_rule_fully_specified(const struct rctl_rule *rule)
871{
872
873	ASSERT_RACCT_ENABLED();
874
875	switch (rule->rr_subject_type) {
876	case RCTL_SUBJECT_TYPE_UNDEFINED:
877		return (0);
878	case RCTL_SUBJECT_TYPE_PROCESS:
879		if (rule->rr_subject.rs_proc == NULL)
880			return (0);
881		break;
882	case RCTL_SUBJECT_TYPE_USER:
883		if (rule->rr_subject.rs_uip == NULL)
884			return (0);
885		break;
886	case RCTL_SUBJECT_TYPE_LOGINCLASS:
887		if (rule->rr_subject.rs_loginclass == NULL)
888			return (0);
889		break;
890	case RCTL_SUBJECT_TYPE_JAIL:
891		if (rule->rr_subject.rs_prison_racct == NULL)
892			return (0);
893		break;
894	default:
895		panic("rctl_rule_fully_specified: unknown subject type %d",
896		    rule->rr_subject_type);
897	}
898	if (rule->rr_resource == RACCT_UNDEFINED)
899		return (0);
900	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
901		return (0);
902	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
903		return (0);
904	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
905		return (0);
906
907	return (1);
908}
909
910static int
911rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
912{
913	int error = 0;
914	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
915	     *amountstr, *perstr;
916	struct rctl_rule *rule;
917	id_t id;
918
919	ASSERT_RACCT_ENABLED();
920
921	rule = rctl_rule_alloc(M_WAITOK);
922
923	subjectstr = strsep(&rulestr, ":");
924	subject_idstr = strsep(&rulestr, ":");
925	resourcestr = strsep(&rulestr, ":");
926	actionstr = strsep(&rulestr, "=/");
927	amountstr = strsep(&rulestr, "/");
928	perstr = rulestr;
929
930	if (subjectstr == NULL || subjectstr[0] == '\0')
931		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
932	else {
933		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
934		if (error != 0)
935			goto out;
936	}
937
938	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
939		rule->rr_subject.rs_proc = NULL;
940		rule->rr_subject.rs_uip = NULL;
941		rule->rr_subject.rs_loginclass = NULL;
942		rule->rr_subject.rs_prison_racct = NULL;
943	} else {
944		switch (rule->rr_subject_type) {
945		case RCTL_SUBJECT_TYPE_UNDEFINED:
946			error = EINVAL;
947			goto out;
948		case RCTL_SUBJECT_TYPE_PROCESS:
949			error = str2id(subject_idstr, &id);
950			if (error != 0)
951				goto out;
952			sx_assert(&allproc_lock, SA_LOCKED);
953			rule->rr_subject.rs_proc = pfind(id);
954			if (rule->rr_subject.rs_proc == NULL) {
955				error = ESRCH;
956				goto out;
957			}
958			PROC_UNLOCK(rule->rr_subject.rs_proc);
959			break;
960		case RCTL_SUBJECT_TYPE_USER:
961			error = str2id(subject_idstr, &id);
962			if (error != 0)
963				goto out;
964			rule->rr_subject.rs_uip = uifind(id);
965			break;
966		case RCTL_SUBJECT_TYPE_LOGINCLASS:
967			rule->rr_subject.rs_loginclass =
968			    loginclass_find(subject_idstr);
969			if (rule->rr_subject.rs_loginclass == NULL) {
970				error = ENAMETOOLONG;
971				goto out;
972			}
973			break;
974		case RCTL_SUBJECT_TYPE_JAIL:
975			rule->rr_subject.rs_prison_racct =
976			    prison_racct_find(subject_idstr);
977			if (rule->rr_subject.rs_prison_racct == NULL) {
978				error = ENAMETOOLONG;
979				goto out;
980			}
981			break;
982               default:
983                       panic("rctl_string_to_rule: unknown subject type %d",
984                           rule->rr_subject_type);
985               }
986	}
987
988	if (resourcestr == NULL || resourcestr[0] == '\0')
989		rule->rr_resource = RACCT_UNDEFINED;
990	else {
991		error = str2value(resourcestr, &rule->rr_resource,
992		    resourcenames);
993		if (error != 0)
994			goto out;
995	}
996
997	if (actionstr == NULL || actionstr[0] == '\0')
998		rule->rr_action = RCTL_ACTION_UNDEFINED;
999	else {
1000		error = str2value(actionstr, &rule->rr_action, actionnames);
1001		if (error != 0)
1002			goto out;
1003	}
1004
1005	if (amountstr == NULL || amountstr[0] == '\0')
1006		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1007	else {
1008		error = str2int64(amountstr, &rule->rr_amount);
1009		if (error != 0)
1010			goto out;
1011		if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
1012			rule->rr_amount *= 1000000;
1013	}
1014
1015	if (perstr == NULL || perstr[0] == '\0')
1016		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1017	else {
1018		error = str2value(perstr, &rule->rr_per, subjectnames);
1019		if (error != 0)
1020			goto out;
1021	}
1022
1023out:
1024	if (error == 0)
1025		*rulep = rule;
1026	else
1027		rctl_rule_release(rule);
1028
1029	return (error);
1030}
1031
1032/*
1033 * Link a rule with all the subjects it applies to.
1034 */
1035int
1036rctl_rule_add(struct rctl_rule *rule)
1037{
1038	struct proc *p;
1039	struct ucred *cred;
1040	struct uidinfo *uip;
1041	struct prison *pr;
1042	struct prison_racct *prr;
1043	struct loginclass *lc;
1044	struct rctl_rule *rule2;
1045	int match;
1046
1047	ASSERT_RACCT_ENABLED();
1048	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1049
1050	/*
1051	 * Some rules just don't make sense.  Note that the one below
1052	 * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
1053	 * for example, is not deniable in the racct sense, but the
1054	 * limit is enforced in a different way, so "deny" rules for %CPU
1055	 * do make sense.
1056	 */
1057	if (rule->rr_action == RCTL_ACTION_DENY &&
1058	    (rule->rr_resource == RACCT_CPU ||
1059	    rule->rr_resource == RACCT_WALLCLOCK))
1060		return (EOPNOTSUPP);
1061
1062	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1063	    RACCT_IS_SLOPPY(rule->rr_resource))
1064		return (EOPNOTSUPP);
1065
1066	/*
1067	 * Make sure there are no duplicated rules.  Also, for the "deny"
1068	 * rules, remove ones differing only by "amount".
1069	 */
1070	if (rule->rr_action == RCTL_ACTION_DENY) {
1071		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1072		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1073		rctl_rule_remove(rule2);
1074		rctl_rule_release(rule2);
1075	} else
1076		rctl_rule_remove(rule);
1077
1078	switch (rule->rr_subject_type) {
1079	case RCTL_SUBJECT_TYPE_PROCESS:
1080		p = rule->rr_subject.rs_proc;
1081		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1082
1083		rctl_racct_add_rule(p->p_racct, rule);
1084		/*
1085		 * In case of per-process rule, we don't have anything more
1086		 * to do.
1087		 */
1088		return (0);
1089
1090	case RCTL_SUBJECT_TYPE_USER:
1091		uip = rule->rr_subject.rs_uip;
1092		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1093		rctl_racct_add_rule(uip->ui_racct, rule);
1094		break;
1095
1096	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1097		lc = rule->rr_subject.rs_loginclass;
1098		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1099		rctl_racct_add_rule(lc->lc_racct, rule);
1100		break;
1101
1102	case RCTL_SUBJECT_TYPE_JAIL:
1103		prr = rule->rr_subject.rs_prison_racct;
1104		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1105		rctl_racct_add_rule(prr->prr_racct, rule);
1106		break;
1107
1108	default:
1109		panic("rctl_rule_add: unknown subject type %d",
1110		    rule->rr_subject_type);
1111	}
1112
1113	/*
1114	 * Now go through all the processes and add the new rule to the ones
1115	 * it applies to.
1116	 */
1117	sx_assert(&allproc_lock, SA_LOCKED);
1118	FOREACH_PROC_IN_SYSTEM(p) {
1119		cred = p->p_ucred;
1120		switch (rule->rr_subject_type) {
1121		case RCTL_SUBJECT_TYPE_USER:
1122			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1123			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1124				break;
1125			continue;
1126		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1127			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1128				break;
1129			continue;
1130		case RCTL_SUBJECT_TYPE_JAIL:
1131			match = 0;
1132			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1133				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1134					match = 1;
1135					break;
1136				}
1137			}
1138			if (match)
1139				break;
1140			continue;
1141		default:
1142			panic("rctl_rule_add: unknown subject type %d",
1143			    rule->rr_subject_type);
1144		}
1145
1146		rctl_racct_add_rule(p->p_racct, rule);
1147	}
1148
1149	return (0);
1150}
1151
1152static void
1153rctl_rule_pre_callback(void)
1154{
1155
1156	rw_wlock(&rctl_lock);
1157}
1158
1159static void
1160rctl_rule_post_callback(void)
1161{
1162
1163	rw_wunlock(&rctl_lock);
1164}
1165
1166static void
1167rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1168{
1169	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1170	int found = 0;
1171
1172	ASSERT_RACCT_ENABLED();
1173	rw_assert(&rctl_lock, RA_WLOCKED);
1174
1175	found += rctl_racct_remove_rules(racct, filter);
1176
1177	*((int *)arg3) += found;
1178}
1179
1180/*
1181 * Remove all rules that match the filter.
1182 */
1183int
1184rctl_rule_remove(struct rctl_rule *filter)
1185{
1186	int found = 0;
1187	struct proc *p;
1188
1189	ASSERT_RACCT_ENABLED();
1190
1191	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1192	    filter->rr_subject.rs_proc != NULL) {
1193		p = filter->rr_subject.rs_proc;
1194		rw_wlock(&rctl_lock);
1195		found = rctl_racct_remove_rules(p->p_racct, filter);
1196		rw_wunlock(&rctl_lock);
1197		if (found)
1198			return (0);
1199		return (ESRCH);
1200	}
1201
1202	loginclass_racct_foreach(rctl_rule_remove_callback,
1203	    rctl_rule_pre_callback, rctl_rule_post_callback,
1204	    filter, (void *)&found);
1205	ui_racct_foreach(rctl_rule_remove_callback,
1206	    rctl_rule_pre_callback, rctl_rule_post_callback,
1207	    filter, (void *)&found);
1208	prison_racct_foreach(rctl_rule_remove_callback,
1209	    rctl_rule_pre_callback, rctl_rule_post_callback,
1210	    filter, (void *)&found);
1211
1212	sx_assert(&allproc_lock, SA_LOCKED);
1213	rw_wlock(&rctl_lock);
1214	FOREACH_PROC_IN_SYSTEM(p) {
1215		found += rctl_racct_remove_rules(p->p_racct, filter);
1216	}
1217	rw_wunlock(&rctl_lock);
1218
1219	if (found)
1220		return (0);
1221	return (ESRCH);
1222}
1223
1224/*
1225 * Appends a rule to the sbuf.
1226 */
1227static void
1228rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1229{
1230	int64_t amount;
1231
1232	ASSERT_RACCT_ENABLED();
1233
1234	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1235
1236	switch (rule->rr_subject_type) {
1237	case RCTL_SUBJECT_TYPE_PROCESS:
1238		if (rule->rr_subject.rs_proc == NULL)
1239			sbuf_printf(sb, ":");
1240		else
1241			sbuf_printf(sb, "%d:",
1242			    rule->rr_subject.rs_proc->p_pid);
1243		break;
1244	case RCTL_SUBJECT_TYPE_USER:
1245		if (rule->rr_subject.rs_uip == NULL)
1246			sbuf_printf(sb, ":");
1247		else
1248			sbuf_printf(sb, "%d:",
1249			    rule->rr_subject.rs_uip->ui_uid);
1250		break;
1251	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1252		if (rule->rr_subject.rs_loginclass == NULL)
1253			sbuf_printf(sb, ":");
1254		else
1255			sbuf_printf(sb, "%s:",
1256			    rule->rr_subject.rs_loginclass->lc_name);
1257		break;
1258	case RCTL_SUBJECT_TYPE_JAIL:
1259		if (rule->rr_subject.rs_prison_racct == NULL)
1260			sbuf_printf(sb, ":");
1261		else
1262			sbuf_printf(sb, "%s:",
1263			    rule->rr_subject.rs_prison_racct->prr_name);
1264		break;
1265	default:
1266		panic("rctl_rule_to_sbuf: unknown subject type %d",
1267		    rule->rr_subject_type);
1268	}
1269
1270	amount = rule->rr_amount;
1271	if (amount != RCTL_AMOUNT_UNDEFINED &&
1272	    RACCT_IS_IN_MILLIONS(rule->rr_resource))
1273		amount /= 1000000;
1274
1275	sbuf_printf(sb, "%s:%s=%jd",
1276	    rctl_resource_name(rule->rr_resource),
1277	    rctl_action_name(rule->rr_action),
1278	    amount);
1279
1280	if (rule->rr_per != rule->rr_subject_type)
1281		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1282}
1283
1284/*
1285 * Routine used by RCTL syscalls to read in input string.
1286 */
1287static int
1288rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1289{
1290	int error;
1291	char *str;
1292
1293	ASSERT_RACCT_ENABLED();
1294
1295	if (inbuflen <= 0)
1296		return (EINVAL);
1297	if (inbuflen > RCTL_MAX_INBUFSIZE)
1298		return (E2BIG);
1299
1300	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1301	error = copyinstr(inbufp, str, inbuflen, NULL);
1302	if (error != 0) {
1303		free(str, M_RCTL);
1304		return (error);
1305	}
1306
1307	*inputstr = str;
1308
1309	return (0);
1310}
1311
1312/*
1313 * Routine used by RCTL syscalls to write out output string.
1314 */
1315static int
1316rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1317{
1318	int error;
1319
1320	ASSERT_RACCT_ENABLED();
1321
1322	if (outputsbuf == NULL)
1323		return (0);
1324
1325	sbuf_finish(outputsbuf);
1326	if (outbuflen < sbuf_len(outputsbuf) + 1) {
1327		sbuf_delete(outputsbuf);
1328		return (ERANGE);
1329	}
1330	error = copyout(sbuf_data(outputsbuf), outbufp,
1331	    sbuf_len(outputsbuf) + 1);
1332	sbuf_delete(outputsbuf);
1333	return (error);
1334}
1335
1336static struct sbuf *
1337rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1338{
1339	int i;
1340	int64_t amount;
1341	struct sbuf *sb;
1342
1343	ASSERT_RACCT_ENABLED();
1344
1345	sb = sbuf_new_auto();
1346	for (i = 0; i <= RACCT_MAX; i++) {
1347		if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1348			continue;
1349		amount = racct->r_resources[i];
1350		if (RACCT_IS_IN_MILLIONS(i))
1351			amount /= 1000000;
1352		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1353	}
1354	sbuf_setpos(sb, sbuf_len(sb) - 1);
1355	return (sb);
1356}
1357
1358int
1359sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1360{
1361	int error;
1362	char *inputstr;
1363	struct rctl_rule *filter;
1364	struct sbuf *outputsbuf = NULL;
1365	struct proc *p;
1366	struct uidinfo *uip;
1367	struct loginclass *lc;
1368	struct prison_racct *prr;
1369
1370	if (!racct_enable)
1371		return (ENOSYS);
1372
1373	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1374	if (error != 0)
1375		return (error);
1376
1377	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1378	if (error != 0)
1379		return (error);
1380
1381	sx_slock(&allproc_lock);
1382	error = rctl_string_to_rule(inputstr, &filter);
1383	free(inputstr, M_RCTL);
1384	if (error != 0) {
1385		sx_sunlock(&allproc_lock);
1386		return (error);
1387	}
1388
1389	switch (filter->rr_subject_type) {
1390	case RCTL_SUBJECT_TYPE_PROCESS:
1391		p = filter->rr_subject.rs_proc;
1392		if (p == NULL) {
1393			error = EINVAL;
1394			goto out;
1395		}
1396		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1397		break;
1398	case RCTL_SUBJECT_TYPE_USER:
1399		uip = filter->rr_subject.rs_uip;
1400		if (uip == NULL) {
1401			error = EINVAL;
1402			goto out;
1403		}
1404		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1405		break;
1406	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1407		lc = filter->rr_subject.rs_loginclass;
1408		if (lc == NULL) {
1409			error = EINVAL;
1410			goto out;
1411		}
1412		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1413		break;
1414	case RCTL_SUBJECT_TYPE_JAIL:
1415		prr = filter->rr_subject.rs_prison_racct;
1416		if (prr == NULL) {
1417			error = EINVAL;
1418			goto out;
1419		}
1420		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1421		break;
1422	default:
1423		error = EINVAL;
1424	}
1425out:
1426	rctl_rule_release(filter);
1427	sx_sunlock(&allproc_lock);
1428	if (error != 0)
1429		return (error);
1430
1431	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1432
1433	return (error);
1434}
1435
1436static void
1437rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1438{
1439	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1440	struct rctl_rule_link *link;
1441	struct sbuf *sb = (struct sbuf *)arg3;
1442
1443	ASSERT_RACCT_ENABLED();
1444	rw_assert(&rctl_lock, RA_LOCKED);
1445
1446	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1447		if (!rctl_rule_matches(link->rrl_rule, filter))
1448			continue;
1449		rctl_rule_to_sbuf(sb, link->rrl_rule);
1450		sbuf_printf(sb, ",");
1451	}
1452}
1453
1454int
1455sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1456{
1457	int error;
1458	size_t bufsize;
1459	char *inputstr, *buf;
1460	struct sbuf *sb;
1461	struct rctl_rule *filter;
1462	struct rctl_rule_link *link;
1463	struct proc *p;
1464
1465	if (!racct_enable)
1466		return (ENOSYS);
1467
1468	error = priv_check(td, PRIV_RCTL_GET_RULES);
1469	if (error != 0)
1470		return (error);
1471
1472	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1473	if (error != 0)
1474		return (error);
1475
1476	sx_slock(&allproc_lock);
1477	error = rctl_string_to_rule(inputstr, &filter);
1478	free(inputstr, M_RCTL);
1479	if (error != 0) {
1480		sx_sunlock(&allproc_lock);
1481		return (error);
1482	}
1483
1484	bufsize = uap->outbuflen;
1485	if (bufsize > rctl_maxbufsize) {
1486		sx_sunlock(&allproc_lock);
1487		return (E2BIG);
1488	}
1489
1490	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1491	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1492	KASSERT(sb != NULL, ("sbuf_new failed"));
1493
1494	FOREACH_PROC_IN_SYSTEM(p) {
1495		rw_rlock(&rctl_lock);
1496		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1497			/*
1498			 * Non-process rules will be added to the buffer later.
1499			 * Adding them here would result in duplicated output.
1500			 */
1501			if (link->rrl_rule->rr_subject_type !=
1502			    RCTL_SUBJECT_TYPE_PROCESS)
1503				continue;
1504			if (!rctl_rule_matches(link->rrl_rule, filter))
1505				continue;
1506			rctl_rule_to_sbuf(sb, link->rrl_rule);
1507			sbuf_printf(sb, ",");
1508		}
1509		rw_runlock(&rctl_lock);
1510	}
1511
1512	loginclass_racct_foreach(rctl_get_rules_callback,
1513	    rctl_rule_pre_callback, rctl_rule_post_callback,
1514	    filter, sb);
1515	ui_racct_foreach(rctl_get_rules_callback,
1516	    rctl_rule_pre_callback, rctl_rule_post_callback,
1517	    filter, sb);
1518	prison_racct_foreach(rctl_get_rules_callback,
1519	    rctl_rule_pre_callback, rctl_rule_post_callback,
1520	    filter, sb);
1521	if (sbuf_error(sb) == ENOMEM) {
1522		error = ERANGE;
1523		goto out;
1524	}
1525
1526	/*
1527	 * Remove trailing ",".
1528	 */
1529	if (sbuf_len(sb) > 0)
1530		sbuf_setpos(sb, sbuf_len(sb) - 1);
1531
1532	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1533out:
1534	rctl_rule_release(filter);
1535	sx_sunlock(&allproc_lock);
1536	free(buf, M_RCTL);
1537	return (error);
1538}
1539
1540int
1541sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1542{
1543	int error;
1544	size_t bufsize;
1545	char *inputstr, *buf;
1546	struct sbuf *sb;
1547	struct rctl_rule *filter;
1548	struct rctl_rule_link *link;
1549
1550	if (!racct_enable)
1551		return (ENOSYS);
1552
1553	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1554	if (error != 0)
1555		return (error);
1556
1557	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1558	if (error != 0)
1559		return (error);
1560
1561	sx_slock(&allproc_lock);
1562	error = rctl_string_to_rule(inputstr, &filter);
1563	free(inputstr, M_RCTL);
1564	if (error != 0) {
1565		sx_sunlock(&allproc_lock);
1566		return (error);
1567	}
1568
1569	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1570		rctl_rule_release(filter);
1571		sx_sunlock(&allproc_lock);
1572		return (EINVAL);
1573	}
1574	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1575		rctl_rule_release(filter);
1576		sx_sunlock(&allproc_lock);
1577		return (EOPNOTSUPP);
1578	}
1579	if (filter->rr_subject.rs_proc == NULL) {
1580		rctl_rule_release(filter);
1581		sx_sunlock(&allproc_lock);
1582		return (EINVAL);
1583	}
1584
1585	bufsize = uap->outbuflen;
1586	if (bufsize > rctl_maxbufsize) {
1587		rctl_rule_release(filter);
1588		sx_sunlock(&allproc_lock);
1589		return (E2BIG);
1590	}
1591
1592	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1593	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1594	KASSERT(sb != NULL, ("sbuf_new failed"));
1595
1596	rw_rlock(&rctl_lock);
1597	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1598	    rrl_next) {
1599		rctl_rule_to_sbuf(sb, link->rrl_rule);
1600		sbuf_printf(sb, ",");
1601	}
1602	rw_runlock(&rctl_lock);
1603	if (sbuf_error(sb) == ENOMEM) {
1604		error = ERANGE;
1605		goto out;
1606	}
1607
1608	/*
1609	 * Remove trailing ",".
1610	 */
1611	if (sbuf_len(sb) > 0)
1612		sbuf_setpos(sb, sbuf_len(sb) - 1);
1613
1614	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1615out:
1616	rctl_rule_release(filter);
1617	sx_sunlock(&allproc_lock);
1618	free(buf, M_RCTL);
1619	return (error);
1620}
1621
1622int
1623sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1624{
1625	int error;
1626	struct rctl_rule *rule;
1627	char *inputstr;
1628
1629	if (!racct_enable)
1630		return (ENOSYS);
1631
1632	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1633	if (error != 0)
1634		return (error);
1635
1636	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1637	if (error != 0)
1638		return (error);
1639
1640	sx_slock(&allproc_lock);
1641	error = rctl_string_to_rule(inputstr, &rule);
1642	free(inputstr, M_RCTL);
1643	if (error != 0) {
1644		sx_sunlock(&allproc_lock);
1645		return (error);
1646	}
1647	/*
1648	 * The 'per' part of a rule is optional.
1649	 */
1650	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1651	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1652		rule->rr_per = rule->rr_subject_type;
1653
1654	if (!rctl_rule_fully_specified(rule)) {
1655		error = EINVAL;
1656		goto out;
1657	}
1658
1659	error = rctl_rule_add(rule);
1660
1661out:
1662	rctl_rule_release(rule);
1663	sx_sunlock(&allproc_lock);
1664	return (error);
1665}
1666
1667int
1668sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1669{
1670	int error;
1671	struct rctl_rule *filter;
1672	char *inputstr;
1673
1674	if (!racct_enable)
1675		return (ENOSYS);
1676
1677	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1678	if (error != 0)
1679		return (error);
1680
1681	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1682	if (error != 0)
1683		return (error);
1684
1685	sx_slock(&allproc_lock);
1686	error = rctl_string_to_rule(inputstr, &filter);
1687	free(inputstr, M_RCTL);
1688	if (error != 0) {
1689		sx_sunlock(&allproc_lock);
1690		return (error);
1691	}
1692
1693	error = rctl_rule_remove(filter);
1694	rctl_rule_release(filter);
1695	sx_sunlock(&allproc_lock);
1696
1697	return (error);
1698}
1699
1700/*
1701 * Update RCTL rule list after credential change.
1702 */
1703void
1704rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1705{
1706	int rulecnt, i;
1707	struct rctl_rule_link *link, *newlink;
1708	struct uidinfo *newuip;
1709	struct loginclass *newlc;
1710	struct prison_racct *newprr;
1711	LIST_HEAD(, rctl_rule_link) newrules;
1712
1713	ASSERT_RACCT_ENABLED();
1714
1715	newuip = newcred->cr_ruidinfo;
1716	newlc = newcred->cr_loginclass;
1717	newprr = newcred->cr_prison->pr_prison_racct;
1718
1719	LIST_INIT(&newrules);
1720
1721again:
1722	/*
1723	 * First, count the rules that apply to the process with new
1724	 * credentials.
1725	 */
1726	rulecnt = 0;
1727	rw_rlock(&rctl_lock);
1728	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1729		if (link->rrl_rule->rr_subject_type ==
1730		    RCTL_SUBJECT_TYPE_PROCESS)
1731			rulecnt++;
1732	}
1733	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1734		rulecnt++;
1735	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1736		rulecnt++;
1737	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1738		rulecnt++;
1739	rw_runlock(&rctl_lock);
1740
1741	/*
1742	 * Create temporary list.  We've dropped the rctl_lock in order
1743	 * to use M_WAITOK.
1744	 */
1745	for (i = 0; i < rulecnt; i++) {
1746		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1747		newlink->rrl_rule = NULL;
1748		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1749	}
1750
1751	newlink = LIST_FIRST(&newrules);
1752
1753	/*
1754	 * Assign rules to the newly allocated list entries.
1755	 */
1756	rw_wlock(&rctl_lock);
1757	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1758		if (link->rrl_rule->rr_subject_type ==
1759		    RCTL_SUBJECT_TYPE_PROCESS) {
1760			if (newlink == NULL)
1761				goto goaround;
1762			rctl_rule_acquire(link->rrl_rule);
1763			newlink->rrl_rule = link->rrl_rule;
1764			newlink = LIST_NEXT(newlink, rrl_next);
1765			rulecnt--;
1766		}
1767	}
1768
1769	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1770		if (newlink == NULL)
1771			goto goaround;
1772		rctl_rule_acquire(link->rrl_rule);
1773		newlink->rrl_rule = link->rrl_rule;
1774		newlink = LIST_NEXT(newlink, rrl_next);
1775		rulecnt--;
1776	}
1777
1778	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1779		if (newlink == NULL)
1780			goto goaround;
1781		rctl_rule_acquire(link->rrl_rule);
1782		newlink->rrl_rule = link->rrl_rule;
1783		newlink = LIST_NEXT(newlink, rrl_next);
1784		rulecnt--;
1785	}
1786
1787	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1788		if (newlink == NULL)
1789			goto goaround;
1790		rctl_rule_acquire(link->rrl_rule);
1791		newlink->rrl_rule = link->rrl_rule;
1792		newlink = LIST_NEXT(newlink, rrl_next);
1793		rulecnt--;
1794	}
1795
1796	if (rulecnt == 0) {
1797		/*
1798		 * Free the old rule list.
1799		 */
1800		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1801			link = LIST_FIRST(&p->p_racct->r_rule_links);
1802			LIST_REMOVE(link, rrl_next);
1803			rctl_rule_release(link->rrl_rule);
1804			uma_zfree(rctl_rule_link_zone, link);
1805		}
1806
1807		/*
1808		 * Replace lists and we're done.
1809		 *
1810		 * XXX: Is there any way to switch list heads instead
1811		 *      of iterating here?
1812		 */
1813		while (!LIST_EMPTY(&newrules)) {
1814			newlink = LIST_FIRST(&newrules);
1815			LIST_REMOVE(newlink, rrl_next);
1816			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1817			    newlink, rrl_next);
1818		}
1819
1820		rw_wunlock(&rctl_lock);
1821
1822		return;
1823	}
1824
1825goaround:
1826	rw_wunlock(&rctl_lock);
1827
1828	/*
1829	 * Rule list changed while we were not holding the rctl_lock.
1830	 * Free the new list and try again.
1831	 */
1832	while (!LIST_EMPTY(&newrules)) {
1833		newlink = LIST_FIRST(&newrules);
1834		LIST_REMOVE(newlink, rrl_next);
1835		if (newlink->rrl_rule != NULL)
1836			rctl_rule_release(newlink->rrl_rule);
1837		uma_zfree(rctl_rule_link_zone, newlink);
1838	}
1839
1840	goto again;
1841}
1842
1843/*
1844 * Assign RCTL rules to the newly created process.
1845 */
1846int
1847rctl_proc_fork(struct proc *parent, struct proc *child)
1848{
1849	int error;
1850	struct rctl_rule_link *link;
1851	struct rctl_rule *rule;
1852
1853	LIST_INIT(&child->p_racct->r_rule_links);
1854
1855	ASSERT_RACCT_ENABLED();
1856	KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
1857
1858	rw_wlock(&rctl_lock);
1859
1860	/*
1861	 * Go through limits applicable to the parent and assign them
1862	 * to the child.  Rules with 'process' subject have to be duplicated
1863	 * in order to make their rr_subject point to the new process.
1864	 */
1865	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1866		if (link->rrl_rule->rr_subject_type ==
1867		    RCTL_SUBJECT_TYPE_PROCESS) {
1868			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1869			if (rule == NULL)
1870				goto fail;
1871			KASSERT(rule->rr_subject.rs_proc == parent,
1872			    ("rule->rr_subject.rs_proc != parent"));
1873			rule->rr_subject.rs_proc = child;
1874			error = rctl_racct_add_rule_locked(child->p_racct,
1875			    rule);
1876			rctl_rule_release(rule);
1877			if (error != 0)
1878				goto fail;
1879		} else {
1880			error = rctl_racct_add_rule_locked(child->p_racct,
1881			    link->rrl_rule);
1882			if (error != 0)
1883				goto fail;
1884		}
1885	}
1886
1887	rw_wunlock(&rctl_lock);
1888	return (0);
1889
1890fail:
1891	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1892		link = LIST_FIRST(&child->p_racct->r_rule_links);
1893		LIST_REMOVE(link, rrl_next);
1894		rctl_rule_release(link->rrl_rule);
1895		uma_zfree(rctl_rule_link_zone, link);
1896	}
1897	rw_wunlock(&rctl_lock);
1898	return (EAGAIN);
1899}
1900
1901/*
1902 * Release rules attached to the racct.
1903 */
1904void
1905rctl_racct_release(struct racct *racct)
1906{
1907	struct rctl_rule_link *link;
1908
1909	ASSERT_RACCT_ENABLED();
1910
1911	rw_wlock(&rctl_lock);
1912	while (!LIST_EMPTY(&racct->r_rule_links)) {
1913		link = LIST_FIRST(&racct->r_rule_links);
1914		LIST_REMOVE(link, rrl_next);
1915		rctl_rule_release(link->rrl_rule);
1916		uma_zfree(rctl_rule_link_zone, link);
1917	}
1918	rw_wunlock(&rctl_lock);
1919}
1920
1921static void
1922rctl_init(void)
1923{
1924
1925	if (!racct_enable)
1926		return;
1927
1928	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1929	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1930	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1931	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1932	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1933}
1934
1935#else /* !RCTL */
1936
1937int
1938sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1939{
1940
1941	return (ENOSYS);
1942}
1943
1944int
1945sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1946{
1947
1948	return (ENOSYS);
1949}
1950
1951int
1952sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1953{
1954
1955	return (ENOSYS);
1956}
1957
1958int
1959sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1960{
1961
1962	return (ENOSYS);
1963}
1964
1965int
1966sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1967{
1968
1969	return (ENOSYS);
1970}
1971
1972#endif /* !RCTL */
1973