1/*-
2 * Copyright (c) 2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD$
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD$");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/malloc.h>
38#include <sys/queue.h>
39#include <sys/refcount.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/limits.h>
43#include <sys/loginclass.h>
44#include <sys/priv.h>
45#include <sys/proc.h>
46#include <sys/racct.h>
47#include <sys/rctl.h>
48#include <sys/resourcevar.h>
49#include <sys/sx.h>
50#include <sys/sysent.h>
51#include <sys/sysproto.h>
52#include <sys/systm.h>
53#include <sys/types.h>
54#include <sys/eventhandler.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#include <sys/rwlock.h>
58#include <sys/sbuf.h>
59#include <sys/taskqueue.h>
60#include <sys/tree.h>
61#include <vm/uma.h>
62
63#ifdef RCTL
64#ifndef RACCT
65#error "The RCTL option requires the RACCT option"
66#endif
67
68FEATURE(rctl, "Resource Limits");
69
/*
 * HRF_* flag values.
 * NOTE(review): not referenced in the visible part of this file; their
 * consumers are presumably elsewhere - confirm before relying on them.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/* Default buffer size for rctl_get_rules(2). */
#define	RCTL_DEFAULT_BUFSIZE	4096
/*
 * NOTE(review): presumably the upper bound on the length of an input
 * buffer accepted from userland - confirm against the syscall code.
 */
#define	RCTL_MAX_INBUFLEN	4096
/*
 * Size of the fixed-length buffer in which rctl_enforce() formats the
 * rule text for "log" and "devctl" actions.
 */
#define	RCTL_LOG_BUFSIZE	128

/*
 * Safety margin subtracted by rctl_pcpu_available() from the available
 * %cpu amount when the limit is larger than twice this value.
 */
#define	RCTL_PCPU_SHIFT		(10 * 1000000)
80
/*
 * 'rctl_rule_link' connects a rule with every racct it's related to.
 * For example, rule 'user:X:openfiles:deny=N/process' is linked
 * with uidinfo for user X, and to each process of that user.
 */
struct rctl_rule_link {
	/* Linkage on the owning racct's r_rule_links list. */
	LIST_ENTRY(rctl_rule_link)	rrl_next;
	/* The rule this link refers to; the link holds a reference on it. */
	struct rctl_rule		*rrl_rule;
	/*
	 * Set to 1 by rctl_enforce() once the rule's log/devctl/signal
	 * action has been carried out, and reset to 0 when the rule is
	 * found not to be exceeded; keeps the action from repeating.
	 */
	int				rrl_exceeded;
};
91
/*
 * One entry of a name <-> value translation table.  Tables built from
 * this structure are terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		d_value;	/* numeric value the name stands for */
};
96
97static struct dict subjectnames[] = {
98	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
99	{ "user", RCTL_SUBJECT_TYPE_USER },
100	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
101	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
102	{ NULL, -1 }};
103
104static struct dict resourcenames[] = {
105	{ "cputime", RACCT_CPU },
106	{ "datasize", RACCT_DATA },
107	{ "stacksize", RACCT_STACK },
108	{ "coredumpsize", RACCT_CORE },
109	{ "memoryuse", RACCT_RSS },
110	{ "memorylocked", RACCT_MEMLOCK },
111	{ "maxproc", RACCT_NPROC },
112	{ "openfiles", RACCT_NOFILE },
113	{ "vmemoryuse", RACCT_VMEM },
114	{ "pseudoterminals", RACCT_NPTS },
115	{ "swapuse", RACCT_SWAP },
116	{ "nthr", RACCT_NTHR },
117	{ "msgqqueued", RACCT_MSGQQUEUED },
118	{ "msgqsize", RACCT_MSGQSIZE },
119	{ "nmsgq", RACCT_NMSGQ },
120	{ "nsem", RACCT_NSEM },
121	{ "nsemop", RACCT_NSEMOP },
122	{ "nshm", RACCT_NSHM },
123	{ "shmsize", RACCT_SHMSIZE },
124	{ "wallclock", RACCT_WALLCLOCK },
125	{ "pcpu", RACCT_PCTCPU },
126	{ NULL, -1 }};
127
128static struct dict actionnames[] = {
129	{ "sighup", RCTL_ACTION_SIGHUP },
130	{ "sigint", RCTL_ACTION_SIGINT },
131	{ "sigquit", RCTL_ACTION_SIGQUIT },
132	{ "sigill", RCTL_ACTION_SIGILL },
133	{ "sigtrap", RCTL_ACTION_SIGTRAP },
134	{ "sigabrt", RCTL_ACTION_SIGABRT },
135	{ "sigemt", RCTL_ACTION_SIGEMT },
136	{ "sigfpe", RCTL_ACTION_SIGFPE },
137	{ "sigkill", RCTL_ACTION_SIGKILL },
138	{ "sigbus", RCTL_ACTION_SIGBUS },
139	{ "sigsegv", RCTL_ACTION_SIGSEGV },
140	{ "sigsys", RCTL_ACTION_SIGSYS },
141	{ "sigpipe", RCTL_ACTION_SIGPIPE },
142	{ "sigalrm", RCTL_ACTION_SIGALRM },
143	{ "sigterm", RCTL_ACTION_SIGTERM },
144	{ "sigurg", RCTL_ACTION_SIGURG },
145	{ "sigstop", RCTL_ACTION_SIGSTOP },
146	{ "sigtstp", RCTL_ACTION_SIGTSTP },
147	{ "sigchld", RCTL_ACTION_SIGCHLD },
148	{ "sigttin", RCTL_ACTION_SIGTTIN },
149	{ "sigttou", RCTL_ACTION_SIGTTOU },
150	{ "sigio", RCTL_ACTION_SIGIO },
151	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
152	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
153	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
154	{ "sigprof", RCTL_ACTION_SIGPROF },
155	{ "sigwinch", RCTL_ACTION_SIGWINCH },
156	{ "siginfo", RCTL_ACTION_SIGINFO },
157	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
158	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
159	{ "sigthr", RCTL_ACTION_SIGTHR },
160	{ "deny", RCTL_ACTION_DENY },
161	{ "log", RCTL_ACTION_LOG },
162	{ "devctl", RCTL_ACTION_DEVCTL },
163	{ NULL, -1 }};
164
/* Subsystem initialization routine, registered to run via SYSINIT. */
static void rctl_init(void);
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);

/* UMA zone backing 'struct rctl_rule_link' (see rctl_racct_add_rule()). */
static uma_zone_t rctl_rule_link_zone;
/* UMA zone backing 'struct rctl_rule' (see rctl_rule_alloc()). */
static uma_zone_t rctl_rule_zone;
/*
 * Read/write lock held while the per-racct r_rule_links lists are
 * traversed (read) or modified (write).
 */
static struct rwlock rctl_lock;
RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");

/* Forward declarations for helpers defined further down in this file. */
static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);

/* malloc(9) type used for the temporary message buffers in rctl_enforce(). */
static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
177
178static const char *
179rctl_subject_type_name(int subject)
180{
181	int i;
182
183	for (i = 0; subjectnames[i].d_name != NULL; i++) {
184		if (subjectnames[i].d_value == subject)
185			return (subjectnames[i].d_name);
186	}
187
188	panic("rctl_subject_type_name: unknown subject type %d", subject);
189}
190
191static const char *
192rctl_action_name(int action)
193{
194	int i;
195
196	for (i = 0; actionnames[i].d_name != NULL; i++) {
197		if (actionnames[i].d_value == action)
198			return (actionnames[i].d_name);
199	}
200
201	panic("rctl_action_name: unknown action %d", action);
202}
203
204const char *
205rctl_resource_name(int resource)
206{
207	int i;
208
209	for (i = 0; resourcenames[i].d_name != NULL; i++) {
210		if (resourcenames[i].d_value == resource)
211			return (resourcenames[i].d_name);
212	}
213
214	panic("rctl_resource_name: unknown resource %d", resource);
215}
216
217/*
218 * Return the amount of resource that can be allocated by 'p' before
219 * hitting 'rule'.
220 */
221static int64_t
222rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
223{
224	int resource;
225	int64_t available = INT64_MAX;
226	struct ucred *cred = p->p_ucred;
227
228	rw_assert(&rctl_lock, RA_LOCKED);
229
230	resource = rule->rr_resource;
231	switch (rule->rr_per) {
232	case RCTL_SUBJECT_TYPE_PROCESS:
233		available = rule->rr_amount -
234		    p->p_racct->r_resources[resource];
235		break;
236	case RCTL_SUBJECT_TYPE_USER:
237		available = rule->rr_amount -
238		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
239		break;
240	case RCTL_SUBJECT_TYPE_LOGINCLASS:
241		available = rule->rr_amount -
242		    cred->cr_loginclass->lc_racct->r_resources[resource];
243		break;
244	case RCTL_SUBJECT_TYPE_JAIL:
245		available = rule->rr_amount -
246		    cred->cr_prison->pr_prison_racct->prr_racct->
247		        r_resources[resource];
248		break;
249	default:
250		panic("rctl_compute_available: unknown per %d",
251		    rule->rr_per);
252	}
253
254	return (available);
255}
256
257/*
258 * Return non-zero if allocating 'amount' by proc 'p' would exceed
259 * resource limit specified by 'rule'.
260 */
261static int
262rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
263    int64_t amount)
264{
265	int64_t available;
266
267	rw_assert(&rctl_lock, RA_LOCKED);
268
269	available = rctl_available_resource(p, rule);
270	if (available >= amount)
271		return (0);
272
273	return (1);
274}
275
276/*
277 * Special version of rctl_available() function for the %cpu resource.
278 * We slightly cheat here and return less than we normally would.
279 */
280int64_t
281rctl_pcpu_available(const struct proc *p) {
282	struct rctl_rule *rule;
283	struct rctl_rule_link *link;
284	int64_t available, minavailable, limit;
285
286	minavailable = INT64_MAX;
287	limit = 0;
288
289	rw_rlock(&rctl_lock);
290
291	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
292		rule = link->rrl_rule;
293		if (rule->rr_resource != RACCT_PCTCPU)
294			continue;
295		if (rule->rr_action != RCTL_ACTION_DENY)
296			continue;
297		available = rctl_available_resource(p, rule);
298		if (available < minavailable) {
299			minavailable = available;
300			limit = rule->rr_amount;
301		}
302	}
303
304	rw_runlock(&rctl_lock);
305
306	/*
307	 * Return slightly less than actual value of the available
308	 * %cpu resource.  This makes %cpu throttling more agressive
309	 * and lets us act sooner than the limits are already exceeded.
310	 */
311	if (limit != 0) {
312		if (limit > 2 * RCTL_PCPU_SHIFT)
313			minavailable -= RCTL_PCPU_SHIFT;
314		else
315			minavailable -= (limit / 2);
316	}
317
318	return (minavailable);
319}
320
321/*
322 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
323 * to what it keeps allocated now.  Returns non-zero if the allocation should
324 * be denied, 0 otherwise.
325 */
326int
327rctl_enforce(struct proc *p, int resource, uint64_t amount)
328{
329	struct rctl_rule *rule;
330	struct rctl_rule_link *link;
331	struct sbuf sb;
332	int should_deny = 0;
333	char *buf;
334	static int curtime = 0;
335	static struct timeval lasttime;
336
337	rw_rlock(&rctl_lock);
338
339	/*
340	 * There may be more than one matching rule; go through all of them.
341	 * Denial should be done last, after logging and sending signals.
342	 */
343	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
344		rule = link->rrl_rule;
345		if (rule->rr_resource != resource)
346			continue;
347		if (!rctl_would_exceed(p, rule, amount)) {
348			link->rrl_exceeded = 0;
349			continue;
350		}
351
352		switch (rule->rr_action) {
353		case RCTL_ACTION_DENY:
354			should_deny = 1;
355			continue;
356		case RCTL_ACTION_LOG:
357			/*
358			 * If rrl_exceeded != 0, it means we've already
359			 * logged a warning for this process.
360			 */
361			if (link->rrl_exceeded != 0)
362				continue;
363
364			/*
365			 * If the process state is not fully initialized yet,
366			 * we can't access most of the required fields, e.g.
367			 * p->p_comm.  This happens when called from fork1().
368			 * Ignore this rule for now; it will be processed just
369			 * after fork, when called from racct_proc_fork_done().
370			 */
371			if (p->p_state != PRS_NORMAL)
372				continue;
373
374			if (!ppsratecheck(&lasttime, &curtime, 10))
375				continue;
376
377			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
378			if (buf == NULL) {
379				printf("rctl_enforce: out of memory\n");
380				continue;
381			}
382			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
383			rctl_rule_to_sbuf(&sb, rule);
384			sbuf_finish(&sb);
385			printf("rctl: rule \"%s\" matched by pid %d "
386			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
387			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
388			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
389			sbuf_delete(&sb);
390			free(buf, M_RCTL);
391			link->rrl_exceeded = 1;
392			continue;
393		case RCTL_ACTION_DEVCTL:
394			if (link->rrl_exceeded != 0)
395				continue;
396
397			if (p->p_state != PRS_NORMAL)
398				continue;
399
400			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
401			if (buf == NULL) {
402				printf("rctl_enforce: out of memory\n");
403				continue;
404			}
405			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
406			sbuf_printf(&sb, "rule=");
407			rctl_rule_to_sbuf(&sb, rule);
408			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
409			    p->p_pid, p->p_ucred->cr_ruid,
410			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
411			sbuf_finish(&sb);
412			devctl_notify_f("RCTL", "rule", "matched",
413			    sbuf_data(&sb), M_NOWAIT);
414			sbuf_delete(&sb);
415			free(buf, M_RCTL);
416			link->rrl_exceeded = 1;
417			continue;
418		default:
419			if (link->rrl_exceeded != 0)
420				continue;
421
422			if (p->p_state != PRS_NORMAL)
423				continue;
424
425			KASSERT(rule->rr_action > 0 &&
426			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
427			    ("rctl_enforce: unknown action %d",
428			     rule->rr_action));
429
430			/*
431			 * We're using the fact that RCTL_ACTION_SIG* values
432			 * are equal to their counterparts from sys/signal.h.
433			 */
434			kern_psignal(p, rule->rr_action);
435			link->rrl_exceeded = 1;
436			continue;
437		}
438	}
439
440	rw_runlock(&rctl_lock);
441
442	if (should_deny) {
443		/*
444		 * Return fake error code; the caller should change it
445		 * into one proper for the situation - EFSIZ, ENOMEM etc.
446		 */
447		return (EDOOFUS);
448	}
449
450	return (0);
451}
452
453uint64_t
454rctl_get_limit(struct proc *p, int resource)
455{
456	struct rctl_rule *rule;
457	struct rctl_rule_link *link;
458	uint64_t amount = UINT64_MAX;
459
460	rw_rlock(&rctl_lock);
461
462	/*
463	 * There may be more than one matching rule; go through all of them.
464	 * Denial should be done last, after logging and sending signals.
465	 */
466	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
467		rule = link->rrl_rule;
468		if (rule->rr_resource != resource)
469			continue;
470		if (rule->rr_action != RCTL_ACTION_DENY)
471			continue;
472		if (rule->rr_amount < amount)
473			amount = rule->rr_amount;
474	}
475
476	rw_runlock(&rctl_lock);
477
478	return (amount);
479}
480
481uint64_t
482rctl_get_available(struct proc *p, int resource)
483{
484	struct rctl_rule *rule;
485	struct rctl_rule_link *link;
486	int64_t available, minavailable, allocated;
487
488	minavailable = INT64_MAX;
489
490	rw_rlock(&rctl_lock);
491
492	/*
493	 * There may be more than one matching rule; go through all of them.
494	 * Denial should be done last, after logging and sending signals.
495	 */
496	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
497		rule = link->rrl_rule;
498		if (rule->rr_resource != resource)
499			continue;
500		if (rule->rr_action != RCTL_ACTION_DENY)
501			continue;
502		available = rctl_available_resource(p, rule);
503		if (available < minavailable)
504			minavailable = available;
505	}
506
507	rw_runlock(&rctl_lock);
508
509	/*
510	 * XXX: Think about this _hard_.
511	 */
512	allocated = p->p_racct->r_resources[resource];
513	if (minavailable < INT64_MAX - allocated)
514		minavailable += allocated;
515	if (minavailable < 0)
516		minavailable = 0;
517	return (minavailable);
518}
519
520static int
521rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
522{
523
524	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
525		if (rule->rr_subject_type != filter->rr_subject_type)
526			return (0);
527
528		switch (filter->rr_subject_type) {
529		case RCTL_SUBJECT_TYPE_PROCESS:
530			if (filter->rr_subject.rs_proc != NULL &&
531			    rule->rr_subject.rs_proc !=
532			    filter->rr_subject.rs_proc)
533				return (0);
534			break;
535		case RCTL_SUBJECT_TYPE_USER:
536			if (filter->rr_subject.rs_uip != NULL &&
537			    rule->rr_subject.rs_uip !=
538			    filter->rr_subject.rs_uip)
539				return (0);
540			break;
541		case RCTL_SUBJECT_TYPE_LOGINCLASS:
542			if (filter->rr_subject.rs_loginclass != NULL &&
543			    rule->rr_subject.rs_loginclass !=
544			    filter->rr_subject.rs_loginclass)
545				return (0);
546			break;
547		case RCTL_SUBJECT_TYPE_JAIL:
548			if (filter->rr_subject.rs_prison_racct != NULL &&
549			    rule->rr_subject.rs_prison_racct !=
550			    filter->rr_subject.rs_prison_racct)
551				return (0);
552			break;
553		default:
554			panic("rctl_rule_matches: unknown subject type %d",
555			    filter->rr_subject_type);
556		}
557	}
558
559	if (filter->rr_resource != RACCT_UNDEFINED) {
560		if (rule->rr_resource != filter->rr_resource)
561			return (0);
562	}
563
564	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
565		if (rule->rr_action != filter->rr_action)
566			return (0);
567	}
568
569	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
570		if (rule->rr_amount != filter->rr_amount)
571			return (0);
572	}
573
574	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
575		if (rule->rr_per != filter->rr_per)
576			return (0);
577	}
578
579	return (1);
580}
581
582static int
583str2value(const char *str, int *value, struct dict *table)
584{
585	int i;
586
587	if (value == NULL)
588		return (EINVAL);
589
590	for (i = 0; table[i].d_name != NULL; i++) {
591		if (strcasecmp(table[i].d_name, str) == 0) {
592			*value =  table[i].d_value;
593			return (0);
594		}
595	}
596
597	return (EINVAL);
598}
599
600static int
601str2id(const char *str, id_t *value)
602{
603	char *end;
604
605	if (str == NULL)
606		return (EINVAL);
607
608	*value = strtoul(str, &end, 10);
609	if ((size_t)(end - str) != strlen(str))
610		return (EINVAL);
611
612	return (0);
613}
614
/*
 * Parse 'str' as a base-10 number and store it in '*value'.
 *
 * Returns 0 on success, EINVAL when 'str' is NULL or has characters left
 * over after the number, and ERANGE when the parsed value does not fit
 * into a non-negative int64_t.  '*value' is only written on success.
 *
 * The conversion goes through strtoul(), which yields an unsigned
 * result; without the range check a value above INT64_MAX (which is
 * also what an input such as "-1" turns into) would silently end up as
 * a negative amount.
 */
static int
str2int64(const char *str, int64_t *value)
{
	char *tail;
	unsigned long parsed;

	if (str == NULL)
		return (EINVAL);

	parsed = strtoul(str, &tail, 10);

	/* Anything left unparsed makes the whole string invalid. */
	if (*tail != '\0')
		return (EINVAL);

	if ((uint64_t)parsed > (uint64_t)INT64_MAX)
		return (ERANGE);

	*value = (int64_t)parsed;

	return (0);
}
629
630/*
631 * Connect the rule to the racct, increasing refcount for the rule.
632 */
633static void
634rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
635{
636	struct rctl_rule_link *link;
637
638	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
639
640	rctl_rule_acquire(rule);
641	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
642	link->rrl_rule = rule;
643	link->rrl_exceeded = 0;
644
645	rw_wlock(&rctl_lock);
646	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
647	rw_wunlock(&rctl_lock);
648}
649
650static int
651rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
652{
653	struct rctl_rule_link *link;
654
655	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
656	rw_assert(&rctl_lock, RA_WLOCKED);
657
658	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
659	if (link == NULL)
660		return (ENOMEM);
661	rctl_rule_acquire(rule);
662	link->rrl_rule = rule;
663	link->rrl_exceeded = 0;
664
665	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
666	return (0);
667}
668
669/*
670 * Remove limits for a rules matching the filter and release
671 * the refcounts for the rules, possibly freeing them.  Returns
672 * the number of limit structures removed.
673 */
674static int
675rctl_racct_remove_rules(struct racct *racct,
676    const struct rctl_rule *filter)
677{
678	int removed = 0;
679	struct rctl_rule_link *link, *linktmp;
680
681	rw_assert(&rctl_lock, RA_WLOCKED);
682
683	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
684		if (!rctl_rule_matches(link->rrl_rule, filter))
685			continue;
686
687		LIST_REMOVE(link, rrl_next);
688		rctl_rule_release(link->rrl_rule);
689		uma_zfree(rctl_rule_link_zone, link);
690		removed++;
691	}
692	return (removed);
693}
694
695static void
696rctl_rule_acquire_subject(struct rctl_rule *rule)
697{
698
699	switch (rule->rr_subject_type) {
700	case RCTL_SUBJECT_TYPE_UNDEFINED:
701	case RCTL_SUBJECT_TYPE_PROCESS:
702		break;
703	case RCTL_SUBJECT_TYPE_JAIL:
704		if (rule->rr_subject.rs_prison_racct != NULL)
705			prison_racct_hold(rule->rr_subject.rs_prison_racct);
706		break;
707	case RCTL_SUBJECT_TYPE_USER:
708		if (rule->rr_subject.rs_uip != NULL)
709			uihold(rule->rr_subject.rs_uip);
710		break;
711	case RCTL_SUBJECT_TYPE_LOGINCLASS:
712		if (rule->rr_subject.rs_loginclass != NULL)
713			loginclass_hold(rule->rr_subject.rs_loginclass);
714		break;
715	default:
716		panic("rctl_rule_acquire_subject: unknown subject type %d",
717		    rule->rr_subject_type);
718	}
719}
720
721static void
722rctl_rule_release_subject(struct rctl_rule *rule)
723{
724
725	switch (rule->rr_subject_type) {
726	case RCTL_SUBJECT_TYPE_UNDEFINED:
727	case RCTL_SUBJECT_TYPE_PROCESS:
728		break;
729	case RCTL_SUBJECT_TYPE_JAIL:
730		if (rule->rr_subject.rs_prison_racct != NULL)
731			prison_racct_free(rule->rr_subject.rs_prison_racct);
732		break;
733	case RCTL_SUBJECT_TYPE_USER:
734		if (rule->rr_subject.rs_uip != NULL)
735			uifree(rule->rr_subject.rs_uip);
736		break;
737	case RCTL_SUBJECT_TYPE_LOGINCLASS:
738		if (rule->rr_subject.rs_loginclass != NULL)
739			loginclass_free(rule->rr_subject.rs_loginclass);
740		break;
741	default:
742		panic("rctl_rule_release_subject: unknown subject type %d",
743		    rule->rr_subject_type);
744	}
745}
746
747struct rctl_rule *
748rctl_rule_alloc(int flags)
749{
750	struct rctl_rule *rule;
751
752	rule = uma_zalloc(rctl_rule_zone, flags);
753	if (rule == NULL)
754		return (NULL);
755	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
756	rule->rr_subject.rs_proc = NULL;
757	rule->rr_subject.rs_uip = NULL;
758	rule->rr_subject.rs_loginclass = NULL;
759	rule->rr_subject.rs_prison_racct = NULL;
760	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
761	rule->rr_resource = RACCT_UNDEFINED;
762	rule->rr_action = RCTL_ACTION_UNDEFINED;
763	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
764	refcount_init(&rule->rr_refcount, 1);
765
766	return (rule);
767}
768
769struct rctl_rule *
770rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
771{
772	struct rctl_rule *copy;
773
774	copy = uma_zalloc(rctl_rule_zone, flags);
775	if (copy == NULL)
776		return (NULL);
777	copy->rr_subject_type = rule->rr_subject_type;
778	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
779	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
780	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
781	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
782	copy->rr_per = rule->rr_per;
783	copy->rr_resource = rule->rr_resource;
784	copy->rr_action = rule->rr_action;
785	copy->rr_amount = rule->rr_amount;
786	refcount_init(&copy->rr_refcount, 1);
787	rctl_rule_acquire_subject(copy);
788
789	return (copy);
790}
791
792void
793rctl_rule_acquire(struct rctl_rule *rule)
794{
795
796	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
797
798	refcount_acquire(&rule->rr_refcount);
799}
800
801static void
802rctl_rule_free(void *context, int pending)
803{
804	struct rctl_rule *rule;
805
806	rule = (struct rctl_rule *)context;
807
808	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
809
810	/*
811	 * We don't need locking here; rule is guaranteed to be inaccessible.
812	 */
813
814	rctl_rule_release_subject(rule);
815	uma_zfree(rctl_rule_zone, rule);
816}
817
818void
819rctl_rule_release(struct rctl_rule *rule)
820{
821
822	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
823
824	if (refcount_release(&rule->rr_refcount)) {
825		/*
826		 * rctl_rule_release() is often called when iterating
827		 * over all the uidinfo structures in the system,
828		 * holding uihashtbl_lock.  Since rctl_rule_free()
829		 * might end up calling uifree(), this would lead
830		 * to lock recursion.  Use taskqueue to avoid this.
831		 */
832		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
833		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
834	}
835}
836
837static int
838rctl_rule_fully_specified(const struct rctl_rule *rule)
839{
840
841	switch (rule->rr_subject_type) {
842	case RCTL_SUBJECT_TYPE_UNDEFINED:
843		return (0);
844	case RCTL_SUBJECT_TYPE_PROCESS:
845		if (rule->rr_subject.rs_proc == NULL)
846			return (0);
847		break;
848	case RCTL_SUBJECT_TYPE_USER:
849		if (rule->rr_subject.rs_uip == NULL)
850			return (0);
851		break;
852	case RCTL_SUBJECT_TYPE_LOGINCLASS:
853		if (rule->rr_subject.rs_loginclass == NULL)
854			return (0);
855		break;
856	case RCTL_SUBJECT_TYPE_JAIL:
857		if (rule->rr_subject.rs_prison_racct == NULL)
858			return (0);
859		break;
860	default:
861		panic("rctl_rule_fully_specified: unknown subject type %d",
862		    rule->rr_subject_type);
863	}
864	if (rule->rr_resource == RACCT_UNDEFINED)
865		return (0);
866	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
867		return (0);
868	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
869		return (0);
870	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
871		return (0);
872
873	return (1);
874}
875
876static int
877rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
878{
879	int error = 0;
880	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
881	     *amountstr, *perstr;
882	struct rctl_rule *rule;
883	id_t id;
884
885	rule = rctl_rule_alloc(M_WAITOK);
886
887	subjectstr = strsep(&rulestr, ":");
888	subject_idstr = strsep(&rulestr, ":");
889	resourcestr = strsep(&rulestr, ":");
890	actionstr = strsep(&rulestr, "=/");
891	amountstr = strsep(&rulestr, "/");
892	perstr = rulestr;
893
894	if (subjectstr == NULL || subjectstr[0] == '\0')
895		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
896	else {
897		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
898		if (error != 0)
899			goto out;
900	}
901
902	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
903		rule->rr_subject.rs_proc = NULL;
904		rule->rr_subject.rs_uip = NULL;
905		rule->rr_subject.rs_loginclass = NULL;
906		rule->rr_subject.rs_prison_racct = NULL;
907	} else {
908		switch (rule->rr_subject_type) {
909		case RCTL_SUBJECT_TYPE_UNDEFINED:
910			error = EINVAL;
911			goto out;
912		case RCTL_SUBJECT_TYPE_PROCESS:
913			error = str2id(subject_idstr, &id);
914			if (error != 0)
915				goto out;
916			sx_assert(&allproc_lock, SA_LOCKED);
917			rule->rr_subject.rs_proc = pfind(id);
918			if (rule->rr_subject.rs_proc == NULL) {
919				error = ESRCH;
920				goto out;
921			}
922			PROC_UNLOCK(rule->rr_subject.rs_proc);
923			break;
924		case RCTL_SUBJECT_TYPE_USER:
925			error = str2id(subject_idstr, &id);
926			if (error != 0)
927				goto out;
928			rule->rr_subject.rs_uip = uifind(id);
929			break;
930		case RCTL_SUBJECT_TYPE_LOGINCLASS:
931			rule->rr_subject.rs_loginclass =
932			    loginclass_find(subject_idstr);
933			if (rule->rr_subject.rs_loginclass == NULL) {
934				error = ENAMETOOLONG;
935				goto out;
936			}
937			break;
938		case RCTL_SUBJECT_TYPE_JAIL:
939			rule->rr_subject.rs_prison_racct =
940			    prison_racct_find(subject_idstr);
941			if (rule->rr_subject.rs_prison_racct == NULL) {
942				error = ENAMETOOLONG;
943				goto out;
944			}
945			break;
946               default:
947                       panic("rctl_string_to_rule: unknown subject type %d",
948                           rule->rr_subject_type);
949               }
950	}
951
952	if (resourcestr == NULL || resourcestr[0] == '\0')
953		rule->rr_resource = RACCT_UNDEFINED;
954	else {
955		error = str2value(resourcestr, &rule->rr_resource,
956		    resourcenames);
957		if (error != 0)
958			goto out;
959	}
960
961	if (actionstr == NULL || actionstr[0] == '\0')
962		rule->rr_action = RCTL_ACTION_UNDEFINED;
963	else {
964		error = str2value(actionstr, &rule->rr_action, actionnames);
965		if (error != 0)
966			goto out;
967	}
968
969	if (amountstr == NULL || amountstr[0] == '\0')
970		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
971	else {
972		error = str2int64(amountstr, &rule->rr_amount);
973		if (error != 0)
974			goto out;
975		if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
976			rule->rr_amount *= 1000000;
977	}
978
979	if (perstr == NULL || perstr[0] == '\0')
980		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
981	else {
982		error = str2value(perstr, &rule->rr_per, subjectnames);
983		if (error != 0)
984			goto out;
985	}
986
987out:
988	if (error == 0)
989		*rulep = rule;
990	else
991		rctl_rule_release(rule);
992
993	return (error);
994}
995
996/*
997 * Link a rule with all the subjects it applies to.
998 */
999int
1000rctl_rule_add(struct rctl_rule *rule)
1001{
1002	struct proc *p;
1003	struct ucred *cred;
1004	struct uidinfo *uip;
1005	struct prison *pr;
1006	struct prison_racct *prr;
1007	struct loginclass *lc;
1008	struct rctl_rule *rule2;
1009	int match;
1010
1011	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1012
1013	/*
1014	 * Some rules just don't make sense.  Note that the one below
1015	 * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
1016	 * for example, is not deniable in the racct sense, but the
1017	 * limit is enforced in a different way, so "deny" rules for %CPU
1018	 * do make sense.
1019	 */
1020	if (rule->rr_action == RCTL_ACTION_DENY &&
1021	    (rule->rr_resource == RACCT_CPU ||
1022	    rule->rr_resource == RACCT_WALLCLOCK))
1023		return (EOPNOTSUPP);
1024
1025	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1026	    RACCT_IS_SLOPPY(rule->rr_resource))
1027		return (EOPNOTSUPP);
1028
1029	/*
1030	 * Make sure there are no duplicated rules.  Also, for the "deny"
1031	 * rules, remove ones differing only by "amount".
1032	 */
1033	if (rule->rr_action == RCTL_ACTION_DENY) {
1034		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1035		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1036		rctl_rule_remove(rule2);
1037		rctl_rule_release(rule2);
1038	} else
1039		rctl_rule_remove(rule);
1040
1041	switch (rule->rr_subject_type) {
1042	case RCTL_SUBJECT_TYPE_PROCESS:
1043		p = rule->rr_subject.rs_proc;
1044		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1045
1046		rctl_racct_add_rule(p->p_racct, rule);
1047		/*
1048		 * In case of per-process rule, we don't have anything more
1049		 * to do.
1050		 */
1051		return (0);
1052
1053	case RCTL_SUBJECT_TYPE_USER:
1054		uip = rule->rr_subject.rs_uip;
1055		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1056		rctl_racct_add_rule(uip->ui_racct, rule);
1057		break;
1058
1059	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1060		lc = rule->rr_subject.rs_loginclass;
1061		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1062		rctl_racct_add_rule(lc->lc_racct, rule);
1063		break;
1064
1065	case RCTL_SUBJECT_TYPE_JAIL:
1066		prr = rule->rr_subject.rs_prison_racct;
1067		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1068		rctl_racct_add_rule(prr->prr_racct, rule);
1069		break;
1070
1071	default:
1072		panic("rctl_rule_add: unknown subject type %d",
1073		    rule->rr_subject_type);
1074	}
1075
1076	/*
1077	 * Now go through all the processes and add the new rule to the ones
1078	 * it applies to.
1079	 */
1080	sx_assert(&allproc_lock, SA_LOCKED);
1081	FOREACH_PROC_IN_SYSTEM(p) {
1082		cred = p->p_ucred;
1083		switch (rule->rr_subject_type) {
1084		case RCTL_SUBJECT_TYPE_USER:
1085			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1086			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1087				break;
1088			continue;
1089		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1090			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1091				break;
1092			continue;
1093		case RCTL_SUBJECT_TYPE_JAIL:
1094			match = 0;
1095			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1096				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1097					match = 1;
1098					break;
1099				}
1100			}
1101			if (match)
1102				break;
1103			continue;
1104		default:
1105			panic("rctl_rule_add: unknown subject type %d",
1106			    rule->rr_subject_type);
1107		}
1108
1109		rctl_racct_add_rule(p->p_racct, rule);
1110	}
1111
1112	return (0);
1113}
1114
1115static void
1116rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1117{
1118	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1119	int found = 0;
1120
1121	rw_wlock(&rctl_lock);
1122	found += rctl_racct_remove_rules(racct, filter);
1123	rw_wunlock(&rctl_lock);
1124
1125	*((int *)arg3) += found;
1126}
1127
1128/*
1129 * Remove all rules that match the filter.
1130 */
1131int
1132rctl_rule_remove(struct rctl_rule *filter)
1133{
1134	int found = 0;
1135	struct proc *p;
1136
1137	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1138	    filter->rr_subject.rs_proc != NULL) {
1139		p = filter->rr_subject.rs_proc;
1140		rw_wlock(&rctl_lock);
1141		found = rctl_racct_remove_rules(p->p_racct, filter);
1142		rw_wunlock(&rctl_lock);
1143		if (found)
1144			return (0);
1145		return (ESRCH);
1146	}
1147
1148	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1149	    (void *)&found);
1150	ui_racct_foreach(rctl_rule_remove_callback, filter,
1151	    (void *)&found);
1152	prison_racct_foreach(rctl_rule_remove_callback, filter,
1153	    (void *)&found);
1154
1155	sx_assert(&allproc_lock, SA_LOCKED);
1156	rw_wlock(&rctl_lock);
1157	FOREACH_PROC_IN_SYSTEM(p) {
1158		found += rctl_racct_remove_rules(p->p_racct, filter);
1159	}
1160	rw_wunlock(&rctl_lock);
1161
1162	if (found)
1163		return (0);
1164	return (ESRCH);
1165}
1166
1167/*
1168 * Appends a rule to the sbuf.
1169 */
1170static void
1171rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1172{
1173	int64_t amount;
1174
1175	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1176
1177	switch (rule->rr_subject_type) {
1178	case RCTL_SUBJECT_TYPE_PROCESS:
1179		if (rule->rr_subject.rs_proc == NULL)
1180			sbuf_printf(sb, ":");
1181		else
1182			sbuf_printf(sb, "%d:",
1183			    rule->rr_subject.rs_proc->p_pid);
1184		break;
1185	case RCTL_SUBJECT_TYPE_USER:
1186		if (rule->rr_subject.rs_uip == NULL)
1187			sbuf_printf(sb, ":");
1188		else
1189			sbuf_printf(sb, "%d:",
1190			    rule->rr_subject.rs_uip->ui_uid);
1191		break;
1192	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1193		if (rule->rr_subject.rs_loginclass == NULL)
1194			sbuf_printf(sb, ":");
1195		else
1196			sbuf_printf(sb, "%s:",
1197			    rule->rr_subject.rs_loginclass->lc_name);
1198		break;
1199	case RCTL_SUBJECT_TYPE_JAIL:
1200		if (rule->rr_subject.rs_prison_racct == NULL)
1201			sbuf_printf(sb, ":");
1202		else
1203			sbuf_printf(sb, "%s:",
1204			    rule->rr_subject.rs_prison_racct->prr_name);
1205		break;
1206	default:
1207		panic("rctl_rule_to_sbuf: unknown subject type %d",
1208		    rule->rr_subject_type);
1209	}
1210
1211	amount = rule->rr_amount;
1212	if (amount != RCTL_AMOUNT_UNDEFINED &&
1213	    RACCT_IS_IN_MILLIONS(rule->rr_resource))
1214		amount /= 1000000;
1215
1216	sbuf_printf(sb, "%s:%s=%jd",
1217	    rctl_resource_name(rule->rr_resource),
1218	    rctl_action_name(rule->rr_action),
1219	    amount);
1220
1221	if (rule->rr_per != rule->rr_subject_type)
1222		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1223}
1224
1225/*
1226 * Routine used by RCTL syscalls to read in input string.
1227 */
1228static int
1229rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1230{
1231	int error;
1232	char *str;
1233
1234	if (inbuflen <= 0)
1235		return (EINVAL);
1236	if (inbuflen > RCTL_MAX_INBUFLEN)
1237		return (E2BIG);
1238
1239	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1240	error = copyinstr(inbufp, str, inbuflen, NULL);
1241	if (error != 0) {
1242		free(str, M_RCTL);
1243		return (error);
1244	}
1245
1246	*inputstr = str;
1247
1248	return (0);
1249}
1250
1251/*
1252 * Routine used by RCTL syscalls to write out output string.
1253 */
1254static int
1255rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1256{
1257	int error;
1258
1259	if (outputsbuf == NULL)
1260		return (0);
1261
1262	sbuf_finish(outputsbuf);
1263	if (outbuflen < sbuf_len(outputsbuf) + 1) {
1264		sbuf_delete(outputsbuf);
1265		return (ERANGE);
1266	}
1267	error = copyout(sbuf_data(outputsbuf), outbufp,
1268	    sbuf_len(outputsbuf) + 1);
1269	sbuf_delete(outputsbuf);
1270	return (error);
1271}
1272
1273static struct sbuf *
1274rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1275{
1276	int i;
1277	int64_t amount;
1278	struct sbuf *sb;
1279
1280	sb = sbuf_new_auto();
1281	for (i = 0; i <= RACCT_MAX; i++) {
1282		if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1283			continue;
1284		amount = racct->r_resources[i];
1285		if (RACCT_IS_IN_MILLIONS(i))
1286			amount /= 1000000;
1287		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1288	}
1289	sbuf_setpos(sb, sbuf_len(sb) - 1);
1290	return (sb);
1291}
1292
1293int
1294sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1295{
1296	int error;
1297	char *inputstr;
1298	struct rctl_rule *filter;
1299	struct sbuf *outputsbuf = NULL;
1300	struct proc *p;
1301	struct uidinfo *uip;
1302	struct loginclass *lc;
1303	struct prison_racct *prr;
1304
1305	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1306	if (error != 0)
1307		return (error);
1308
1309	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1310	if (error != 0)
1311		return (error);
1312
1313	sx_slock(&allproc_lock);
1314	error = rctl_string_to_rule(inputstr, &filter);
1315	free(inputstr, M_RCTL);
1316	if (error != 0) {
1317		sx_sunlock(&allproc_lock);
1318		return (error);
1319	}
1320
1321	switch (filter->rr_subject_type) {
1322	case RCTL_SUBJECT_TYPE_PROCESS:
1323		p = filter->rr_subject.rs_proc;
1324		if (p == NULL) {
1325			error = EINVAL;
1326			goto out;
1327		}
1328		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1329		break;
1330	case RCTL_SUBJECT_TYPE_USER:
1331		uip = filter->rr_subject.rs_uip;
1332		if (uip == NULL) {
1333			error = EINVAL;
1334			goto out;
1335		}
1336		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1337		break;
1338	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1339		lc = filter->rr_subject.rs_loginclass;
1340		if (lc == NULL) {
1341			error = EINVAL;
1342			goto out;
1343		}
1344		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1345		break;
1346	case RCTL_SUBJECT_TYPE_JAIL:
1347		prr = filter->rr_subject.rs_prison_racct;
1348		if (prr == NULL) {
1349			error = EINVAL;
1350			goto out;
1351		}
1352		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1353		break;
1354	default:
1355		error = EINVAL;
1356	}
1357out:
1358	rctl_rule_release(filter);
1359	sx_sunlock(&allproc_lock);
1360	if (error != 0)
1361		return (error);
1362
1363	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1364
1365	return (error);
1366}
1367
1368static void
1369rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1370{
1371	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1372	struct rctl_rule_link *link;
1373	struct sbuf *sb = (struct sbuf *)arg3;
1374
1375	rw_rlock(&rctl_lock);
1376	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1377		if (!rctl_rule_matches(link->rrl_rule, filter))
1378			continue;
1379		rctl_rule_to_sbuf(sb, link->rrl_rule);
1380		sbuf_printf(sb, ",");
1381	}
1382	rw_runlock(&rctl_lock);
1383}
1384
1385int
1386sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1387{
1388	int error;
1389	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1390	char *inputstr, *buf;
1391	struct sbuf *sb;
1392	struct rctl_rule *filter;
1393	struct rctl_rule_link *link;
1394	struct proc *p;
1395
1396	error = priv_check(td, PRIV_RCTL_GET_RULES);
1397	if (error != 0)
1398		return (error);
1399
1400	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1401	if (error != 0)
1402		return (error);
1403
1404	sx_slock(&allproc_lock);
1405	error = rctl_string_to_rule(inputstr, &filter);
1406	free(inputstr, M_RCTL);
1407	if (error != 0) {
1408		sx_sunlock(&allproc_lock);
1409		return (error);
1410	}
1411
1412again:
1413	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1414	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1415	KASSERT(sb != NULL, ("sbuf_new failed"));
1416
1417	sx_assert(&allproc_lock, SA_LOCKED);
1418	FOREACH_PROC_IN_SYSTEM(p) {
1419		rw_rlock(&rctl_lock);
1420		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1421			/*
1422			 * Non-process rules will be added to the buffer later.
1423			 * Adding them here would result in duplicated output.
1424			 */
1425			if (link->rrl_rule->rr_subject_type !=
1426			    RCTL_SUBJECT_TYPE_PROCESS)
1427				continue;
1428			if (!rctl_rule_matches(link->rrl_rule, filter))
1429				continue;
1430			rctl_rule_to_sbuf(sb, link->rrl_rule);
1431			sbuf_printf(sb, ",");
1432		}
1433		rw_runlock(&rctl_lock);
1434	}
1435
1436	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1437	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1438	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1439	if (sbuf_error(sb) == ENOMEM) {
1440		sbuf_delete(sb);
1441		free(buf, M_RCTL);
1442		bufsize *= 4;
1443		goto again;
1444	}
1445
1446	/*
1447	 * Remove trailing ",".
1448	 */
1449	if (sbuf_len(sb) > 0)
1450		sbuf_setpos(sb, sbuf_len(sb) - 1);
1451
1452	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1453
1454	rctl_rule_release(filter);
1455	sx_sunlock(&allproc_lock);
1456	free(buf, M_RCTL);
1457	return (error);
1458}
1459
1460int
1461sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1462{
1463	int error;
1464	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1465	char *inputstr, *buf;
1466	struct sbuf *sb;
1467	struct rctl_rule *filter;
1468	struct rctl_rule_link *link;
1469
1470	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1471	if (error != 0)
1472		return (error);
1473
1474	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1475	if (error != 0)
1476		return (error);
1477
1478	sx_slock(&allproc_lock);
1479	error = rctl_string_to_rule(inputstr, &filter);
1480	free(inputstr, M_RCTL);
1481	if (error != 0) {
1482		sx_sunlock(&allproc_lock);
1483		return (error);
1484	}
1485
1486	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1487		rctl_rule_release(filter);
1488		sx_sunlock(&allproc_lock);
1489		return (EINVAL);
1490	}
1491	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1492		rctl_rule_release(filter);
1493		sx_sunlock(&allproc_lock);
1494		return (EOPNOTSUPP);
1495	}
1496	if (filter->rr_subject.rs_proc == NULL) {
1497		rctl_rule_release(filter);
1498		sx_sunlock(&allproc_lock);
1499		return (EINVAL);
1500	}
1501
1502again:
1503	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1504	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1505	KASSERT(sb != NULL, ("sbuf_new failed"));
1506
1507	rw_rlock(&rctl_lock);
1508	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1509	    rrl_next) {
1510		rctl_rule_to_sbuf(sb, link->rrl_rule);
1511		sbuf_printf(sb, ",");
1512	}
1513	rw_runlock(&rctl_lock);
1514	if (sbuf_error(sb) == ENOMEM) {
1515		sbuf_delete(sb);
1516		free(buf, M_RCTL);
1517		bufsize *= 4;
1518		goto again;
1519	}
1520
1521	/*
1522	 * Remove trailing ",".
1523	 */
1524	if (sbuf_len(sb) > 0)
1525		sbuf_setpos(sb, sbuf_len(sb) - 1);
1526
1527	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1528	rctl_rule_release(filter);
1529	sx_sunlock(&allproc_lock);
1530	free(buf, M_RCTL);
1531	return (error);
1532}
1533
1534int
1535sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1536{
1537	int error;
1538	struct rctl_rule *rule;
1539	char *inputstr;
1540
1541	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1542	if (error != 0)
1543		return (error);
1544
1545	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1546	if (error != 0)
1547		return (error);
1548
1549	sx_slock(&allproc_lock);
1550	error = rctl_string_to_rule(inputstr, &rule);
1551	free(inputstr, M_RCTL);
1552	if (error != 0) {
1553		sx_sunlock(&allproc_lock);
1554		return (error);
1555	}
1556	/*
1557	 * The 'per' part of a rule is optional.
1558	 */
1559	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1560	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1561		rule->rr_per = rule->rr_subject_type;
1562
1563	if (!rctl_rule_fully_specified(rule)) {
1564		error = EINVAL;
1565		goto out;
1566	}
1567
1568	error = rctl_rule_add(rule);
1569
1570out:
1571	rctl_rule_release(rule);
1572	sx_sunlock(&allproc_lock);
1573	return (error);
1574}
1575
1576int
1577sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1578{
1579	int error;
1580	struct rctl_rule *filter;
1581	char *inputstr;
1582
1583	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1584	if (error != 0)
1585		return (error);
1586
1587	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1588	if (error != 0)
1589		return (error);
1590
1591	sx_slock(&allproc_lock);
1592	error = rctl_string_to_rule(inputstr, &filter);
1593	free(inputstr, M_RCTL);
1594	if (error != 0) {
1595		sx_sunlock(&allproc_lock);
1596		return (error);
1597	}
1598
1599	error = rctl_rule_remove(filter);
1600	rctl_rule_release(filter);
1601	sx_sunlock(&allproc_lock);
1602
1603	return (error);
1604}
1605
/*
 * Update RCTL rule list after credential change.
 *
 * Rebuilds the rule list hanging off p->p_racct so that it holds the
 * process-subject rules already linked to the process plus every rule
 * currently linked to the raccts of the new credential's cr_ruidinfo,
 * cr_loginclass and cr_prison->pr_prison_racct.
 *
 * Link structures are allocated with M_WAITOK, which is done with rctl_lock
 * dropped: the rules are counted under the read lock, the links are
 * allocated unlocked, and the rules are assigned under the write lock.  If
 * the number of rules no longer equals the count at that point, the
 * preallocated list is thrown away and the whole procedure starts over.
 */
void
rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
{
	int rulecnt, i;
	struct rctl_rule_link *link, *newlink;
	struct uidinfo *newuip;
	struct loginclass *newlc;
	struct prison_racct *newprr;
	LIST_HEAD(, rctl_rule_link) newrules;

	newuip = newcred->cr_ruidinfo;
	newlc = newcred->cr_loginclass;
	newprr = newcred->cr_prison->pr_prison_racct;

	LIST_INIT(&newrules);

again:
	/*
	 * First, count the rules that apply to the process with new
	 * credentials.
	 */
	rulecnt = 0;
	rw_rlock(&rctl_lock);
	/* Of the process' current rules only the per-process ones are kept. */
	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
		if (link->rrl_rule->rr_subject_type ==
		    RCTL_SUBJECT_TYPE_PROCESS)
			rulecnt++;
	}
	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
		rulecnt++;
	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
		rulecnt++;
	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
		rulecnt++;
	rw_runlock(&rctl_lock);

	/*
	 * Create temporary list.  We've dropped the rctl_lock in order
	 * to use M_WAITOK.
	 */
	for (i = 0; i < rulecnt; i++) {
		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
		/* NULL marks a link that has not been given a rule yet. */
		newlink->rrl_rule = NULL;
		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
	}

	newlink = LIST_FIRST(&newrules);

	/*
	 * Assign rules to the newly allocated list entries.
	 *
	 * In each loop below, running out of preallocated links
	 * (newlink == NULL) means there are now more rules than were
	 * counted, so the attempt is abandoned.  rulecnt is decremented
	 * once per assigned link.
	 */
	rw_wlock(&rctl_lock);
	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
		if (link->rrl_rule->rr_subject_type ==
		    RCTL_SUBJECT_TYPE_PROCESS) {
			if (newlink == NULL)
				goto goaround;
			rctl_rule_acquire(link->rrl_rule);
			newlink->rrl_rule = link->rrl_rule;
			newlink = LIST_NEXT(newlink, rrl_next);
			rulecnt--;
		}
	}

	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
		if (newlink == NULL)
			goto goaround;
		rctl_rule_acquire(link->rrl_rule);
		newlink->rrl_rule = link->rrl_rule;
		newlink = LIST_NEXT(newlink, rrl_next);
		rulecnt--;
	}

	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
		if (newlink == NULL)
			goto goaround;
		rctl_rule_acquire(link->rrl_rule);
		newlink->rrl_rule = link->rrl_rule;
		newlink = LIST_NEXT(newlink, rrl_next);
		rulecnt--;
	}

	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
		if (newlink == NULL)
			goto goaround;
		rctl_rule_acquire(link->rrl_rule);
		newlink->rrl_rule = link->rrl_rule;
		newlink = LIST_NEXT(newlink, rrl_next);
		rulecnt--;
	}

	/*
	 * Zero means every preallocated link received a rule.  A non-zero
	 * value means there are now fewer rules than were counted; that
	 * case falls through to the retry path below.
	 */
	if (rulecnt == 0) {
		/*
		 * Free the old rule list.
		 */
		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
			link = LIST_FIRST(&p->p_racct->r_rule_links);
			LIST_REMOVE(link, rrl_next);
			rctl_rule_release(link->rrl_rule);
			uma_zfree(rctl_rule_link_zone, link);
		}

		/*
		 * Replace lists and we're done.
		 *
		 * XXX: Is there any way to switch list heads instead
		 *      of iterating here?
		 */
		while (!LIST_EMPTY(&newrules)) {
			newlink = LIST_FIRST(&newrules);
			LIST_REMOVE(newlink, rrl_next);
			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
			    newlink, rrl_next);
		}

		rw_wunlock(&rctl_lock);

		return;
	}

goaround:
	rw_wunlock(&rctl_lock);

	/*
	 * Rule list changed while we were not holding the rctl_lock.
	 * Free the new list and try again.
	 */
	while (!LIST_EMPTY(&newrules)) {
		newlink = LIST_FIRST(&newrules);
		LIST_REMOVE(newlink, rrl_next);
		/*
		 * Links filled in before the mismatch was noticed hold a
		 * reference taken with rctl_rule_acquire(); give it back.
		 */
		if (newlink->rrl_rule != NULL)
			rctl_rule_release(newlink->rrl_rule);
		uma_zfree(rctl_rule_link_zone, newlink);
	}

	goto again;
}
1746
1747/*
1748 * Assign RCTL rules to the newly created process.
1749 */
1750int
1751rctl_proc_fork(struct proc *parent, struct proc *child)
1752{
1753	int error;
1754	struct rctl_rule_link *link;
1755	struct rctl_rule *rule;
1756
1757	LIST_INIT(&child->p_racct->r_rule_links);
1758
1759	KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
1760
1761	rw_wlock(&rctl_lock);
1762
1763	/*
1764	 * Go through limits applicable to the parent and assign them
1765	 * to the child.  Rules with 'process' subject have to be duplicated
1766	 * in order to make their rr_subject point to the new process.
1767	 */
1768	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1769		if (link->rrl_rule->rr_subject_type ==
1770		    RCTL_SUBJECT_TYPE_PROCESS) {
1771			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1772			if (rule == NULL)
1773				goto fail;
1774			KASSERT(rule->rr_subject.rs_proc == parent,
1775			    ("rule->rr_subject.rs_proc != parent"));
1776			rule->rr_subject.rs_proc = child;
1777			error = rctl_racct_add_rule_locked(child->p_racct,
1778			    rule);
1779			rctl_rule_release(rule);
1780			if (error != 0)
1781				goto fail;
1782		} else {
1783			error = rctl_racct_add_rule_locked(child->p_racct,
1784			    link->rrl_rule);
1785			if (error != 0)
1786				goto fail;
1787		}
1788	}
1789
1790	rw_wunlock(&rctl_lock);
1791	return (0);
1792
1793fail:
1794	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1795		link = LIST_FIRST(&child->p_racct->r_rule_links);
1796		LIST_REMOVE(link, rrl_next);
1797		rctl_rule_release(link->rrl_rule);
1798		uma_zfree(rctl_rule_link_zone, link);
1799	}
1800	rw_wunlock(&rctl_lock);
1801	return (EAGAIN);
1802}
1803
1804/*
1805 * Release rules attached to the racct.
1806 */
1807void
1808rctl_racct_release(struct racct *racct)
1809{
1810	struct rctl_rule_link *link;
1811
1812	rw_wlock(&rctl_lock);
1813	while (!LIST_EMPTY(&racct->r_rule_links)) {
1814		link = LIST_FIRST(&racct->r_rule_links);
1815		LIST_REMOVE(link, rrl_next);
1816		rctl_rule_release(link->rrl_rule);
1817		uma_zfree(rctl_rule_link_zone, link);
1818	}
1819	rw_wunlock(&rctl_lock);
1820}
1821
1822static void
1823rctl_init(void)
1824{
1825
1826	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1827	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1828	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1829	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1830	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1831}
1832
1833#else /* !RCTL */
1834
/*
 * Stub for the !RCTL branch of this file: the system call is not available
 * and always fails with ENOSYS.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	return (ENOSYS);
}
1841
/*
 * Stub for the !RCTL branch of this file: the system call is not available
 * and always fails with ENOSYS.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	return (ENOSYS);
}
1848
/*
 * Stub for the !RCTL branch of this file: the system call is not available
 * and always fails with ENOSYS.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	return (ENOSYS);
}
1855
/*
 * Stub for the !RCTL branch of this file: the system call is not available
 * and always fails with ENOSYS.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	return (ENOSYS);
}
1862
/*
 * Stub for the !RCTL branch of this file: the system call is not available
 * and always fails with ENOSYS.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	return (ENOSYS);
}
1869
1870#endif /* !RCTL */
1871