kern_rctl.c revision 235901
1/*-
2 * Copyright (c) 2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: stable/9/sys/kern/kern_rctl.c 235901 2012-05-24 11:46:39Z trasz $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/9/sys/kern/kern_rctl.c 235901 2012-05-24 11:46:39Z trasz $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/malloc.h>
38#include <sys/queue.h>
39#include <sys/refcount.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/limits.h>
43#include <sys/loginclass.h>
44#include <sys/priv.h>
45#include <sys/proc.h>
46#include <sys/racct.h>
47#include <sys/rctl.h>
48#include <sys/resourcevar.h>
49#include <sys/sx.h>
50#include <sys/sysent.h>
51#include <sys/sysproto.h>
52#include <sys/systm.h>
53#include <sys/types.h>
54#include <sys/eventhandler.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#include <sys/rwlock.h>
58#include <sys/sbuf.h>
59#include <sys/taskqueue.h>
60#include <sys/tree.h>
61#include <vm/uma.h>
62
63#ifdef RCTL
64#ifndef RACCT
65#error "The RCTL option requires the RACCT option"
66#endif
67
68FEATURE(rctl, "Resource Limits");
69
/*
 * HRF_* flag values.
 * NOTE(review): nothing in the visible part of this file references
 * these -- confirm whether they are still needed.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/* Default buffer size for rctl_get_rules(2). */
#define	RCTL_DEFAULT_BUFSIZE	4096
/*
 * NOTE(review): presumably the upper bound on the length of a
 * user-supplied input string -- confirm against rctl_read_inbuf().
 */
#define	RCTL_MAX_INBUFLEN	4096
/*
 * Size of the fixed-length buffer rctl_enforce() formats its log and
 * devctl messages into.
 */
#define	RCTL_LOG_BUFSIZE	128
78
79/*
80 * 'rctl_rule_link' connects a rule with every racct it's related to.
81 * For example, rule 'user:X:openfiles:deny=N/process' is linked
82 * with uidinfo for user X, and to each process of that user.
83 */
84struct rctl_rule_link {
85	LIST_ENTRY(rctl_rule_link)	rrl_next;
86	struct rctl_rule		*rrl_rule;
87	int				rrl_exceeded;
88};
89
/*
 * One entry of a name <-> value translation table.  Tables are arrays
 * of these, terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		d_value;	/* numeric value the name stands for */
};
94
95static struct dict subjectnames[] = {
96	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
97	{ "user", RCTL_SUBJECT_TYPE_USER },
98	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
99	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
100	{ NULL, -1 }};
101
102static struct dict resourcenames[] = {
103	{ "cputime", RACCT_CPU },
104	{ "datasize", RACCT_DATA },
105	{ "stacksize", RACCT_STACK },
106	{ "coredumpsize", RACCT_CORE },
107	{ "memoryuse", RACCT_RSS },
108	{ "memorylocked", RACCT_MEMLOCK },
109	{ "maxproc", RACCT_NPROC },
110	{ "openfiles", RACCT_NOFILE },
111	{ "vmemoryuse", RACCT_VMEM },
112	{ "pseudoterminals", RACCT_NPTS },
113	{ "swapuse", RACCT_SWAP },
114	{ "nthr", RACCT_NTHR },
115	{ "msgqqueued", RACCT_MSGQQUEUED },
116	{ "msgqsize", RACCT_MSGQSIZE },
117	{ "nmsgq", RACCT_NMSGQ },
118	{ "nsem", RACCT_NSEM },
119	{ "nsemop", RACCT_NSEMOP },
120	{ "nshm", RACCT_NSHM },
121	{ "shmsize", RACCT_SHMSIZE },
122	{ "wallclock", RACCT_WALLCLOCK },
123	{ NULL, -1 }};
124
125static struct dict actionnames[] = {
126	{ "sighup", RCTL_ACTION_SIGHUP },
127	{ "sigint", RCTL_ACTION_SIGINT },
128	{ "sigquit", RCTL_ACTION_SIGQUIT },
129	{ "sigill", RCTL_ACTION_SIGILL },
130	{ "sigtrap", RCTL_ACTION_SIGTRAP },
131	{ "sigabrt", RCTL_ACTION_SIGABRT },
132	{ "sigemt", RCTL_ACTION_SIGEMT },
133	{ "sigfpe", RCTL_ACTION_SIGFPE },
134	{ "sigkill", RCTL_ACTION_SIGKILL },
135	{ "sigbus", RCTL_ACTION_SIGBUS },
136	{ "sigsegv", RCTL_ACTION_SIGSEGV },
137	{ "sigsys", RCTL_ACTION_SIGSYS },
138	{ "sigpipe", RCTL_ACTION_SIGPIPE },
139	{ "sigalrm", RCTL_ACTION_SIGALRM },
140	{ "sigterm", RCTL_ACTION_SIGTERM },
141	{ "sigurg", RCTL_ACTION_SIGURG },
142	{ "sigstop", RCTL_ACTION_SIGSTOP },
143	{ "sigtstp", RCTL_ACTION_SIGTSTP },
144	{ "sigchld", RCTL_ACTION_SIGCHLD },
145	{ "sigttin", RCTL_ACTION_SIGTTIN },
146	{ "sigttou", RCTL_ACTION_SIGTTOU },
147	{ "sigio", RCTL_ACTION_SIGIO },
148	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
149	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
150	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
151	{ "sigprof", RCTL_ACTION_SIGPROF },
152	{ "sigwinch", RCTL_ACTION_SIGWINCH },
153	{ "siginfo", RCTL_ACTION_SIGINFO },
154	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
155	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
156	{ "sigthr", RCTL_ACTION_SIGTHR },
157	{ "deny", RCTL_ACTION_DENY },
158	{ "log", RCTL_ACTION_LOG },
159	{ "devctl", RCTL_ACTION_DEVCTL },
160	{ NULL, -1 }};
161
162static void rctl_init(void);
163SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
164
165static uma_zone_t rctl_rule_link_zone;
166static uma_zone_t rctl_rule_zone;
167static struct rwlock rctl_lock;
168RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
169
170static int rctl_rule_fully_specified(const struct rctl_rule *rule);
171static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
172
173MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
174
175static const char *
176rctl_subject_type_name(int subject)
177{
178	int i;
179
180	for (i = 0; subjectnames[i].d_name != NULL; i++) {
181		if (subjectnames[i].d_value == subject)
182			return (subjectnames[i].d_name);
183	}
184
185	panic("rctl_subject_type_name: unknown subject type %d", subject);
186}
187
188static const char *
189rctl_action_name(int action)
190{
191	int i;
192
193	for (i = 0; actionnames[i].d_name != NULL; i++) {
194		if (actionnames[i].d_value == action)
195			return (actionnames[i].d_name);
196	}
197
198	panic("rctl_action_name: unknown action %d", action);
199}
200
201const char *
202rctl_resource_name(int resource)
203{
204	int i;
205
206	for (i = 0; resourcenames[i].d_name != NULL; i++) {
207		if (resourcenames[i].d_value == resource)
208			return (resourcenames[i].d_name);
209	}
210
211	panic("rctl_resource_name: unknown resource %d", resource);
212}
213
214/*
215 * Return the amount of resource that can be allocated by 'p' before
216 * hitting 'rule'.
217 */
218static int64_t
219rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
220{
221	int resource;
222	int64_t available = INT64_MAX;
223	struct ucred *cred = p->p_ucred;
224
225	rw_assert(&rctl_lock, RA_LOCKED);
226
227	resource = rule->rr_resource;
228	switch (rule->rr_per) {
229	case RCTL_SUBJECT_TYPE_PROCESS:
230		available = rule->rr_amount -
231		    p->p_racct->r_resources[resource];
232		break;
233	case RCTL_SUBJECT_TYPE_USER:
234		available = rule->rr_amount -
235		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
236		break;
237	case RCTL_SUBJECT_TYPE_LOGINCLASS:
238		available = rule->rr_amount -
239		    cred->cr_loginclass->lc_racct->r_resources[resource];
240		break;
241	case RCTL_SUBJECT_TYPE_JAIL:
242		available = rule->rr_amount -
243		    cred->cr_prison->pr_prison_racct->prr_racct->
244		        r_resources[resource];
245		break;
246	default:
247		panic("rctl_compute_available: unknown per %d",
248		    rule->rr_per);
249	}
250
251	return (available);
252}
253
254/*
255 * Return non-zero if allocating 'amount' by proc 'p' would exceed
256 * resource limit specified by 'rule'.
257 */
258static int
259rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
260    int64_t amount)
261{
262	int64_t available;
263
264	rw_assert(&rctl_lock, RA_LOCKED);
265
266	available = rctl_available_resource(p, rule);
267	if (available >= amount)
268		return (0);
269
270	return (1);
271}
272
273/*
274 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
275 * to what it keeps allocated now.  Returns non-zero if the allocation should
276 * be denied, 0 otherwise.
277 */
278int
279rctl_enforce(struct proc *p, int resource, uint64_t amount)
280{
281	struct rctl_rule *rule;
282	struct rctl_rule_link *link;
283	struct sbuf sb;
284	int should_deny = 0;
285	char *buf;
286	static int curtime = 0;
287	static struct timeval lasttime;
288
289	rw_rlock(&rctl_lock);
290
291	/*
292	 * There may be more than one matching rule; go through all of them.
293	 * Denial should be done last, after logging and sending signals.
294	 */
295	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
296		rule = link->rrl_rule;
297		if (rule->rr_resource != resource)
298			continue;
299		if (!rctl_would_exceed(p, rule, amount)) {
300			link->rrl_exceeded = 0;
301			continue;
302		}
303
304		switch (rule->rr_action) {
305		case RCTL_ACTION_DENY:
306			should_deny = 1;
307			continue;
308		case RCTL_ACTION_LOG:
309			/*
310			 * If rrl_exceeded != 0, it means we've already
311			 * logged a warning for this process.
312			 */
313			if (link->rrl_exceeded != 0)
314				continue;
315
316			/*
317			 * If the process state is not fully initialized yet,
318			 * we can't access most of the required fields, e.g.
319			 * p->p_comm.  This happens when called from fork1().
320			 * Ignore this rule for now; it will be processed just
321			 * after fork, when called from racct_proc_fork_done().
322			 */
323			if (p->p_state != PRS_NORMAL)
324				continue;
325
326			if (!ppsratecheck(&lasttime, &curtime, 10))
327				continue;
328
329			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
330			if (buf == NULL) {
331				printf("rctl_enforce: out of memory\n");
332				continue;
333			}
334			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
335			rctl_rule_to_sbuf(&sb, rule);
336			sbuf_finish(&sb);
337			printf("rctl: rule \"%s\" matched by pid %d "
338			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
339			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
340			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
341			sbuf_delete(&sb);
342			free(buf, M_RCTL);
343			link->rrl_exceeded = 1;
344			continue;
345		case RCTL_ACTION_DEVCTL:
346			if (link->rrl_exceeded != 0)
347				continue;
348
349			if (p->p_state != PRS_NORMAL)
350				continue;
351
352			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
353			if (buf == NULL) {
354				printf("rctl_enforce: out of memory\n");
355				continue;
356			}
357			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
358			sbuf_printf(&sb, "rule=");
359			rctl_rule_to_sbuf(&sb, rule);
360			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
361			    p->p_pid, p->p_ucred->cr_ruid,
362			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
363			sbuf_finish(&sb);
364			devctl_notify_f("RCTL", "rule", "matched",
365			    sbuf_data(&sb), M_NOWAIT);
366			sbuf_delete(&sb);
367			free(buf, M_RCTL);
368			link->rrl_exceeded = 1;
369			continue;
370		default:
371			if (link->rrl_exceeded != 0)
372				continue;
373
374			if (p->p_state != PRS_NORMAL)
375				continue;
376
377			KASSERT(rule->rr_action > 0 &&
378			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
379			    ("rctl_enforce: unknown action %d",
380			     rule->rr_action));
381
382			/*
383			 * We're using the fact that RCTL_ACTION_SIG* values
384			 * are equal to their counterparts from sys/signal.h.
385			 */
386			kern_psignal(p, rule->rr_action);
387			link->rrl_exceeded = 1;
388			continue;
389		}
390	}
391
392	rw_runlock(&rctl_lock);
393
394	if (should_deny) {
395		/*
396		 * Return fake error code; the caller should change it
397		 * into one proper for the situation - EFSIZ, ENOMEM etc.
398		 */
399		return (EDOOFUS);
400	}
401
402	return (0);
403}
404
405uint64_t
406rctl_get_limit(struct proc *p, int resource)
407{
408	struct rctl_rule *rule;
409	struct rctl_rule_link *link;
410	uint64_t amount = UINT64_MAX;
411
412	rw_rlock(&rctl_lock);
413
414	/*
415	 * There may be more than one matching rule; go through all of them.
416	 * Denial should be done last, after logging and sending signals.
417	 */
418	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
419		rule = link->rrl_rule;
420		if (rule->rr_resource != resource)
421			continue;
422		if (rule->rr_action != RCTL_ACTION_DENY)
423			continue;
424		if (rule->rr_amount < amount)
425			amount = rule->rr_amount;
426	}
427
428	rw_runlock(&rctl_lock);
429
430	return (amount);
431}
432
433uint64_t
434rctl_get_available(struct proc *p, int resource)
435{
436	struct rctl_rule *rule;
437	struct rctl_rule_link *link;
438	int64_t available, minavailable, allocated;
439
440	minavailable = INT64_MAX;
441
442	rw_rlock(&rctl_lock);
443
444	/*
445	 * There may be more than one matching rule; go through all of them.
446	 * Denial should be done last, after logging and sending signals.
447	 */
448	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
449		rule = link->rrl_rule;
450		if (rule->rr_resource != resource)
451			continue;
452		if (rule->rr_action != RCTL_ACTION_DENY)
453			continue;
454		available = rctl_available_resource(p, rule);
455		if (available < minavailable)
456			minavailable = available;
457	}
458
459	rw_runlock(&rctl_lock);
460
461	/*
462	 * XXX: Think about this _hard_.
463	 */
464	allocated = p->p_racct->r_resources[resource];
465	if (minavailable < INT64_MAX - allocated)
466		minavailable += allocated;
467	if (minavailable < 0)
468		minavailable = 0;
469	return (minavailable);
470}
471
472static int
473rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
474{
475
476	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
477		if (rule->rr_subject_type != filter->rr_subject_type)
478			return (0);
479
480		switch (filter->rr_subject_type) {
481		case RCTL_SUBJECT_TYPE_PROCESS:
482			if (filter->rr_subject.rs_proc != NULL &&
483			    rule->rr_subject.rs_proc !=
484			    filter->rr_subject.rs_proc)
485				return (0);
486			break;
487		case RCTL_SUBJECT_TYPE_USER:
488			if (filter->rr_subject.rs_uip != NULL &&
489			    rule->rr_subject.rs_uip !=
490			    filter->rr_subject.rs_uip)
491				return (0);
492			break;
493		case RCTL_SUBJECT_TYPE_LOGINCLASS:
494			if (filter->rr_subject.rs_loginclass != NULL &&
495			    rule->rr_subject.rs_loginclass !=
496			    filter->rr_subject.rs_loginclass)
497				return (0);
498			break;
499		case RCTL_SUBJECT_TYPE_JAIL:
500			if (filter->rr_subject.rs_prison_racct != NULL &&
501			    rule->rr_subject.rs_prison_racct !=
502			    filter->rr_subject.rs_prison_racct)
503				return (0);
504			break;
505		default:
506			panic("rctl_rule_matches: unknown subject type %d",
507			    filter->rr_subject_type);
508		}
509	}
510
511	if (filter->rr_resource != RACCT_UNDEFINED) {
512		if (rule->rr_resource != filter->rr_resource)
513			return (0);
514	}
515
516	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
517		if (rule->rr_action != filter->rr_action)
518			return (0);
519	}
520
521	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
522		if (rule->rr_amount != filter->rr_amount)
523			return (0);
524	}
525
526	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
527		if (rule->rr_per != filter->rr_per)
528			return (0);
529	}
530
531	return (1);
532}
533
534static int
535str2value(const char *str, int *value, struct dict *table)
536{
537	int i;
538
539	if (value == NULL)
540		return (EINVAL);
541
542	for (i = 0; table[i].d_name != NULL; i++) {
543		if (strcasecmp(table[i].d_name, str) == 0) {
544			*value =  table[i].d_value;
545			return (0);
546		}
547	}
548
549	return (EINVAL);
550}
551
552static int
553str2id(const char *str, id_t *value)
554{
555	char *end;
556
557	if (str == NULL)
558		return (EINVAL);
559
560	*value = strtoul(str, &end, 10);
561	if ((size_t)(end - str) != strlen(str))
562		return (EINVAL);
563
564	return (0);
565}
566
/*
 * Convert the decimal string 'str' into a non-negative int64_t amount.
 *
 * The whole string has to be consumed by strtoul().  '*value' is written
 * even on failure.  An empty string converts to 0; callers are expected
 * to filter that case out themselves.
 *
 * Returns 0 on success, EINVAL for a NULL string or trailing garbage,
 * and ERANGE when the converted value does not fit a non-negative
 * int64_t.
 */
static int
str2int64(const char *str, int64_t *value)
{
	char *end;

	if (str == NULL)
		return (EINVAL);

	*value = strtoul(str, &end, 10);
	if ((size_t)(end - str) != strlen(str))
		return (EINVAL);

	/*
	 * strtoul() accepts a leading '-' and returns the negated value
	 * as an unsigned long, and it also returns values above INT64_MAX
	 * where unsigned long is 64 bits wide.  Both end up negative once
	 * stored in an int64_t; refuse them instead of handing the caller
	 * a bogus amount.
	 * NOTE(review): where unsigned long is 32 bits wide the result is
	 * always positive, so this check cannot catch either case there.
	 */
	if (*value < 0)
		return (ERANGE);

	return (0);
}
581
582/*
583 * Connect the rule to the racct, increasing refcount for the rule.
584 */
585static void
586rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
587{
588	struct rctl_rule_link *link;
589
590	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
591
592	rctl_rule_acquire(rule);
593	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
594	link->rrl_rule = rule;
595	link->rrl_exceeded = 0;
596
597	rw_wlock(&rctl_lock);
598	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
599	rw_wunlock(&rctl_lock);
600}
601
602static int
603rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
604{
605	struct rctl_rule_link *link;
606
607	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
608	rw_assert(&rctl_lock, RA_WLOCKED);
609
610	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
611	if (link == NULL)
612		return (ENOMEM);
613	rctl_rule_acquire(rule);
614	link->rrl_rule = rule;
615	link->rrl_exceeded = 0;
616
617	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
618	return (0);
619}
620
621/*
622 * Remove limits for a rules matching the filter and release
623 * the refcounts for the rules, possibly freeing them.  Returns
624 * the number of limit structures removed.
625 */
626static int
627rctl_racct_remove_rules(struct racct *racct,
628    const struct rctl_rule *filter)
629{
630	int removed = 0;
631	struct rctl_rule_link *link, *linktmp;
632
633	rw_assert(&rctl_lock, RA_WLOCKED);
634
635	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
636		if (!rctl_rule_matches(link->rrl_rule, filter))
637			continue;
638
639		LIST_REMOVE(link, rrl_next);
640		rctl_rule_release(link->rrl_rule);
641		uma_zfree(rctl_rule_link_zone, link);
642		removed++;
643	}
644	return (removed);
645}
646
647static void
648rctl_rule_acquire_subject(struct rctl_rule *rule)
649{
650
651	switch (rule->rr_subject_type) {
652	case RCTL_SUBJECT_TYPE_UNDEFINED:
653	case RCTL_SUBJECT_TYPE_PROCESS:
654		break;
655	case RCTL_SUBJECT_TYPE_JAIL:
656		if (rule->rr_subject.rs_prison_racct != NULL)
657			prison_racct_hold(rule->rr_subject.rs_prison_racct);
658		break;
659	case RCTL_SUBJECT_TYPE_USER:
660		if (rule->rr_subject.rs_uip != NULL)
661			uihold(rule->rr_subject.rs_uip);
662		break;
663	case RCTL_SUBJECT_TYPE_LOGINCLASS:
664		if (rule->rr_subject.rs_loginclass != NULL)
665			loginclass_hold(rule->rr_subject.rs_loginclass);
666		break;
667	default:
668		panic("rctl_rule_acquire_subject: unknown subject type %d",
669		    rule->rr_subject_type);
670	}
671}
672
673static void
674rctl_rule_release_subject(struct rctl_rule *rule)
675{
676
677	switch (rule->rr_subject_type) {
678	case RCTL_SUBJECT_TYPE_UNDEFINED:
679	case RCTL_SUBJECT_TYPE_PROCESS:
680		break;
681	case RCTL_SUBJECT_TYPE_JAIL:
682		if (rule->rr_subject.rs_prison_racct != NULL)
683			prison_racct_free(rule->rr_subject.rs_prison_racct);
684		break;
685	case RCTL_SUBJECT_TYPE_USER:
686		if (rule->rr_subject.rs_uip != NULL)
687			uifree(rule->rr_subject.rs_uip);
688		break;
689	case RCTL_SUBJECT_TYPE_LOGINCLASS:
690		if (rule->rr_subject.rs_loginclass != NULL)
691			loginclass_free(rule->rr_subject.rs_loginclass);
692		break;
693	default:
694		panic("rctl_rule_release_subject: unknown subject type %d",
695		    rule->rr_subject_type);
696	}
697}
698
699struct rctl_rule *
700rctl_rule_alloc(int flags)
701{
702	struct rctl_rule *rule;
703
704	rule = uma_zalloc(rctl_rule_zone, flags);
705	if (rule == NULL)
706		return (NULL);
707	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
708	rule->rr_subject.rs_proc = NULL;
709	rule->rr_subject.rs_uip = NULL;
710	rule->rr_subject.rs_loginclass = NULL;
711	rule->rr_subject.rs_prison_racct = NULL;
712	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
713	rule->rr_resource = RACCT_UNDEFINED;
714	rule->rr_action = RCTL_ACTION_UNDEFINED;
715	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
716	refcount_init(&rule->rr_refcount, 1);
717
718	return (rule);
719}
720
721struct rctl_rule *
722rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
723{
724	struct rctl_rule *copy;
725
726	copy = uma_zalloc(rctl_rule_zone, flags);
727	if (copy == NULL)
728		return (NULL);
729	copy->rr_subject_type = rule->rr_subject_type;
730	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
731	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
732	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
733	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
734	copy->rr_per = rule->rr_per;
735	copy->rr_resource = rule->rr_resource;
736	copy->rr_action = rule->rr_action;
737	copy->rr_amount = rule->rr_amount;
738	refcount_init(&copy->rr_refcount, 1);
739	rctl_rule_acquire_subject(copy);
740
741	return (copy);
742}
743
744void
745rctl_rule_acquire(struct rctl_rule *rule)
746{
747
748	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
749
750	refcount_acquire(&rule->rr_refcount);
751}
752
753static void
754rctl_rule_free(void *context, int pending)
755{
756	struct rctl_rule *rule;
757
758	rule = (struct rctl_rule *)context;
759
760	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
761
762	/*
763	 * We don't need locking here; rule is guaranteed to be inaccessible.
764	 */
765
766	rctl_rule_release_subject(rule);
767	uma_zfree(rctl_rule_zone, rule);
768}
769
770void
771rctl_rule_release(struct rctl_rule *rule)
772{
773
774	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
775
776	if (refcount_release(&rule->rr_refcount)) {
777		/*
778		 * rctl_rule_release() is often called when iterating
779		 * over all the uidinfo structures in the system,
780		 * holding uihashtbl_lock.  Since rctl_rule_free()
781		 * might end up calling uifree(), this would lead
782		 * to lock recursion.  Use taskqueue to avoid this.
783		 */
784		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
785		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
786	}
787}
788
789static int
790rctl_rule_fully_specified(const struct rctl_rule *rule)
791{
792
793	switch (rule->rr_subject_type) {
794	case RCTL_SUBJECT_TYPE_UNDEFINED:
795		return (0);
796	case RCTL_SUBJECT_TYPE_PROCESS:
797		if (rule->rr_subject.rs_proc == NULL)
798			return (0);
799		break;
800	case RCTL_SUBJECT_TYPE_USER:
801		if (rule->rr_subject.rs_uip == NULL)
802			return (0);
803		break;
804	case RCTL_SUBJECT_TYPE_LOGINCLASS:
805		if (rule->rr_subject.rs_loginclass == NULL)
806			return (0);
807		break;
808	case RCTL_SUBJECT_TYPE_JAIL:
809		if (rule->rr_subject.rs_prison_racct == NULL)
810			return (0);
811		break;
812	default:
813		panic("rctl_rule_fully_specified: unknown subject type %d",
814		    rule->rr_subject_type);
815	}
816	if (rule->rr_resource == RACCT_UNDEFINED)
817		return (0);
818	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
819		return (0);
820	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
821		return (0);
822	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
823		return (0);
824
825	return (1);
826}
827
828static int
829rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
830{
831	int error = 0;
832	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
833	     *amountstr, *perstr;
834	struct rctl_rule *rule;
835	id_t id;
836
837	rule = rctl_rule_alloc(M_WAITOK);
838
839	subjectstr = strsep(&rulestr, ":");
840	subject_idstr = strsep(&rulestr, ":");
841	resourcestr = strsep(&rulestr, ":");
842	actionstr = strsep(&rulestr, "=/");
843	amountstr = strsep(&rulestr, "/");
844	perstr = rulestr;
845
846	if (subjectstr == NULL || subjectstr[0] == '\0')
847		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
848	else {
849		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
850		if (error != 0)
851			goto out;
852	}
853
854	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
855		rule->rr_subject.rs_proc = NULL;
856		rule->rr_subject.rs_uip = NULL;
857		rule->rr_subject.rs_loginclass = NULL;
858		rule->rr_subject.rs_prison_racct = NULL;
859	} else {
860		switch (rule->rr_subject_type) {
861		case RCTL_SUBJECT_TYPE_UNDEFINED:
862			error = EINVAL;
863			goto out;
864		case RCTL_SUBJECT_TYPE_PROCESS:
865			error = str2id(subject_idstr, &id);
866			if (error != 0)
867				goto out;
868			sx_assert(&allproc_lock, SA_LOCKED);
869			rule->rr_subject.rs_proc = pfind(id);
870			if (rule->rr_subject.rs_proc == NULL) {
871				error = ESRCH;
872				goto out;
873			}
874			PROC_UNLOCK(rule->rr_subject.rs_proc);
875			break;
876		case RCTL_SUBJECT_TYPE_USER:
877			error = str2id(subject_idstr, &id);
878			if (error != 0)
879				goto out;
880			rule->rr_subject.rs_uip = uifind(id);
881			break;
882		case RCTL_SUBJECT_TYPE_LOGINCLASS:
883			rule->rr_subject.rs_loginclass =
884			    loginclass_find(subject_idstr);
885			if (rule->rr_subject.rs_loginclass == NULL) {
886				error = ENAMETOOLONG;
887				goto out;
888			}
889			break;
890		case RCTL_SUBJECT_TYPE_JAIL:
891			rule->rr_subject.rs_prison_racct =
892			    prison_racct_find(subject_idstr);
893			if (rule->rr_subject.rs_prison_racct == NULL) {
894				error = ENAMETOOLONG;
895				goto out;
896			}
897			break;
898               default:
899                       panic("rctl_string_to_rule: unknown subject type %d",
900                           rule->rr_subject_type);
901               }
902	}
903
904	if (resourcestr == NULL || resourcestr[0] == '\0')
905		rule->rr_resource = RACCT_UNDEFINED;
906	else {
907		error = str2value(resourcestr, &rule->rr_resource,
908		    resourcenames);
909		if (error != 0)
910			goto out;
911	}
912
913	if (actionstr == NULL || actionstr[0] == '\0')
914		rule->rr_action = RCTL_ACTION_UNDEFINED;
915	else {
916		error = str2value(actionstr, &rule->rr_action, actionnames);
917		if (error != 0)
918			goto out;
919	}
920
921	if (amountstr == NULL || amountstr[0] == '\0')
922		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
923	else {
924		error = str2int64(amountstr, &rule->rr_amount);
925		if (error != 0)
926			goto out;
927		if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
928			rule->rr_amount *= 1000000;
929	}
930
931	if (perstr == NULL || perstr[0] == '\0')
932		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
933	else {
934		error = str2value(perstr, &rule->rr_per, subjectnames);
935		if (error != 0)
936			goto out;
937	}
938
939out:
940	if (error == 0)
941		*rulep = rule;
942	else
943		rctl_rule_release(rule);
944
945	return (error);
946}
947
948/*
949 * Link a rule with all the subjects it applies to.
950 */
951int
952rctl_rule_add(struct rctl_rule *rule)
953{
954	struct proc *p;
955	struct ucred *cred;
956	struct uidinfo *uip;
957	struct prison *pr;
958	struct prison_racct *prr;
959	struct loginclass *lc;
960	struct rctl_rule *rule2;
961	int match;
962
963	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
964
965	/*
966	 * Some rules just don't make sense.  Note that the one below
967	 * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
968	 * for example, is not deniable in the racct sense, but the
969	 * limit is enforced in a different way, so "deny" rules for %CPU
970	 * do make sense.
971	 */
972	if (rule->rr_action == RCTL_ACTION_DENY &&
973	    (rule->rr_resource == RACCT_CPU ||
974	    rule->rr_resource == RACCT_WALLCLOCK))
975		return (EOPNOTSUPP);
976
977	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
978	    RACCT_IS_SLOPPY(rule->rr_resource))
979		return (EOPNOTSUPP);
980
981	/*
982	 * Make sure there are no duplicated rules.  Also, for the "deny"
983	 * rules, remove ones differing only by "amount".
984	 */
985	if (rule->rr_action == RCTL_ACTION_DENY) {
986		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
987		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
988		rctl_rule_remove(rule2);
989		rctl_rule_release(rule2);
990	} else
991		rctl_rule_remove(rule);
992
993	switch (rule->rr_subject_type) {
994	case RCTL_SUBJECT_TYPE_PROCESS:
995		p = rule->rr_subject.rs_proc;
996		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
997		/*
998		 * No resource limits for system processes.
999		 */
1000		if (p->p_flag & P_SYSTEM)
1001			return (EPERM);
1002
1003		rctl_racct_add_rule(p->p_racct, rule);
1004		/*
1005		 * In case of per-process rule, we don't have anything more
1006		 * to do.
1007		 */
1008		return (0);
1009
1010	case RCTL_SUBJECT_TYPE_USER:
1011		uip = rule->rr_subject.rs_uip;
1012		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1013		rctl_racct_add_rule(uip->ui_racct, rule);
1014		break;
1015
1016	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1017		lc = rule->rr_subject.rs_loginclass;
1018		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1019		rctl_racct_add_rule(lc->lc_racct, rule);
1020		break;
1021
1022	case RCTL_SUBJECT_TYPE_JAIL:
1023		prr = rule->rr_subject.rs_prison_racct;
1024		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1025		rctl_racct_add_rule(prr->prr_racct, rule);
1026		break;
1027
1028	default:
1029		panic("rctl_rule_add: unknown subject type %d",
1030		    rule->rr_subject_type);
1031	}
1032
1033	/*
1034	 * Now go through all the processes and add the new rule to the ones
1035	 * it applies to.
1036	 */
1037	sx_assert(&allproc_lock, SA_LOCKED);
1038	FOREACH_PROC_IN_SYSTEM(p) {
1039		if (p->p_flag & P_SYSTEM)
1040			continue;
1041		cred = p->p_ucred;
1042		switch (rule->rr_subject_type) {
1043		case RCTL_SUBJECT_TYPE_USER:
1044			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1045			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1046				break;
1047			continue;
1048		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1049			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1050				break;
1051			continue;
1052		case RCTL_SUBJECT_TYPE_JAIL:
1053			match = 0;
1054			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1055				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1056					match = 1;
1057					break;
1058				}
1059			}
1060			if (match)
1061				break;
1062			continue;
1063		default:
1064			panic("rctl_rule_add: unknown subject type %d",
1065			    rule->rr_subject_type);
1066		}
1067
1068		rctl_racct_add_rule(p->p_racct, rule);
1069	}
1070
1071	return (0);
1072}
1073
1074static void
1075rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1076{
1077	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1078	int found = 0;
1079
1080	rw_wlock(&rctl_lock);
1081	found += rctl_racct_remove_rules(racct, filter);
1082	rw_wunlock(&rctl_lock);
1083
1084	*((int *)arg3) += found;
1085}
1086
1087/*
1088 * Remove all rules that match the filter.
1089 */
1090int
1091rctl_rule_remove(struct rctl_rule *filter)
1092{
1093	int found = 0;
1094	struct proc *p;
1095
1096	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1097	    filter->rr_subject.rs_proc != NULL) {
1098		p = filter->rr_subject.rs_proc;
1099		rw_wlock(&rctl_lock);
1100		found = rctl_racct_remove_rules(p->p_racct, filter);
1101		rw_wunlock(&rctl_lock);
1102		if (found)
1103			return (0);
1104		return (ESRCH);
1105	}
1106
1107	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1108	    (void *)&found);
1109	ui_racct_foreach(rctl_rule_remove_callback, filter,
1110	    (void *)&found);
1111	prison_racct_foreach(rctl_rule_remove_callback, filter,
1112	    (void *)&found);
1113
1114	sx_assert(&allproc_lock, SA_LOCKED);
1115	rw_wlock(&rctl_lock);
1116	FOREACH_PROC_IN_SYSTEM(p) {
1117		found += rctl_racct_remove_rules(p->p_racct, filter);
1118	}
1119	rw_wunlock(&rctl_lock);
1120
1121	if (found)
1122		return (0);
1123	return (ESRCH);
1124}
1125
1126/*
1127 * Appends a rule to the sbuf.
1128 */
1129static void
1130rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1131{
1132	int64_t amount;
1133
1134	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1135
1136	switch (rule->rr_subject_type) {
1137	case RCTL_SUBJECT_TYPE_PROCESS:
1138		if (rule->rr_subject.rs_proc == NULL)
1139			sbuf_printf(sb, ":");
1140		else
1141			sbuf_printf(sb, "%d:",
1142			    rule->rr_subject.rs_proc->p_pid);
1143		break;
1144	case RCTL_SUBJECT_TYPE_USER:
1145		if (rule->rr_subject.rs_uip == NULL)
1146			sbuf_printf(sb, ":");
1147		else
1148			sbuf_printf(sb, "%d:",
1149			    rule->rr_subject.rs_uip->ui_uid);
1150		break;
1151	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1152		if (rule->rr_subject.rs_loginclass == NULL)
1153			sbuf_printf(sb, ":");
1154		else
1155			sbuf_printf(sb, "%s:",
1156			    rule->rr_subject.rs_loginclass->lc_name);
1157		break;
1158	case RCTL_SUBJECT_TYPE_JAIL:
1159		if (rule->rr_subject.rs_prison_racct == NULL)
1160			sbuf_printf(sb, ":");
1161		else
1162			sbuf_printf(sb, "%s:",
1163			    rule->rr_subject.rs_prison_racct->prr_name);
1164		break;
1165	default:
1166		panic("rctl_rule_to_sbuf: unknown subject type %d",
1167		    rule->rr_subject_type);
1168	}
1169
1170	amount = rule->rr_amount;
1171	if (amount != RCTL_AMOUNT_UNDEFINED &&
1172	    RACCT_IS_IN_MILLIONS(rule->rr_resource))
1173		amount /= 1000000;
1174
1175	sbuf_printf(sb, "%s:%s=%jd",
1176	    rctl_resource_name(rule->rr_resource),
1177	    rctl_action_name(rule->rr_action),
1178	    amount);
1179
1180	if (rule->rr_per != rule->rr_subject_type)
1181		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1182}
1183
1184/*
1185 * Routine used by RCTL syscalls to read in input string.
1186 */
1187static int
1188rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1189{
1190	int error;
1191	char *str;
1192
1193	if (inbuflen <= 0)
1194		return (EINVAL);
1195	if (inbuflen > RCTL_MAX_INBUFLEN)
1196		return (E2BIG);
1197
1198	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1199	error = copyinstr(inbufp, str, inbuflen, NULL);
1200	if (error != 0) {
1201		free(str, M_RCTL);
1202		return (error);
1203	}
1204
1205	*inputstr = str;
1206
1207	return (0);
1208}
1209
/*
 * Routine used by RCTL syscalls to write out output string.
 *
 * Finishes 'outputsbuf' and copies its contents, including the
 * terminating NUL, out to 'outbufp'.  The sbuf is deleted on every path
 * that gets past the NULL check.  Returns 0 when there is no sbuf at
 * all, ERANGE when 'outbuflen' is too small for the string plus its
 * terminator, and the result of copyout() otherwise.
 */
static int
rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
{
	int error;

	if (outputsbuf == NULL)
		return (0);

	sbuf_finish(outputsbuf);
	if (outbuflen < sbuf_len(outputsbuf) + 1)
		error = ERANGE;
	else
		error = copyout(sbuf_data(outputsbuf), outbufp,
		    sbuf_len(outputsbuf) + 1);

	sbuf_delete(outputsbuf);
	return (error);
}
1231
1232static struct sbuf *
1233rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1234{
1235	int i;
1236	int64_t amount;
1237	struct sbuf *sb;
1238
1239	sb = sbuf_new_auto();
1240	for (i = 0; i <= RACCT_MAX; i++) {
1241		if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1242			continue;
1243		amount = racct->r_resources[i];
1244		if (RACCT_IS_IN_MILLIONS(i))
1245			amount /= 1000000;
1246		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1247	}
1248	sbuf_setpos(sb, sbuf_len(sb) - 1);
1249	return (sb);
1250}
1251
1252int
1253sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1254{
1255	int error;
1256	char *inputstr;
1257	struct rctl_rule *filter;
1258	struct sbuf *outputsbuf = NULL;
1259	struct proc *p;
1260	struct uidinfo *uip;
1261	struct loginclass *lc;
1262	struct prison_racct *prr;
1263
1264	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1265	if (error != 0)
1266		return (error);
1267
1268	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1269	if (error != 0)
1270		return (error);
1271
1272	sx_slock(&allproc_lock);
1273	error = rctl_string_to_rule(inputstr, &filter);
1274	free(inputstr, M_RCTL);
1275	if (error != 0) {
1276		sx_sunlock(&allproc_lock);
1277		return (error);
1278	}
1279
1280	switch (filter->rr_subject_type) {
1281	case RCTL_SUBJECT_TYPE_PROCESS:
1282		p = filter->rr_subject.rs_proc;
1283		if (p == NULL) {
1284			error = EINVAL;
1285			goto out;
1286		}
1287		if (p->p_flag & P_SYSTEM) {
1288			error = EINVAL;
1289			goto out;
1290		}
1291		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1292		break;
1293	case RCTL_SUBJECT_TYPE_USER:
1294		uip = filter->rr_subject.rs_uip;
1295		if (uip == NULL) {
1296			error = EINVAL;
1297			goto out;
1298		}
1299		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1300		break;
1301	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1302		lc = filter->rr_subject.rs_loginclass;
1303		if (lc == NULL) {
1304			error = EINVAL;
1305			goto out;
1306		}
1307		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1308		break;
1309	case RCTL_SUBJECT_TYPE_JAIL:
1310		prr = filter->rr_subject.rs_prison_racct;
1311		if (prr == NULL) {
1312			error = EINVAL;
1313			goto out;
1314		}
1315		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1316		break;
1317	default:
1318		error = EINVAL;
1319	}
1320out:
1321	rctl_rule_release(filter);
1322	sx_sunlock(&allproc_lock);
1323	if (error != 0)
1324		return (error);
1325
1326	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1327
1328	return (error);
1329}
1330
1331static void
1332rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1333{
1334	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1335	struct rctl_rule_link *link;
1336	struct sbuf *sb = (struct sbuf *)arg3;
1337
1338	rw_rlock(&rctl_lock);
1339	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1340		if (!rctl_rule_matches(link->rrl_rule, filter))
1341			continue;
1342		rctl_rule_to_sbuf(sb, link->rrl_rule);
1343		sbuf_printf(sb, ",");
1344	}
1345	rw_runlock(&rctl_lock);
1346}
1347
1348int
1349sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1350{
1351	int error;
1352	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1353	char *inputstr, *buf;
1354	struct sbuf *sb;
1355	struct rctl_rule *filter;
1356	struct rctl_rule_link *link;
1357	struct proc *p;
1358
1359	error = priv_check(td, PRIV_RCTL_GET_RULES);
1360	if (error != 0)
1361		return (error);
1362
1363	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1364	if (error != 0)
1365		return (error);
1366
1367	sx_slock(&allproc_lock);
1368	error = rctl_string_to_rule(inputstr, &filter);
1369	free(inputstr, M_RCTL);
1370	if (error != 0) {
1371		sx_sunlock(&allproc_lock);
1372		return (error);
1373	}
1374
1375again:
1376	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1377	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1378	KASSERT(sb != NULL, ("sbuf_new failed"));
1379
1380	sx_assert(&allproc_lock, SA_LOCKED);
1381	FOREACH_PROC_IN_SYSTEM(p) {
1382		rw_rlock(&rctl_lock);
1383		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1384			/*
1385			 * Non-process rules will be added to the buffer later.
1386			 * Adding them here would result in duplicated output.
1387			 */
1388			if (link->rrl_rule->rr_subject_type !=
1389			    RCTL_SUBJECT_TYPE_PROCESS)
1390				continue;
1391			if (!rctl_rule_matches(link->rrl_rule, filter))
1392				continue;
1393			rctl_rule_to_sbuf(sb, link->rrl_rule);
1394			sbuf_printf(sb, ",");
1395		}
1396		rw_runlock(&rctl_lock);
1397	}
1398
1399	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1400	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1401	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1402	if (sbuf_error(sb) == ENOMEM) {
1403		sbuf_delete(sb);
1404		free(buf, M_RCTL);
1405		bufsize *= 4;
1406		goto again;
1407	}
1408
1409	/*
1410	 * Remove trailing ",".
1411	 */
1412	if (sbuf_len(sb) > 0)
1413		sbuf_setpos(sb, sbuf_len(sb) - 1);
1414
1415	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1416
1417	rctl_rule_release(filter);
1418	sx_sunlock(&allproc_lock);
1419	free(buf, M_RCTL);
1420	return (error);
1421}
1422
1423int
1424sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1425{
1426	int error;
1427	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1428	char *inputstr, *buf;
1429	struct sbuf *sb;
1430	struct rctl_rule *filter;
1431	struct rctl_rule_link *link;
1432
1433	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1434	if (error != 0)
1435		return (error);
1436
1437	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1438	if (error != 0)
1439		return (error);
1440
1441	sx_slock(&allproc_lock);
1442	error = rctl_string_to_rule(inputstr, &filter);
1443	free(inputstr, M_RCTL);
1444	if (error != 0) {
1445		sx_sunlock(&allproc_lock);
1446		return (error);
1447	}
1448
1449	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1450		rctl_rule_release(filter);
1451		sx_sunlock(&allproc_lock);
1452		return (EINVAL);
1453	}
1454	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1455		rctl_rule_release(filter);
1456		sx_sunlock(&allproc_lock);
1457		return (EOPNOTSUPP);
1458	}
1459	if (filter->rr_subject.rs_proc == NULL) {
1460		rctl_rule_release(filter);
1461		sx_sunlock(&allproc_lock);
1462		return (EINVAL);
1463	}
1464
1465again:
1466	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1467	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1468	KASSERT(sb != NULL, ("sbuf_new failed"));
1469
1470	rw_rlock(&rctl_lock);
1471	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1472	    rrl_next) {
1473		rctl_rule_to_sbuf(sb, link->rrl_rule);
1474		sbuf_printf(sb, ",");
1475	}
1476	rw_runlock(&rctl_lock);
1477	if (sbuf_error(sb) == ENOMEM) {
1478		sbuf_delete(sb);
1479		free(buf, M_RCTL);
1480		bufsize *= 4;
1481		goto again;
1482	}
1483
1484	/*
1485	 * Remove trailing ",".
1486	 */
1487	if (sbuf_len(sb) > 0)
1488		sbuf_setpos(sb, sbuf_len(sb) - 1);
1489
1490	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1491	rctl_rule_release(filter);
1492	sx_sunlock(&allproc_lock);
1493	free(buf, M_RCTL);
1494	return (error);
1495}
1496
1497int
1498sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1499{
1500	int error;
1501	struct rctl_rule *rule;
1502	char *inputstr;
1503
1504	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1505	if (error != 0)
1506		return (error);
1507
1508	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1509	if (error != 0)
1510		return (error);
1511
1512	sx_slock(&allproc_lock);
1513	error = rctl_string_to_rule(inputstr, &rule);
1514	free(inputstr, M_RCTL);
1515	if (error != 0) {
1516		sx_sunlock(&allproc_lock);
1517		return (error);
1518	}
1519	/*
1520	 * The 'per' part of a rule is optional.
1521	 */
1522	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1523	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1524		rule->rr_per = rule->rr_subject_type;
1525
1526	if (!rctl_rule_fully_specified(rule)) {
1527		error = EINVAL;
1528		goto out;
1529	}
1530
1531	error = rctl_rule_add(rule);
1532
1533out:
1534	rctl_rule_release(rule);
1535	sx_sunlock(&allproc_lock);
1536	return (error);
1537}
1538
1539int
1540sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1541{
1542	int error;
1543	struct rctl_rule *filter;
1544	char *inputstr;
1545
1546	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1547	if (error != 0)
1548		return (error);
1549
1550	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1551	if (error != 0)
1552		return (error);
1553
1554	sx_slock(&allproc_lock);
1555	error = rctl_string_to_rule(inputstr, &filter);
1556	free(inputstr, M_RCTL);
1557	if (error != 0) {
1558		sx_sunlock(&allproc_lock);
1559		return (error);
1560	}
1561
1562	error = rctl_rule_remove(filter);
1563	rctl_rule_release(filter);
1564	sx_sunlock(&allproc_lock);
1565
1566	return (error);
1567}
1568
1569/*
1570 * Update RCTL rule list after credential change.
1571 */
1572void
1573rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1574{
1575	int rulecnt, i;
1576	struct rctl_rule_link *link, *newlink;
1577	struct uidinfo *newuip;
1578	struct loginclass *newlc;
1579	struct prison_racct *newprr;
1580	LIST_HEAD(, rctl_rule_link) newrules;
1581
1582	newuip = newcred->cr_ruidinfo;
1583	newlc = newcred->cr_loginclass;
1584	newprr = newcred->cr_prison->pr_prison_racct;
1585
1586	LIST_INIT(&newrules);
1587
1588again:
1589	/*
1590	 * First, count the rules that apply to the process with new
1591	 * credentials.
1592	 */
1593	rulecnt = 0;
1594	rw_rlock(&rctl_lock);
1595	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1596		if (link->rrl_rule->rr_subject_type ==
1597		    RCTL_SUBJECT_TYPE_PROCESS)
1598			rulecnt++;
1599	}
1600	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1601		rulecnt++;
1602	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1603		rulecnt++;
1604	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1605		rulecnt++;
1606	rw_runlock(&rctl_lock);
1607
1608	/*
1609	 * Create temporary list.  We've dropped the rctl_lock in order
1610	 * to use M_WAITOK.
1611	 */
1612	for (i = 0; i < rulecnt; i++) {
1613		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1614		newlink->rrl_rule = NULL;
1615		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1616	}
1617
1618	newlink = LIST_FIRST(&newrules);
1619
1620	/*
1621	 * Assign rules to the newly allocated list entries.
1622	 */
1623	rw_wlock(&rctl_lock);
1624	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1625		if (link->rrl_rule->rr_subject_type ==
1626		    RCTL_SUBJECT_TYPE_PROCESS) {
1627			if (newlink == NULL)
1628				goto goaround;
1629			rctl_rule_acquire(link->rrl_rule);
1630			newlink->rrl_rule = link->rrl_rule;
1631			newlink = LIST_NEXT(newlink, rrl_next);
1632			rulecnt--;
1633		}
1634	}
1635
1636	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1637		if (newlink == NULL)
1638			goto goaround;
1639		rctl_rule_acquire(link->rrl_rule);
1640		newlink->rrl_rule = link->rrl_rule;
1641		newlink = LIST_NEXT(newlink, rrl_next);
1642		rulecnt--;
1643	}
1644
1645	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1646		if (newlink == NULL)
1647			goto goaround;
1648		rctl_rule_acquire(link->rrl_rule);
1649		newlink->rrl_rule = link->rrl_rule;
1650		newlink = LIST_NEXT(newlink, rrl_next);
1651		rulecnt--;
1652	}
1653
1654	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1655		if (newlink == NULL)
1656			goto goaround;
1657		rctl_rule_acquire(link->rrl_rule);
1658		newlink->rrl_rule = link->rrl_rule;
1659		newlink = LIST_NEXT(newlink, rrl_next);
1660		rulecnt--;
1661	}
1662
1663	if (rulecnt == 0) {
1664		/*
1665		 * Free the old rule list.
1666		 */
1667		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1668			link = LIST_FIRST(&p->p_racct->r_rule_links);
1669			LIST_REMOVE(link, rrl_next);
1670			rctl_rule_release(link->rrl_rule);
1671			uma_zfree(rctl_rule_link_zone, link);
1672		}
1673
1674		/*
1675		 * Replace lists and we're done.
1676		 *
1677		 * XXX: Is there any way to switch list heads instead
1678		 *      of iterating here?
1679		 */
1680		while (!LIST_EMPTY(&newrules)) {
1681			newlink = LIST_FIRST(&newrules);
1682			LIST_REMOVE(newlink, rrl_next);
1683			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1684			    newlink, rrl_next);
1685		}
1686
1687		rw_wunlock(&rctl_lock);
1688
1689		return;
1690	}
1691
1692goaround:
1693	rw_wunlock(&rctl_lock);
1694
1695	/*
1696	 * Rule list changed while we were not holding the rctl_lock.
1697	 * Free the new list and try again.
1698	 */
1699	while (!LIST_EMPTY(&newrules)) {
1700		newlink = LIST_FIRST(&newrules);
1701		LIST_REMOVE(newlink, rrl_next);
1702		if (newlink->rrl_rule != NULL)
1703			rctl_rule_release(newlink->rrl_rule);
1704		uma_zfree(rctl_rule_link_zone, newlink);
1705	}
1706
1707	goto again;
1708}
1709
1710/*
1711 * Assign RCTL rules to the newly created process.
1712 */
1713int
1714rctl_proc_fork(struct proc *parent, struct proc *child)
1715{
1716	int error;
1717	struct rctl_rule_link *link;
1718	struct rctl_rule *rule;
1719
1720	LIST_INIT(&child->p_racct->r_rule_links);
1721
1722	/*
1723	 * No limits for kernel processes.
1724	 */
1725	if (child->p_flag & P_SYSTEM)
1726		return (0);
1727
1728	/*
1729	 * Nothing to inherit from P_SYSTEM parents.
1730	 */
1731	if (parent->p_racct == NULL) {
1732		KASSERT(parent->p_flag & P_SYSTEM,
1733		    ("non-system process without racct; p = %p", parent));
1734		return (0);
1735	}
1736
1737	rw_wlock(&rctl_lock);
1738
1739	/*
1740	 * Go through limits applicable to the parent and assign them
1741	 * to the child.  Rules with 'process' subject have to be duplicated
1742	 * in order to make their rr_subject point to the new process.
1743	 */
1744	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1745		if (link->rrl_rule->rr_subject_type ==
1746		    RCTL_SUBJECT_TYPE_PROCESS) {
1747			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1748			if (rule == NULL)
1749				goto fail;
1750			KASSERT(rule->rr_subject.rs_proc == parent,
1751			    ("rule->rr_subject.rs_proc != parent"));
1752			rule->rr_subject.rs_proc = child;
1753			error = rctl_racct_add_rule_locked(child->p_racct,
1754			    rule);
1755			rctl_rule_release(rule);
1756			if (error != 0)
1757				goto fail;
1758		} else {
1759			error = rctl_racct_add_rule_locked(child->p_racct,
1760			    link->rrl_rule);
1761			if (error != 0)
1762				goto fail;
1763		}
1764	}
1765
1766	rw_wunlock(&rctl_lock);
1767	return (0);
1768
1769fail:
1770	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1771		link = LIST_FIRST(&child->p_racct->r_rule_links);
1772		LIST_REMOVE(link, rrl_next);
1773		rctl_rule_release(link->rrl_rule);
1774		uma_zfree(rctl_rule_link_zone, link);
1775	}
1776	rw_wunlock(&rctl_lock);
1777	return (EAGAIN);
1778}
1779
1780/*
1781 * Release rules attached to the racct.
1782 */
1783void
1784rctl_racct_release(struct racct *racct)
1785{
1786	struct rctl_rule_link *link;
1787
1788	rw_wlock(&rctl_lock);
1789	while (!LIST_EMPTY(&racct->r_rule_links)) {
1790		link = LIST_FIRST(&racct->r_rule_links);
1791		LIST_REMOVE(link, rrl_next);
1792		rctl_rule_release(link->rrl_rule);
1793		uma_zfree(rctl_rule_link_zone, link);
1794	}
1795	rw_wunlock(&rctl_lock);
1796}
1797
1798static void
1799rctl_init(void)
1800{
1801
1802	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1803	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1804	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1805	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1806	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1807}
1808
1809#else /* !RCTL */
1810
/*
 * Stub compiled when RCTL is not defined: the request is always
 * refused with ENOSYS and neither argument is looked at.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	return (ENOSYS);
}
1817
/*
 * Stub compiled when RCTL is not defined: the request is always
 * refused with ENOSYS and neither argument is looked at.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	return (ENOSYS);
}
1824
/*
 * Stub compiled when RCTL is not defined: the request is always
 * refused with ENOSYS and neither argument is looked at.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	return (ENOSYS);
}
1831
/*
 * Stub compiled when RCTL is not defined: the request is always
 * refused with ENOSYS and neither argument is looked at.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	return (ENOSYS);
}
1838
/*
 * Stub compiled when RCTL is not defined: the request is always
 * refused with ENOSYS and neither argument is looked at.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	return (ENOSYS);
}
1845
1846#endif /* !RCTL */
1847