kern_rctl.c revision 227293
1/*-
2 * Copyright (c) 2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/kern/kern_rctl.c 227293 2011-11-07 06:44:47Z ed $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/kern/kern_rctl.c 227293 2011-11-07 06:44:47Z ed $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/malloc.h>
38#include <sys/queue.h>
39#include <sys/refcount.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/limits.h>
43#include <sys/loginclass.h>
44#include <sys/priv.h>
45#include <sys/proc.h>
46#include <sys/racct.h>
47#include <sys/rctl.h>
48#include <sys/resourcevar.h>
49#include <sys/sx.h>
50#include <sys/sysent.h>
51#include <sys/sysproto.h>
52#include <sys/systm.h>
53#include <sys/types.h>
54#include <sys/eventhandler.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#include <sys/rwlock.h>
58#include <sys/sbuf.h>
59#include <sys/taskqueue.h>
60#include <sys/tree.h>
61#include <vm/uma.h>
62
63#ifdef RCTL
64#ifndef RACCT
65#error "The RCTL option requires the RACCT option"
66#endif
67
68FEATURE(rctl, "Resource Limits");
69
/*
 * HRF_* flag values.  NOTE(review): nothing in the visible part of this
 * file references them; confirm their users before changing or removing.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/* Default buffer size for rctl_get_rules(2). */
#define	RCTL_DEFAULT_BUFSIZE	4096
/* Size of the fixed buffer used to format a rule for log/devctl output. */
#define	RCTL_LOG_BUFSIZE	128
77
78/*
79 * 'rctl_rule_link' connects a rule with every racct it's related to.
80 * For example, rule 'user:X:openfiles:deny=N/process' is linked
81 * with uidinfo for user X, and to each process of that user.
82 */
83struct rctl_rule_link {
84	LIST_ENTRY(rctl_rule_link)	rrl_next;
85	struct rctl_rule		*rrl_rule;
86	int				rrl_exceeded;
87};
88
/*
 * One entry of a name <-> value lookup table.  The tables in this file
 * are terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		d_value;	/* numeric constant for that name */
};
93
94static struct dict subjectnames[] = {
95	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
96	{ "user", RCTL_SUBJECT_TYPE_USER },
97	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
98	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
99	{ NULL, -1 }};
100
101static struct dict resourcenames[] = {
102	{ "cputime", RACCT_CPU },
103	{ "datasize", RACCT_DATA },
104	{ "stacksize", RACCT_STACK },
105	{ "coredumpsize", RACCT_CORE },
106	{ "memoryuse", RACCT_RSS },
107	{ "memorylocked", RACCT_MEMLOCK },
108	{ "maxproc", RACCT_NPROC },
109	{ "openfiles", RACCT_NOFILE },
110	{ "vmemoryuse", RACCT_VMEM },
111	{ "pseudoterminals", RACCT_NPTS },
112	{ "swapuse", RACCT_SWAP },
113	{ "nthr", RACCT_NTHR },
114	{ "msgqqueued", RACCT_MSGQQUEUED },
115	{ "msgqsize", RACCT_MSGQSIZE },
116	{ "nmsgq", RACCT_NMSGQ },
117	{ "nsem", RACCT_NSEM },
118	{ "nsemop", RACCT_NSEMOP },
119	{ "nshm", RACCT_NSHM },
120	{ "shmsize", RACCT_SHMSIZE },
121	{ "wallclock", RACCT_WALLCLOCK },
122	{ NULL, -1 }};
123
124static struct dict actionnames[] = {
125	{ "sighup", RCTL_ACTION_SIGHUP },
126	{ "sigint", RCTL_ACTION_SIGINT },
127	{ "sigquit", RCTL_ACTION_SIGQUIT },
128	{ "sigill", RCTL_ACTION_SIGILL },
129	{ "sigtrap", RCTL_ACTION_SIGTRAP },
130	{ "sigabrt", RCTL_ACTION_SIGABRT },
131	{ "sigemt", RCTL_ACTION_SIGEMT },
132	{ "sigfpe", RCTL_ACTION_SIGFPE },
133	{ "sigkill", RCTL_ACTION_SIGKILL },
134	{ "sigbus", RCTL_ACTION_SIGBUS },
135	{ "sigsegv", RCTL_ACTION_SIGSEGV },
136	{ "sigsys", RCTL_ACTION_SIGSYS },
137	{ "sigpipe", RCTL_ACTION_SIGPIPE },
138	{ "sigalrm", RCTL_ACTION_SIGALRM },
139	{ "sigterm", RCTL_ACTION_SIGTERM },
140	{ "sigurg", RCTL_ACTION_SIGURG },
141	{ "sigstop", RCTL_ACTION_SIGSTOP },
142	{ "sigtstp", RCTL_ACTION_SIGTSTP },
143	{ "sigchld", RCTL_ACTION_SIGCHLD },
144	{ "sigttin", RCTL_ACTION_SIGTTIN },
145	{ "sigttou", RCTL_ACTION_SIGTTOU },
146	{ "sigio", RCTL_ACTION_SIGIO },
147	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
148	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
149	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
150	{ "sigprof", RCTL_ACTION_SIGPROF },
151	{ "sigwinch", RCTL_ACTION_SIGWINCH },
152	{ "siginfo", RCTL_ACTION_SIGINFO },
153	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
154	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
155	{ "sigthr", RCTL_ACTION_SIGTHR },
156	{ "deny", RCTL_ACTION_DENY },
157	{ "log", RCTL_ACTION_LOG },
158	{ "devctl", RCTL_ACTION_DEVCTL },
159	{ NULL, -1 }};
160
161static void rctl_init(void);
162SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
163
164static uma_zone_t rctl_rule_link_zone;
165static uma_zone_t rctl_rule_zone;
166static struct rwlock rctl_lock;
167RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
168
169static int rctl_rule_fully_specified(const struct rctl_rule *rule);
170static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
171
172static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
173
174static const char *
175rctl_subject_type_name(int subject)
176{
177	int i;
178
179	for (i = 0; subjectnames[i].d_name != NULL; i++) {
180		if (subjectnames[i].d_value == subject)
181			return (subjectnames[i].d_name);
182	}
183
184	panic("rctl_subject_type_name: unknown subject type %d", subject);
185}
186
187static const char *
188rctl_action_name(int action)
189{
190	int i;
191
192	for (i = 0; actionnames[i].d_name != NULL; i++) {
193		if (actionnames[i].d_value == action)
194			return (actionnames[i].d_name);
195	}
196
197	panic("rctl_action_name: unknown action %d", action);
198}
199
200const char *
201rctl_resource_name(int resource)
202{
203	int i;
204
205	for (i = 0; resourcenames[i].d_name != NULL; i++) {
206		if (resourcenames[i].d_value == resource)
207			return (resourcenames[i].d_name);
208	}
209
210	panic("rctl_resource_name: unknown resource %d", resource);
211}
212
213/*
214 * Return the amount of resource that can be allocated by 'p' before
215 * hitting 'rule'.
216 */
217static int64_t
218rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
219{
220	int resource;
221	int64_t available = INT64_MAX;
222	struct ucred *cred = p->p_ucred;
223
224	rw_assert(&rctl_lock, RA_LOCKED);
225
226	resource = rule->rr_resource;
227	switch (rule->rr_per) {
228	case RCTL_SUBJECT_TYPE_PROCESS:
229		available = rule->rr_amount -
230		    p->p_racct->r_resources[resource];
231		break;
232	case RCTL_SUBJECT_TYPE_USER:
233		available = rule->rr_amount -
234		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
235		break;
236	case RCTL_SUBJECT_TYPE_LOGINCLASS:
237		available = rule->rr_amount -
238		    cred->cr_loginclass->lc_racct->r_resources[resource];
239		break;
240	case RCTL_SUBJECT_TYPE_JAIL:
241		available = rule->rr_amount -
242		    cred->cr_prison->pr_prison_racct->prr_racct->
243		        r_resources[resource];
244		break;
245	default:
246		panic("rctl_compute_available: unknown per %d",
247		    rule->rr_per);
248	}
249
250	return (available);
251}
252
253/*
254 * Return non-zero if allocating 'amount' by proc 'p' would exceed
255 * resource limit specified by 'rule'.
256 */
257static int
258rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
259    int64_t amount)
260{
261	int64_t available;
262
263	rw_assert(&rctl_lock, RA_LOCKED);
264
265	available = rctl_available_resource(p, rule);
266	if (available >= amount)
267		return (0);
268
269	return (1);
270}
271
272/*
273 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
274 * to what it keeps allocated now.  Returns non-zero if the allocation should
275 * be denied, 0 otherwise.
276 */
277int
278rctl_enforce(struct proc *p, int resource, uint64_t amount)
279{
280	struct rctl_rule *rule;
281	struct rctl_rule_link *link;
282	struct sbuf sb;
283	int should_deny = 0;
284	char *buf;
285	static int curtime = 0;
286	static struct timeval lasttime;
287
288	rw_rlock(&rctl_lock);
289
290	/*
291	 * There may be more than one matching rule; go through all of them.
292	 * Denial should be done last, after logging and sending signals.
293	 */
294	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
295		rule = link->rrl_rule;
296		if (rule->rr_resource != resource)
297			continue;
298		if (!rctl_would_exceed(p, rule, amount)) {
299			link->rrl_exceeded = 0;
300			continue;
301		}
302
303		switch (rule->rr_action) {
304		case RCTL_ACTION_DENY:
305			should_deny = 1;
306			continue;
307		case RCTL_ACTION_LOG:
308			/*
309			 * If rrl_exceeded != 0, it means we've already
310			 * logged a warning for this process.
311			 */
312			if (link->rrl_exceeded != 0)
313				continue;
314
315			/*
316			 * If the process state is not fully initialized yet,
317			 * we can't access most of the required fields, e.g.
318			 * p->p_comm.  This happens when called from fork1().
319			 * Ignore this rule for now; it will be processed just
320			 * after fork, when called from racct_proc_fork_done().
321			 */
322			if (p->p_state != PRS_NORMAL)
323				continue;
324
325			if (!ppsratecheck(&lasttime, &curtime, 10))
326				continue;
327
328			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
329			if (buf == NULL) {
330				printf("rctl_enforce: out of memory\n");
331				continue;
332			}
333			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
334			rctl_rule_to_sbuf(&sb, rule);
335			sbuf_finish(&sb);
336			printf("rctl: rule \"%s\" matched by pid %d "
337			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
338			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
339			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
340			sbuf_delete(&sb);
341			free(buf, M_RCTL);
342			link->rrl_exceeded = 1;
343			continue;
344		case RCTL_ACTION_DEVCTL:
345			if (link->rrl_exceeded != 0)
346				continue;
347
348			if (p->p_state != PRS_NORMAL)
349				continue;
350
351			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
352			if (buf == NULL) {
353				printf("rctl_enforce: out of memory\n");
354				continue;
355			}
356			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
357			sbuf_printf(&sb, "rule=");
358			rctl_rule_to_sbuf(&sb, rule);
359			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
360			    p->p_pid, p->p_ucred->cr_ruid,
361			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
362			sbuf_finish(&sb);
363			devctl_notify_f("RCTL", "rule", "matched",
364			    sbuf_data(&sb), M_NOWAIT);
365			sbuf_delete(&sb);
366			free(buf, M_RCTL);
367			link->rrl_exceeded = 1;
368			continue;
369		default:
370			if (link->rrl_exceeded != 0)
371				continue;
372
373			if (p->p_state != PRS_NORMAL)
374				continue;
375
376			KASSERT(rule->rr_action > 0 &&
377			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
378			    ("rctl_enforce: unknown action %d",
379			     rule->rr_action));
380
381			/*
382			 * We're using the fact that RCTL_ACTION_SIG* values
383			 * are equal to their counterparts from sys/signal.h.
384			 */
385			kern_psignal(p, rule->rr_action);
386			link->rrl_exceeded = 1;
387			continue;
388		}
389	}
390
391	rw_runlock(&rctl_lock);
392
393	if (should_deny) {
394		/*
395		 * Return fake error code; the caller should change it
396		 * into one proper for the situation - EFSIZ, ENOMEM etc.
397		 */
398		return (EDOOFUS);
399	}
400
401	return (0);
402}
403
404uint64_t
405rctl_get_limit(struct proc *p, int resource)
406{
407	struct rctl_rule *rule;
408	struct rctl_rule_link *link;
409	uint64_t amount = UINT64_MAX;
410
411	rw_rlock(&rctl_lock);
412
413	/*
414	 * There may be more than one matching rule; go through all of them.
415	 * Denial should be done last, after logging and sending signals.
416	 */
417	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
418		rule = link->rrl_rule;
419		if (rule->rr_resource != resource)
420			continue;
421		if (rule->rr_action != RCTL_ACTION_DENY)
422			continue;
423		if (rule->rr_amount < amount)
424			amount = rule->rr_amount;
425	}
426
427	rw_runlock(&rctl_lock);
428
429	return (amount);
430}
431
432uint64_t
433rctl_get_available(struct proc *p, int resource)
434{
435	struct rctl_rule *rule;
436	struct rctl_rule_link *link;
437	int64_t available, minavailable, allocated;
438
439	minavailable = INT64_MAX;
440
441	rw_rlock(&rctl_lock);
442
443	/*
444	 * There may be more than one matching rule; go through all of them.
445	 * Denial should be done last, after logging and sending signals.
446	 */
447	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
448		rule = link->rrl_rule;
449		if (rule->rr_resource != resource)
450			continue;
451		if (rule->rr_action != RCTL_ACTION_DENY)
452			continue;
453		available = rctl_available_resource(p, rule);
454		if (available < minavailable)
455			minavailable = available;
456	}
457
458	rw_runlock(&rctl_lock);
459
460	/*
461	 * XXX: Think about this _hard_.
462	 */
463	allocated = p->p_racct->r_resources[resource];
464	if (minavailable < INT64_MAX - allocated)
465		minavailable += allocated;
466	if (minavailable < 0)
467		minavailable = 0;
468	return (minavailable);
469}
470
471static int
472rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
473{
474
475	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
476		if (rule->rr_subject_type != filter->rr_subject_type)
477			return (0);
478
479		switch (filter->rr_subject_type) {
480		case RCTL_SUBJECT_TYPE_PROCESS:
481			if (filter->rr_subject.rs_proc != NULL &&
482			    rule->rr_subject.rs_proc !=
483			    filter->rr_subject.rs_proc)
484				return (0);
485			break;
486		case RCTL_SUBJECT_TYPE_USER:
487			if (filter->rr_subject.rs_uip != NULL &&
488			    rule->rr_subject.rs_uip !=
489			    filter->rr_subject.rs_uip)
490				return (0);
491			break;
492		case RCTL_SUBJECT_TYPE_LOGINCLASS:
493			if (filter->rr_subject.rs_loginclass != NULL &&
494			    rule->rr_subject.rs_loginclass !=
495			    filter->rr_subject.rs_loginclass)
496				return (0);
497			break;
498		case RCTL_SUBJECT_TYPE_JAIL:
499			if (filter->rr_subject.rs_prison_racct != NULL &&
500			    rule->rr_subject.rs_prison_racct !=
501			    filter->rr_subject.rs_prison_racct)
502				return (0);
503			break;
504		default:
505			panic("rctl_rule_matches: unknown subject type %d",
506			    filter->rr_subject_type);
507		}
508	}
509
510	if (filter->rr_resource != RACCT_UNDEFINED) {
511		if (rule->rr_resource != filter->rr_resource)
512			return (0);
513	}
514
515	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
516		if (rule->rr_action != filter->rr_action)
517			return (0);
518	}
519
520	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
521		if (rule->rr_amount != filter->rr_amount)
522			return (0);
523	}
524
525	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
526		if (rule->rr_per != filter->rr_per)
527			return (0);
528	}
529
530	return (1);
531}
532
533static int
534str2value(const char *str, int *value, struct dict *table)
535{
536	int i;
537
538	if (value == NULL)
539		return (EINVAL);
540
541	for (i = 0; table[i].d_name != NULL; i++) {
542		if (strcasecmp(table[i].d_name, str) == 0) {
543			*value =  table[i].d_value;
544			return (0);
545		}
546	}
547
548	return (EINVAL);
549}
550
/*
 * Parse 'str' as a decimal id and store it in '*value'.
 *
 * Returns 0 on success.  Returns EINVAL when 'str' is NULL, empty, or
 * contains anything that strtoul() does not consume as part of the number.
 * An empty string is rejected explicitly: strtoul() consumes nothing from
 * it, so it would otherwise be silently accepted as id 0.
 */
static int
str2id(const char *str, id_t *value)
{
	char *end;

	if (str == NULL || str[0] == '\0')
		return (EINVAL);

	*value = strtoul(str, &end, 10);
	/* The whole string must have been consumed by the conversion. */
	if (*end != '\0')
		return (EINVAL);

	return (0);
}
565
566static int
567str2int64(const char *str, int64_t *value)
568{
569	char *end;
570
571	if (str == NULL)
572		return (EINVAL);
573
574	*value = strtoul(str, &end, 10);
575	if ((size_t)(end - str) != strlen(str))
576		return (EINVAL);
577
578	return (0);
579}
580
581/*
582 * Connect the rule to the racct, increasing refcount for the rule.
583 */
584static void
585rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
586{
587	struct rctl_rule_link *link;
588
589	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
590
591	rctl_rule_acquire(rule);
592	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
593	link->rrl_rule = rule;
594	link->rrl_exceeded = 0;
595
596	rw_wlock(&rctl_lock);
597	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
598	rw_wunlock(&rctl_lock);
599}
600
601static int
602rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
603{
604	struct rctl_rule_link *link;
605
606	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
607	rw_assert(&rctl_lock, RA_WLOCKED);
608
609	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
610	if (link == NULL)
611		return (ENOMEM);
612	rctl_rule_acquire(rule);
613	link->rrl_rule = rule;
614	link->rrl_exceeded = 0;
615
616	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
617	return (0);
618}
619
620/*
621 * Remove limits for a rules matching the filter and release
622 * the refcounts for the rules, possibly freeing them.  Returns
623 * the number of limit structures removed.
624 */
625static int
626rctl_racct_remove_rules(struct racct *racct,
627    const struct rctl_rule *filter)
628{
629	int removed = 0;
630	struct rctl_rule_link *link, *linktmp;
631
632	rw_assert(&rctl_lock, RA_WLOCKED);
633
634	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
635		if (!rctl_rule_matches(link->rrl_rule, filter))
636			continue;
637
638		LIST_REMOVE(link, rrl_next);
639		rctl_rule_release(link->rrl_rule);
640		uma_zfree(rctl_rule_link_zone, link);
641		removed++;
642	}
643	return (removed);
644}
645
646static void
647rctl_rule_acquire_subject(struct rctl_rule *rule)
648{
649
650	switch (rule->rr_subject_type) {
651	case RCTL_SUBJECT_TYPE_UNDEFINED:
652	case RCTL_SUBJECT_TYPE_PROCESS:
653		break;
654	case RCTL_SUBJECT_TYPE_JAIL:
655		if (rule->rr_subject.rs_prison_racct != NULL)
656			prison_racct_hold(rule->rr_subject.rs_prison_racct);
657		break;
658	case RCTL_SUBJECT_TYPE_USER:
659		if (rule->rr_subject.rs_uip != NULL)
660			uihold(rule->rr_subject.rs_uip);
661		break;
662	case RCTL_SUBJECT_TYPE_LOGINCLASS:
663		if (rule->rr_subject.rs_loginclass != NULL)
664			loginclass_hold(rule->rr_subject.rs_loginclass);
665		break;
666	default:
667		panic("rctl_rule_acquire_subject: unknown subject type %d",
668		    rule->rr_subject_type);
669	}
670}
671
672static void
673rctl_rule_release_subject(struct rctl_rule *rule)
674{
675
676	switch (rule->rr_subject_type) {
677	case RCTL_SUBJECT_TYPE_UNDEFINED:
678	case RCTL_SUBJECT_TYPE_PROCESS:
679		break;
680	case RCTL_SUBJECT_TYPE_JAIL:
681		if (rule->rr_subject.rs_prison_racct != NULL)
682			prison_racct_free(rule->rr_subject.rs_prison_racct);
683		break;
684	case RCTL_SUBJECT_TYPE_USER:
685		if (rule->rr_subject.rs_uip != NULL)
686			uifree(rule->rr_subject.rs_uip);
687		break;
688	case RCTL_SUBJECT_TYPE_LOGINCLASS:
689		if (rule->rr_subject.rs_loginclass != NULL)
690			loginclass_free(rule->rr_subject.rs_loginclass);
691		break;
692	default:
693		panic("rctl_rule_release_subject: unknown subject type %d",
694		    rule->rr_subject_type);
695	}
696}
697
698struct rctl_rule *
699rctl_rule_alloc(int flags)
700{
701	struct rctl_rule *rule;
702
703	rule = uma_zalloc(rctl_rule_zone, flags);
704	if (rule == NULL)
705		return (NULL);
706	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
707	rule->rr_subject.rs_proc = NULL;
708	rule->rr_subject.rs_uip = NULL;
709	rule->rr_subject.rs_loginclass = NULL;
710	rule->rr_subject.rs_prison_racct = NULL;
711	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
712	rule->rr_resource = RACCT_UNDEFINED;
713	rule->rr_action = RCTL_ACTION_UNDEFINED;
714	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
715	refcount_init(&rule->rr_refcount, 1);
716
717	return (rule);
718}
719
720struct rctl_rule *
721rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
722{
723	struct rctl_rule *copy;
724
725	copy = uma_zalloc(rctl_rule_zone, flags);
726	if (copy == NULL)
727		return (NULL);
728	copy->rr_subject_type = rule->rr_subject_type;
729	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
730	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
731	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
732	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
733	copy->rr_per = rule->rr_per;
734	copy->rr_resource = rule->rr_resource;
735	copy->rr_action = rule->rr_action;
736	copy->rr_amount = rule->rr_amount;
737	refcount_init(&copy->rr_refcount, 1);
738	rctl_rule_acquire_subject(copy);
739
740	return (copy);
741}
742
743void
744rctl_rule_acquire(struct rctl_rule *rule)
745{
746
747	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
748
749	refcount_acquire(&rule->rr_refcount);
750}
751
752static void
753rctl_rule_free(void *context, int pending)
754{
755	struct rctl_rule *rule;
756
757	rule = (struct rctl_rule *)context;
758
759	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
760
761	/*
762	 * We don't need locking here; rule is guaranteed to be inaccessible.
763	 */
764
765	rctl_rule_release_subject(rule);
766	uma_zfree(rctl_rule_zone, rule);
767}
768
769void
770rctl_rule_release(struct rctl_rule *rule)
771{
772
773	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
774
775	if (refcount_release(&rule->rr_refcount)) {
776		/*
777		 * rctl_rule_release() is often called when iterating
778		 * over all the uidinfo structures in the system,
779		 * holding uihashtbl_lock.  Since rctl_rule_free()
780		 * might end up calling uifree(), this would lead
781		 * to lock recursion.  Use taskqueue to avoid this.
782		 */
783		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
784		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
785	}
786}
787
788static int
789rctl_rule_fully_specified(const struct rctl_rule *rule)
790{
791
792	switch (rule->rr_subject_type) {
793	case RCTL_SUBJECT_TYPE_UNDEFINED:
794		return (0);
795	case RCTL_SUBJECT_TYPE_PROCESS:
796		if (rule->rr_subject.rs_proc == NULL)
797			return (0);
798		break;
799	case RCTL_SUBJECT_TYPE_USER:
800		if (rule->rr_subject.rs_uip == NULL)
801			return (0);
802		break;
803	case RCTL_SUBJECT_TYPE_LOGINCLASS:
804		if (rule->rr_subject.rs_loginclass == NULL)
805			return (0);
806		break;
807	case RCTL_SUBJECT_TYPE_JAIL:
808		if (rule->rr_subject.rs_prison_racct == NULL)
809			return (0);
810		break;
811	default:
812		panic("rctl_rule_fully_specified: unknown subject type %d",
813		    rule->rr_subject_type);
814	}
815	if (rule->rr_resource == RACCT_UNDEFINED)
816		return (0);
817	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
818		return (0);
819	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
820		return (0);
821	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
822		return (0);
823
824	return (1);
825}
826
827static int
828rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
829{
830	int error = 0;
831	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
832	     *amountstr, *perstr;
833	struct rctl_rule *rule;
834	id_t id;
835
836	rule = rctl_rule_alloc(M_WAITOK);
837
838	subjectstr = strsep(&rulestr, ":");
839	subject_idstr = strsep(&rulestr, ":");
840	resourcestr = strsep(&rulestr, ":");
841	actionstr = strsep(&rulestr, "=/");
842	amountstr = strsep(&rulestr, "/");
843	perstr = rulestr;
844
845	if (subjectstr == NULL || subjectstr[0] == '\0')
846		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
847	else {
848		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
849		if (error != 0)
850			goto out;
851	}
852
853	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
854		rule->rr_subject.rs_proc = NULL;
855		rule->rr_subject.rs_uip = NULL;
856		rule->rr_subject.rs_loginclass = NULL;
857		rule->rr_subject.rs_prison_racct = NULL;
858	} else {
859		switch (rule->rr_subject_type) {
860		case RCTL_SUBJECT_TYPE_UNDEFINED:
861			error = EINVAL;
862			goto out;
863		case RCTL_SUBJECT_TYPE_PROCESS:
864			error = str2id(subject_idstr, &id);
865			if (error != 0)
866				goto out;
867			sx_assert(&allproc_lock, SA_LOCKED);
868			rule->rr_subject.rs_proc = pfind(id);
869			if (rule->rr_subject.rs_proc == NULL) {
870				error = ESRCH;
871				goto out;
872			}
873			PROC_UNLOCK(rule->rr_subject.rs_proc);
874			break;
875		case RCTL_SUBJECT_TYPE_USER:
876			error = str2id(subject_idstr, &id);
877			if (error != 0)
878				goto out;
879			rule->rr_subject.rs_uip = uifind(id);
880			break;
881		case RCTL_SUBJECT_TYPE_LOGINCLASS:
882			rule->rr_subject.rs_loginclass =
883			    loginclass_find(subject_idstr);
884			if (rule->rr_subject.rs_loginclass == NULL) {
885				error = ENAMETOOLONG;
886				goto out;
887			}
888			break;
889		case RCTL_SUBJECT_TYPE_JAIL:
890			rule->rr_subject.rs_prison_racct =
891			    prison_racct_find(subject_idstr);
892			if (rule->rr_subject.rs_prison_racct == NULL) {
893				error = ENAMETOOLONG;
894				goto out;
895			}
896			break;
897               default:
898                       panic("rctl_string_to_rule: unknown subject type %d",
899                           rule->rr_subject_type);
900               }
901	}
902
903	if (resourcestr == NULL || resourcestr[0] == '\0')
904		rule->rr_resource = RACCT_UNDEFINED;
905	else {
906		error = str2value(resourcestr, &rule->rr_resource,
907		    resourcenames);
908		if (error != 0)
909			goto out;
910	}
911
912	if (actionstr == NULL || actionstr[0] == '\0')
913		rule->rr_action = RCTL_ACTION_UNDEFINED;
914	else {
915		error = str2value(actionstr, &rule->rr_action, actionnames);
916		if (error != 0)
917			goto out;
918	}
919
920	if (amountstr == NULL || amountstr[0] == '\0')
921		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
922	else {
923		error = str2int64(amountstr, &rule->rr_amount);
924		if (error != 0)
925			goto out;
926		if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
927			rule->rr_amount *= 1000000;
928	}
929
930	if (perstr == NULL || perstr[0] == '\0')
931		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
932	else {
933		error = str2value(perstr, &rule->rr_per, subjectnames);
934		if (error != 0)
935			goto out;
936	}
937
938out:
939	if (error == 0)
940		*rulep = rule;
941	else
942		rctl_rule_release(rule);
943
944	return (error);
945}
946
947/*
948 * Link a rule with all the subjects it applies to.
949 */
950int
951rctl_rule_add(struct rctl_rule *rule)
952{
953	struct proc *p;
954	struct ucred *cred;
955	struct uidinfo *uip;
956	struct prison *pr;
957	struct prison_racct *prr;
958	struct loginclass *lc;
959	struct rctl_rule *rule2;
960	int match;
961
962	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
963
964	/*
965	 * Some rules just don't make sense.  Note that the one below
966	 * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
967	 * for example, is not deniable in the racct sense, but the
968	 * limit is enforced in a different way, so "deny" rules for %CPU
969	 * do make sense.
970	 */
971	if (rule->rr_action == RCTL_ACTION_DENY &&
972	    (rule->rr_resource == RACCT_CPU ||
973	    rule->rr_resource == RACCT_WALLCLOCK))
974		return (EOPNOTSUPP);
975
976	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
977	    RACCT_IS_SLOPPY(rule->rr_resource))
978		return (EOPNOTSUPP);
979
980	/*
981	 * Make sure there are no duplicated rules.  Also, for the "deny"
982	 * rules, remove ones differing only by "amount".
983	 */
984	if (rule->rr_action == RCTL_ACTION_DENY) {
985		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
986		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
987		rctl_rule_remove(rule2);
988		rctl_rule_release(rule2);
989	} else
990		rctl_rule_remove(rule);
991
992	switch (rule->rr_subject_type) {
993	case RCTL_SUBJECT_TYPE_PROCESS:
994		p = rule->rr_subject.rs_proc;
995		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
996		/*
997		 * No resource limits for system processes.
998		 */
999		if (p->p_flag & P_SYSTEM)
1000			return (EPERM);
1001
1002		rctl_racct_add_rule(p->p_racct, rule);
1003		/*
1004		 * In case of per-process rule, we don't have anything more
1005		 * to do.
1006		 */
1007		return (0);
1008
1009	case RCTL_SUBJECT_TYPE_USER:
1010		uip = rule->rr_subject.rs_uip;
1011		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1012		rctl_racct_add_rule(uip->ui_racct, rule);
1013		break;
1014
1015	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1016		lc = rule->rr_subject.rs_loginclass;
1017		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1018		rctl_racct_add_rule(lc->lc_racct, rule);
1019		break;
1020
1021	case RCTL_SUBJECT_TYPE_JAIL:
1022		prr = rule->rr_subject.rs_prison_racct;
1023		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1024		rctl_racct_add_rule(prr->prr_racct, rule);
1025		break;
1026
1027	default:
1028		panic("rctl_rule_add: unknown subject type %d",
1029		    rule->rr_subject_type);
1030	}
1031
1032	/*
1033	 * Now go through all the processes and add the new rule to the ones
1034	 * it applies to.
1035	 */
1036	sx_assert(&allproc_lock, SA_LOCKED);
1037	FOREACH_PROC_IN_SYSTEM(p) {
1038		if (p->p_flag & P_SYSTEM)
1039			continue;
1040		cred = p->p_ucred;
1041		switch (rule->rr_subject_type) {
1042		case RCTL_SUBJECT_TYPE_USER:
1043			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1044			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1045				break;
1046			continue;
1047		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1048			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1049				break;
1050			continue;
1051		case RCTL_SUBJECT_TYPE_JAIL:
1052			match = 0;
1053			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1054				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1055					match = 1;
1056					break;
1057				}
1058			}
1059			if (match)
1060				break;
1061			continue;
1062		default:
1063			panic("rctl_rule_add: unknown subject type %d",
1064			    rule->rr_subject_type);
1065		}
1066
1067		rctl_racct_add_rule(p->p_racct, rule);
1068	}
1069
1070	return (0);
1071}
1072
1073static void
1074rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1075{
1076	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1077	int found = 0;
1078
1079	rw_wlock(&rctl_lock);
1080	found += rctl_racct_remove_rules(racct, filter);
1081	rw_wunlock(&rctl_lock);
1082
1083	*((int *)arg3) += found;
1084}
1085
1086/*
1087 * Remove all rules that match the filter.
1088 */
1089int
1090rctl_rule_remove(struct rctl_rule *filter)
1091{
1092	int found = 0;
1093	struct proc *p;
1094
1095	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1096	    filter->rr_subject.rs_proc != NULL) {
1097		p = filter->rr_subject.rs_proc;
1098		rw_wlock(&rctl_lock);
1099		found = rctl_racct_remove_rules(p->p_racct, filter);
1100		rw_wunlock(&rctl_lock);
1101		if (found)
1102			return (0);
1103		return (ESRCH);
1104	}
1105
1106	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1107	    (void *)&found);
1108	ui_racct_foreach(rctl_rule_remove_callback, filter,
1109	    (void *)&found);
1110	prison_racct_foreach(rctl_rule_remove_callback, filter,
1111	    (void *)&found);
1112
1113	sx_assert(&allproc_lock, SA_LOCKED);
1114	rw_wlock(&rctl_lock);
1115	FOREACH_PROC_IN_SYSTEM(p) {
1116		found += rctl_racct_remove_rules(p->p_racct, filter);
1117	}
1118	rw_wunlock(&rctl_lock);
1119
1120	if (found)
1121		return (0);
1122	return (ESRCH);
1123}
1124
1125/*
1126 * Appends a rule to the sbuf.
1127 */
1128static void
1129rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1130{
1131	int64_t amount;
1132
1133	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1134
1135	switch (rule->rr_subject_type) {
1136	case RCTL_SUBJECT_TYPE_PROCESS:
1137		if (rule->rr_subject.rs_proc == NULL)
1138			sbuf_printf(sb, ":");
1139		else
1140			sbuf_printf(sb, "%d:",
1141			    rule->rr_subject.rs_proc->p_pid);
1142		break;
1143	case RCTL_SUBJECT_TYPE_USER:
1144		if (rule->rr_subject.rs_uip == NULL)
1145			sbuf_printf(sb, ":");
1146		else
1147			sbuf_printf(sb, "%d:",
1148			    rule->rr_subject.rs_uip->ui_uid);
1149		break;
1150	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1151		if (rule->rr_subject.rs_loginclass == NULL)
1152			sbuf_printf(sb, ":");
1153		else
1154			sbuf_printf(sb, "%s:",
1155			    rule->rr_subject.rs_loginclass->lc_name);
1156		break;
1157	case RCTL_SUBJECT_TYPE_JAIL:
1158		if (rule->rr_subject.rs_prison_racct == NULL)
1159			sbuf_printf(sb, ":");
1160		else
1161			sbuf_printf(sb, "%s:",
1162			    rule->rr_subject.rs_prison_racct->prr_name);
1163		break;
1164	default:
1165		panic("rctl_rule_to_sbuf: unknown subject type %d",
1166		    rule->rr_subject_type);
1167	}
1168
1169	amount = rule->rr_amount;
1170	if (amount != RCTL_AMOUNT_UNDEFINED &&
1171	    RACCT_IS_IN_MILLIONS(rule->rr_resource))
1172		amount /= 1000000;
1173
1174	sbuf_printf(sb, "%s:%s=%jd",
1175	    rctl_resource_name(rule->rr_resource),
1176	    rctl_action_name(rule->rr_action),
1177	    amount);
1178
1179	if (rule->rr_per != rule->rr_subject_type)
1180		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1181}
1182
1183/*
1184 * Routine used by RCTL syscalls to read in input string.
1185 */
1186static int
1187rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1188{
1189	int error;
1190	char *str;
1191
1192	if (inbuflen <= 0)
1193		return (EINVAL);
1194
1195	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1196	error = copyinstr(inbufp, str, inbuflen, NULL);
1197	if (error != 0) {
1198		free(str, M_RCTL);
1199		return (error);
1200	}
1201
1202	*inputstr = str;
1203
1204	return (0);
1205}
1206
1207/*
1208 * Routine used by RCTL syscalls to write out output string.
1209 */
1210static int
1211rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1212{
1213	int error;
1214
1215	if (outputsbuf == NULL)
1216		return (0);
1217
1218	sbuf_finish(outputsbuf);
1219	if (outbuflen < sbuf_len(outputsbuf) + 1) {
1220		sbuf_delete(outputsbuf);
1221		return (ERANGE);
1222	}
1223	error = copyout(sbuf_data(outputsbuf), outbufp,
1224	    sbuf_len(outputsbuf) + 1);
1225	sbuf_delete(outputsbuf);
1226	return (error);
1227}
1228
1229static struct sbuf *
1230rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1231{
1232	int i;
1233	int64_t amount;
1234	struct sbuf *sb;
1235
1236	sb = sbuf_new_auto();
1237	for (i = 0; i <= RACCT_MAX; i++) {
1238		if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1239			continue;
1240		amount = racct->r_resources[i];
1241		if (RACCT_IS_IN_MILLIONS(i))
1242			amount /= 1000000;
1243		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1244	}
1245	sbuf_setpos(sb, sbuf_len(sb) - 1);
1246	return (sb);
1247}
1248
1249int
1250sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1251{
1252	int error;
1253	char *inputstr;
1254	struct rctl_rule *filter;
1255	struct sbuf *outputsbuf = NULL;
1256	struct proc *p;
1257	struct uidinfo *uip;
1258	struct loginclass *lc;
1259	struct prison_racct *prr;
1260
1261	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1262	if (error != 0)
1263		return (error);
1264
1265	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1266	if (error != 0)
1267		return (error);
1268
1269	sx_slock(&allproc_lock);
1270	error = rctl_string_to_rule(inputstr, &filter);
1271	free(inputstr, M_RCTL);
1272	if (error != 0) {
1273		sx_sunlock(&allproc_lock);
1274		return (error);
1275	}
1276
1277	switch (filter->rr_subject_type) {
1278	case RCTL_SUBJECT_TYPE_PROCESS:
1279		p = filter->rr_subject.rs_proc;
1280		if (p == NULL) {
1281			error = EINVAL;
1282			goto out;
1283		}
1284		if (p->p_flag & P_SYSTEM) {
1285			error = EINVAL;
1286			goto out;
1287		}
1288		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1289		break;
1290	case RCTL_SUBJECT_TYPE_USER:
1291		uip = filter->rr_subject.rs_uip;
1292		if (uip == NULL) {
1293			error = EINVAL;
1294			goto out;
1295		}
1296		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1297		break;
1298	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1299		lc = filter->rr_subject.rs_loginclass;
1300		if (lc == NULL) {
1301			error = EINVAL;
1302			goto out;
1303		}
1304		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1305		break;
1306	case RCTL_SUBJECT_TYPE_JAIL:
1307		prr = filter->rr_subject.rs_prison_racct;
1308		if (prr == NULL) {
1309			error = EINVAL;
1310			goto out;
1311		}
1312		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1313		break;
1314	default:
1315		error = EINVAL;
1316	}
1317out:
1318	rctl_rule_release(filter);
1319	sx_sunlock(&allproc_lock);
1320	if (error != 0)
1321		return (error);
1322
1323	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1324
1325	return (error);
1326}
1327
1328static void
1329rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1330{
1331	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1332	struct rctl_rule_link *link;
1333	struct sbuf *sb = (struct sbuf *)arg3;
1334
1335	rw_rlock(&rctl_lock);
1336	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1337		if (!rctl_rule_matches(link->rrl_rule, filter))
1338			continue;
1339		rctl_rule_to_sbuf(sb, link->rrl_rule);
1340		sbuf_printf(sb, ",");
1341	}
1342	rw_runlock(&rctl_lock);
1343}
1344
1345int
1346sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1347{
1348	int error;
1349	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1350	char *inputstr, *buf;
1351	struct sbuf *sb;
1352	struct rctl_rule *filter;
1353	struct rctl_rule_link *link;
1354	struct proc *p;
1355
1356	error = priv_check(td, PRIV_RCTL_GET_RULES);
1357	if (error != 0)
1358		return (error);
1359
1360	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1361	if (error != 0)
1362		return (error);
1363
1364	sx_slock(&allproc_lock);
1365	error = rctl_string_to_rule(inputstr, &filter);
1366	free(inputstr, M_RCTL);
1367	if (error != 0) {
1368		sx_sunlock(&allproc_lock);
1369		return (error);
1370	}
1371
1372again:
1373	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1374	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1375	KASSERT(sb != NULL, ("sbuf_new failed"));
1376
1377	sx_assert(&allproc_lock, SA_LOCKED);
1378	FOREACH_PROC_IN_SYSTEM(p) {
1379		rw_rlock(&rctl_lock);
1380		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1381			/*
1382			 * Non-process rules will be added to the buffer later.
1383			 * Adding them here would result in duplicated output.
1384			 */
1385			if (link->rrl_rule->rr_subject_type !=
1386			    RCTL_SUBJECT_TYPE_PROCESS)
1387				continue;
1388			if (!rctl_rule_matches(link->rrl_rule, filter))
1389				continue;
1390			rctl_rule_to_sbuf(sb, link->rrl_rule);
1391			sbuf_printf(sb, ",");
1392		}
1393		rw_runlock(&rctl_lock);
1394	}
1395
1396	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1397	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1398	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1399	if (sbuf_error(sb) == ENOMEM) {
1400		sbuf_delete(sb);
1401		free(buf, M_RCTL);
1402		bufsize *= 4;
1403		goto again;
1404	}
1405
1406	/*
1407	 * Remove trailing ",".
1408	 */
1409	if (sbuf_len(sb) > 0)
1410		sbuf_setpos(sb, sbuf_len(sb) - 1);
1411
1412	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1413
1414	rctl_rule_release(filter);
1415	sx_sunlock(&allproc_lock);
1416	free(buf, M_RCTL);
1417	return (error);
1418}
1419
1420int
1421sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1422{
1423	int error;
1424	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1425	char *inputstr, *buf;
1426	struct sbuf *sb;
1427	struct rctl_rule *filter;
1428	struct rctl_rule_link *link;
1429
1430	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1431	if (error != 0)
1432		return (error);
1433
1434	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1435	if (error != 0)
1436		return (error);
1437
1438	sx_slock(&allproc_lock);
1439	error = rctl_string_to_rule(inputstr, &filter);
1440	free(inputstr, M_RCTL);
1441	if (error != 0) {
1442		sx_sunlock(&allproc_lock);
1443		return (error);
1444	}
1445
1446	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1447		rctl_rule_release(filter);
1448		sx_sunlock(&allproc_lock);
1449		return (EINVAL);
1450	}
1451	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1452		rctl_rule_release(filter);
1453		sx_sunlock(&allproc_lock);
1454		return (EOPNOTSUPP);
1455	}
1456	if (filter->rr_subject.rs_proc == NULL) {
1457		rctl_rule_release(filter);
1458		sx_sunlock(&allproc_lock);
1459		return (EINVAL);
1460	}
1461
1462again:
1463	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1464	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1465	KASSERT(sb != NULL, ("sbuf_new failed"));
1466
1467	rw_rlock(&rctl_lock);
1468	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1469	    rrl_next) {
1470		rctl_rule_to_sbuf(sb, link->rrl_rule);
1471		sbuf_printf(sb, ",");
1472	}
1473	rw_runlock(&rctl_lock);
1474	if (sbuf_error(sb) == ENOMEM) {
1475		sbuf_delete(sb);
1476		free(buf, M_RCTL);
1477		bufsize *= 4;
1478		goto again;
1479	}
1480
1481	/*
1482	 * Remove trailing ",".
1483	 */
1484	if (sbuf_len(sb) > 0)
1485		sbuf_setpos(sb, sbuf_len(sb) - 1);
1486
1487	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1488	rctl_rule_release(filter);
1489	sx_sunlock(&allproc_lock);
1490	free(buf, M_RCTL);
1491	return (error);
1492}
1493
1494int
1495sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1496{
1497	int error;
1498	struct rctl_rule *rule;
1499	char *inputstr;
1500
1501	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1502	if (error != 0)
1503		return (error);
1504
1505	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1506	if (error != 0)
1507		return (error);
1508
1509	sx_slock(&allproc_lock);
1510	error = rctl_string_to_rule(inputstr, &rule);
1511	free(inputstr, M_RCTL);
1512	if (error != 0) {
1513		sx_sunlock(&allproc_lock);
1514		return (error);
1515	}
1516	/*
1517	 * The 'per' part of a rule is optional.
1518	 */
1519	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1520	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1521		rule->rr_per = rule->rr_subject_type;
1522
1523	if (!rctl_rule_fully_specified(rule)) {
1524		error = EINVAL;
1525		goto out;
1526	}
1527
1528	error = rctl_rule_add(rule);
1529
1530out:
1531	rctl_rule_release(rule);
1532	sx_sunlock(&allproc_lock);
1533	return (error);
1534}
1535
1536int
1537sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1538{
1539	int error;
1540	struct rctl_rule *filter;
1541	char *inputstr;
1542
1543	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1544	if (error != 0)
1545		return (error);
1546
1547	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1548	if (error != 0)
1549		return (error);
1550
1551	sx_slock(&allproc_lock);
1552	error = rctl_string_to_rule(inputstr, &filter);
1553	free(inputstr, M_RCTL);
1554	if (error != 0) {
1555		sx_sunlock(&allproc_lock);
1556		return (error);
1557	}
1558
1559	error = rctl_rule_remove(filter);
1560	rctl_rule_release(filter);
1561	sx_sunlock(&allproc_lock);
1562
1563	return (error);
1564}
1565
1566/*
1567 * Update RCTL rule list after credential change.
1568 */
1569void
1570rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1571{
1572	int rulecnt, i;
1573	struct rctl_rule_link *link, *newlink;
1574	struct uidinfo *newuip;
1575	struct loginclass *newlc;
1576	struct prison_racct *newprr;
1577	LIST_HEAD(, rctl_rule_link) newrules;
1578
1579	newuip = newcred->cr_ruidinfo;
1580	newlc = newcred->cr_loginclass;
1581	newprr = newcred->cr_prison->pr_prison_racct;
1582
1583	LIST_INIT(&newrules);
1584
1585again:
1586	/*
1587	 * First, count the rules that apply to the process with new
1588	 * credentials.
1589	 */
1590	rulecnt = 0;
1591	rw_rlock(&rctl_lock);
1592	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1593		if (link->rrl_rule->rr_subject_type ==
1594		    RCTL_SUBJECT_TYPE_PROCESS)
1595			rulecnt++;
1596	}
1597	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1598		rulecnt++;
1599	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1600		rulecnt++;
1601	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1602		rulecnt++;
1603	rw_runlock(&rctl_lock);
1604
1605	/*
1606	 * Create temporary list.  We've dropped the rctl_lock in order
1607	 * to use M_WAITOK.
1608	 */
1609	for (i = 0; i < rulecnt; i++) {
1610		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1611		newlink->rrl_rule = NULL;
1612		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1613	}
1614
1615	newlink = LIST_FIRST(&newrules);
1616
1617	/*
1618	 * Assign rules to the newly allocated list entries.
1619	 */
1620	rw_wlock(&rctl_lock);
1621	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1622		if (link->rrl_rule->rr_subject_type ==
1623		    RCTL_SUBJECT_TYPE_PROCESS) {
1624			if (newlink == NULL)
1625				goto goaround;
1626			rctl_rule_acquire(link->rrl_rule);
1627			newlink->rrl_rule = link->rrl_rule;
1628			newlink = LIST_NEXT(newlink, rrl_next);
1629			rulecnt--;
1630		}
1631	}
1632
1633	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1634		if (newlink == NULL)
1635			goto goaround;
1636		rctl_rule_acquire(link->rrl_rule);
1637		newlink->rrl_rule = link->rrl_rule;
1638		newlink = LIST_NEXT(newlink, rrl_next);
1639		rulecnt--;
1640	}
1641
1642	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1643		if (newlink == NULL)
1644			goto goaround;
1645		rctl_rule_acquire(link->rrl_rule);
1646		newlink->rrl_rule = link->rrl_rule;
1647		newlink = LIST_NEXT(newlink, rrl_next);
1648		rulecnt--;
1649	}
1650
1651	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1652		if (newlink == NULL)
1653			goto goaround;
1654		rctl_rule_acquire(link->rrl_rule);
1655		newlink->rrl_rule = link->rrl_rule;
1656		newlink = LIST_NEXT(newlink, rrl_next);
1657		rulecnt--;
1658	}
1659
1660	if (rulecnt == 0) {
1661		/*
1662		 * Free the old rule list.
1663		 */
1664		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1665			link = LIST_FIRST(&p->p_racct->r_rule_links);
1666			LIST_REMOVE(link, rrl_next);
1667			rctl_rule_release(link->rrl_rule);
1668			uma_zfree(rctl_rule_link_zone, link);
1669		}
1670
1671		/*
1672		 * Replace lists and we're done.
1673		 *
1674		 * XXX: Is there any way to switch list heads instead
1675		 *      of iterating here?
1676		 */
1677		while (!LIST_EMPTY(&newrules)) {
1678			newlink = LIST_FIRST(&newrules);
1679			LIST_REMOVE(newlink, rrl_next);
1680			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1681			    newlink, rrl_next);
1682		}
1683
1684		rw_wunlock(&rctl_lock);
1685
1686		return;
1687	}
1688
1689goaround:
1690	rw_wunlock(&rctl_lock);
1691
1692	/*
1693	 * Rule list changed while we were not holding the rctl_lock.
1694	 * Free the new list and try again.
1695	 */
1696	while (!LIST_EMPTY(&newrules)) {
1697		newlink = LIST_FIRST(&newrules);
1698		LIST_REMOVE(newlink, rrl_next);
1699		if (newlink->rrl_rule != NULL)
1700			rctl_rule_release(newlink->rrl_rule);
1701		uma_zfree(rctl_rule_link_zone, newlink);
1702	}
1703
1704	goto again;
1705}
1706
1707/*
1708 * Assign RCTL rules to the newly created process.
1709 */
1710int
1711rctl_proc_fork(struct proc *parent, struct proc *child)
1712{
1713	int error;
1714	struct rctl_rule_link *link;
1715	struct rctl_rule *rule;
1716
1717	LIST_INIT(&child->p_racct->r_rule_links);
1718
1719	/*
1720	 * No limits for kernel processes.
1721	 */
1722	if (child->p_flag & P_SYSTEM)
1723		return (0);
1724
1725	/*
1726	 * Nothing to inherit from P_SYSTEM parents.
1727	 */
1728	if (parent->p_racct == NULL) {
1729		KASSERT(parent->p_flag & P_SYSTEM,
1730		    ("non-system process without racct; p = %p", parent));
1731		return (0);
1732	}
1733
1734	rw_wlock(&rctl_lock);
1735
1736	/*
1737	 * Go through limits applicable to the parent and assign them
1738	 * to the child.  Rules with 'process' subject have to be duplicated
1739	 * in order to make their rr_subject point to the new process.
1740	 */
1741	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1742		if (link->rrl_rule->rr_subject_type ==
1743		    RCTL_SUBJECT_TYPE_PROCESS) {
1744			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1745			if (rule == NULL)
1746				goto fail;
1747			KASSERT(rule->rr_subject.rs_proc == parent,
1748			    ("rule->rr_subject.rs_proc != parent"));
1749			rule->rr_subject.rs_proc = child;
1750			error = rctl_racct_add_rule_locked(child->p_racct,
1751			    rule);
1752			rctl_rule_release(rule);
1753			if (error != 0)
1754				goto fail;
1755		} else {
1756			error = rctl_racct_add_rule_locked(child->p_racct,
1757			    link->rrl_rule);
1758			if (error != 0)
1759				goto fail;
1760		}
1761	}
1762
1763	rw_wunlock(&rctl_lock);
1764	return (0);
1765
1766fail:
1767	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1768		link = LIST_FIRST(&child->p_racct->r_rule_links);
1769		LIST_REMOVE(link, rrl_next);
1770		rctl_rule_release(link->rrl_rule);
1771		uma_zfree(rctl_rule_link_zone, link);
1772	}
1773	rw_wunlock(&rctl_lock);
1774	return (EAGAIN);
1775}
1776
1777/*
1778 * Release rules attached to the racct.
1779 */
1780void
1781rctl_racct_release(struct racct *racct)
1782{
1783	struct rctl_rule_link *link;
1784
1785	rw_wlock(&rctl_lock);
1786	while (!LIST_EMPTY(&racct->r_rule_links)) {
1787		link = LIST_FIRST(&racct->r_rule_links);
1788		LIST_REMOVE(link, rrl_next);
1789		rctl_rule_release(link->rrl_rule);
1790		uma_zfree(rctl_rule_link_zone, link);
1791	}
1792	rw_wunlock(&rctl_lock);
1793}
1794
1795static void
1796rctl_init(void)
1797{
1798
1799	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1800	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1801	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1802	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1803	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1804}
1805
1806#else /* !RCTL */
1807
/*
 * Variant compiled when RCTL is not defined: the system call is not
 * implemented, so both arguments are ignored and ENOSYS is returned.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	return (ENOSYS);
}
1814
/*
 * Variant compiled when RCTL is not defined: the system call is not
 * implemented, so both arguments are ignored and ENOSYS is returned.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	return (ENOSYS);
}
1821
/*
 * Variant compiled when RCTL is not defined: the system call is not
 * implemented, so both arguments are ignored and ENOSYS is returned.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	return (ENOSYS);
}
1828
/*
 * Variant compiled when RCTL is not defined: the system call is not
 * implemented, so both arguments are ignored and ENOSYS is returned.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	return (ENOSYS);
}
1835
/*
 * Variant compiled when RCTL is not defined: the system call is not
 * implemented, so both arguments are ignored and ENOSYS is returned.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	return (ENOSYS);
}
1842
1843#endif /* !RCTL */
1844