kern_rctl.c revision 221362
/*-
 * Copyright (c) 2010 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_rctl.c 221362 2011-05-03 07:32:58Z trasz $
 */
3137Srgrimes
3237Srgrimes#include <sys/cdefs.h>
3337Srgrimes__FBSDID("$FreeBSD: head/sys/kern/kern_rctl.c 221362 2011-05-03 07:32:58Z trasz $");
3437Srgrimes
3537Srgrimes#include <sys/param.h>
3651231Ssheldonh#include <sys/bus.h>
3751231Ssheldonh#include <sys/malloc.h>
3851231Ssheldonh#include <sys/queue.h>
3951231Ssheldonh#include <sys/refcount.h>
408460Sjkh#include <sys/jail.h>
4137Srgrimes#include <sys/kernel.h>
4237Srgrimes#include <sys/limits.h>
4337Srgrimes#include <sys/loginclass.h>
4437Srgrimes#include <sys/priv.h>
4551231Ssheldonh#include <sys/proc.h>
4637Srgrimes#include <sys/racct.h>
4737Srgrimes#include <sys/rctl.h>
4837Srgrimes#include <sys/resourcevar.h>
4968985Sdougb#include <sys/sx.h>
5068985Sdougb#include <sys/sysent.h>
5151231Ssheldonh#include <sys/sysproto.h>
5220684Sjoerg#include <sys/systm.h>
5351231Ssheldonh#include <sys/types.h>
5437Srgrimes#include <sys/eventhandler.h>
5543179Sdillon#include <sys/lock.h>
5643803Sdillon#include <sys/mutex.h>
5743179Sdillon#include <sys/rwlock.h>
5851231Ssheldonh#include <sys/sbuf.h>
5943375Sdillon#include <sys/taskqueue.h>
6043375Sdillon#include <sys/tree.h>
6143803Sdillon#include <vm/uma.h>
6243179Sdillon
6343179Sdillon#ifdef RCTL
6443179Sdillon#ifndef RACCT
6543219Speter#error "The RCTL option requires the RACCT option"
6643219Speter#endif
6751231Ssheldonh
6843849SjkhFEATURE(rctl, "Resource Limits");
6959674Ssheldonh
/*
 * HRF_* values.
 * NOTE(review): no user of these is visible in this part of the file;
 * confirm the consumers before changing any value.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/* Default buffer size for rctl_get_rules(2). */
#define	RCTL_DEFAULT_BUFSIZE	4096
/* Size of the scratch buffer rctl_enforce() formats a matched rule into. */
#define	RCTL_LOG_BUFSIZE	128
7771014Sdougb
7871014Sdougb/*
7971014Sdougb * 'rctl_rule_link' connects a rule with every racct it's related to.
8071014Sdougb * For example, rule 'user:X:openfiles:deny=N/process' is linked
8169988Sbsd * with uidinfo for user X, and to each process of that user.
8269988Sbsd */
8369988Sbsdstruct rctl_rule_link {
8469988Sbsd	LIST_ENTRY(rctl_rule_link)	rrl_next;
8569988Sbsd	struct rctl_rule		*rrl_rule;
8669988Sbsd	int				rrl_exceeded;
8769988Sbsd};
8869988Sbsd
/*
 * One entry of a name <-> value lookup table.  The tables in this file
 * are terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name used in rule strings */
	int		d_value;	/* numeric constant the name maps to */
};
9369949Sbsd
9469988Sbsdstatic struct dict subjectnames[] = {
9569949Sbsd	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
9669988Sbsd	{ "user", RCTL_SUBJECT_TYPE_USER },
9769988Sbsd	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
9869988Sbsd	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
9969988Sbsd	{ NULL, -1 }};
10069949Sbsd
10169876Sobrienstatic struct dict resourcenames[] = {
10269876Sobrien	{ "cpu", RACCT_CPU },
10369988Sbsd	{ "fsize", RACCT_FSIZE },
10467179Sjwd	{ "data", RACCT_DATA },
10569988Sbsd	{ "stack", RACCT_STACK },
10669988Sbsd	{ "core", RACCT_CORE },
10770916Sbsd	{ "rss", RACCT_RSS },
10870916Sbsd	{ "memlock", RACCT_MEMLOCK },
10969876Sobrien	{ "nproc", RACCT_NPROC },
11073277Sdougb	{ "nofile", RACCT_NOFILE },
11173277Sdougb	{ "sbsize", RACCT_SBSIZE },
11273277Sdougb	{ "vmem", RACCT_VMEM },
11373277Sdougb	{ "npts", RACCT_NPTS },
11473277Sdougb	{ "swap", RACCT_SWAP },
11573277Sdougb	{ "nthr", RACCT_NTHR },
11673277Sdougb	{ "msgqqueued", RACCT_MSGQQUEUED },
11773277Sdougb	{ "msgqsize", RACCT_MSGQSIZE },
11873277Sdougb	{ "nmsgq", RACCT_NMSGQ },
11973277Sdougb	{ "nsem", RACCT_NSEM },
12074075Smarkm	{ "nsemop", RACCT_NSEMOP },
12173277Sdougb	{ "nshm", RACCT_NSHM },
12273277Sdougb	{ "shmsize", RACCT_SHMSIZE },
12373277Sdougb	{ "wallclock", RACCT_WALLCLOCK },
12473277Sdougb	{ NULL, -1 }};
12573277Sdougb
12673277Sdougbstatic struct dict actionnames[] = {
12773277Sdougb	{ "sighup", RCTL_ACTION_SIGHUP },
12873277Sdougb	{ "sigint", RCTL_ACTION_SIGINT },
12973277Sdougb	{ "sigquit", RCTL_ACTION_SIGQUIT },
13074063Sdougb	{ "sigill", RCTL_ACTION_SIGILL },
13174075Smarkm	{ "sigtrap", RCTL_ACTION_SIGTRAP },
13273277Sdougb	{ "sigabrt", RCTL_ACTION_SIGABRT },
13373277Sdougb	{ "sigemt", RCTL_ACTION_SIGEMT },
13473277Sdougb	{ "sigfpe", RCTL_ACTION_SIGFPE },
13573277Sdougb	{ "sigkill", RCTL_ACTION_SIGKILL },
13673277Sdougb	{ "sigbus", RCTL_ACTION_SIGBUS },
13773277Sdougb	{ "sigsegv", RCTL_ACTION_SIGSEGV },
13873277Sdougb	{ "sigsys", RCTL_ACTION_SIGSYS },
13973277Sdougb	{ "sigpipe", RCTL_ACTION_SIGPIPE },
14073277Sdougb	{ "sigalrm", RCTL_ACTION_SIGALRM },
14174063Sdougb	{ "sigterm", RCTL_ACTION_SIGTERM },
14274075Smarkm	{ "sigurg", RCTL_ACTION_SIGURG },
14373277Sdougb	{ "sigstop", RCTL_ACTION_SIGSTOP },
14473277Sdougb	{ "sigtstp", RCTL_ACTION_SIGTSTP },
14573277Sdougb	{ "sigchld", RCTL_ACTION_SIGCHLD },
14673277Sdougb	{ "sigttin", RCTL_ACTION_SIGTTIN },
14773277Sdougb	{ "sigttou", RCTL_ACTION_SIGTTOU },
14873277Sdougb	{ "sigio", RCTL_ACTION_SIGIO },
14973277Sdougb	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
15071014Sdougb	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
15167113Smarkm	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
15267113Smarkm	{ "sigprof", RCTL_ACTION_SIGPROF },
15367113Smarkm	{ "sigwinch", RCTL_ACTION_SIGWINCH },
15467113Smarkm	{ "siginfo", RCTL_ACTION_SIGINFO },
15567113Smarkm	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
15667113Smarkm	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
15770922Sdougb	{ "sigthr", RCTL_ACTION_SIGTHR },
15870922Sdougb	{ "deny", RCTL_ACTION_DENY },
15970922Sdougb	{ "log", RCTL_ACTION_LOG },
16070922Sdougb	{ "devctl", RCTL_ACTION_DEVCTL },
16170922Sdougb	{ NULL, -1 }};
16274063Sdougb
16374063Sdougbstatic void rctl_init(void);
16474063SdougbSYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
16574063Sdougb
16674063Sdougbstatic uma_zone_t rctl_rule_link_zone;
16774063Sdougbstatic uma_zone_t rctl_rule_zone;
16874063Sdougbstatic struct rwlock rctl_lock;
16974063SdougbRW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
17015568Sasami
17151231Ssheldonhstatic int rctl_rule_fully_specified(const struct rctl_rule *rule);
17251231Ssheldonhstatic void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
17315568Sasami
17415568SasamiMALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
17515568Sasami
17651231Ssheldonhstatic const char *
17751231Ssheldonhrctl_subject_type_name(int subject)
17845239Sgrog{
17951231Ssheldonh	int i;
18051231Ssheldonh
18142741Sgrog	for (i = 0; subjectnames[i].d_name != NULL; i++) {
18243803Sdillon		if (subjectnames[i].d_value == subject)
1833843Sdg			return (subjectnames[i].d_name);
18468985Sdougb	}
18551231Ssheldonh
18670109Sdougb	panic("rctl_subject_type_name: unknown subject type %d", subject);
18776946Sdd}
18876946Sdd
18976946Sddstatic const char *
19076946Sddrctl_action_name(int action)
19176946Sdd{
19276946Sdd	int i;
19376946Sdd
19476946Sdd	for (i = 0; actionnames[i].d_name != NULL; i++) {
19537Srgrimes		if (actionnames[i].d_value == action)
19637Srgrimes			return (actionnames[i].d_name);
19737Srgrimes	}
19837Srgrimes
19937Srgrimes	panic("rctl_action_name: unknown action %d", action);
20037Srgrimes}
20137Srgrimes
20237Srgrimesconst char *
20370109Sdougbrctl_resource_name(int resource)
20437Srgrimes{
20537Srgrimes	int i;
20637Srgrimes
20772031Sdougb	for (i = 0; resourcenames[i].d_name != NULL; i++) {
20872031Sdougb		if (resourcenames[i].d_value == resource)
20972031Sdougb			return (resourcenames[i].d_name);
21072031Sdougb	}
21172031Sdougb
21272031Sdougb	panic("rctl_resource_name: unknown resource %d", resource);
21372031Sdougb}
21472031Sdougb
21572031Sdougb/*
21672031Sdougb * Return the amount of resource that can be allocated by 'p' before
21772031Sdougb * hitting 'rule'.
21872031Sdougb */
21972031Sdougbstatic int64_t
22072031Sdougbrctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
22172031Sdougb{
22272031Sdougb	int resource;
22372031Sdougb	int64_t available = INT64_MAX;
22472031Sdougb	struct ucred *cred = p->p_ucred;
22537Srgrimes
22637Srgrimes	rw_assert(&rctl_lock, RA_LOCKED);
22770109Sdougb
22837Srgrimes	resource = rule->rr_resource;
22937Srgrimes	switch (rule->rr_per) {
23037Srgrimes	case RCTL_SUBJECT_TYPE_PROCESS:
23137Srgrimes		available = rule->rr_amount -
23237Srgrimes		    p->p_racct->r_resources[resource];
23337Srgrimes		break;
23437Srgrimes	case RCTL_SUBJECT_TYPE_USER:
23570109Sdougb		available = rule->rr_amount -
23637Srgrimes		    cred->cr_ruidinfo->ui_racct->r_resources[resource];
23737Srgrimes		break;
23837Srgrimes	case RCTL_SUBJECT_TYPE_LOGINCLASS:
23951231Ssheldonh		available = rule->rr_amount -
24051231Ssheldonh		    cred->cr_loginclass->lc_racct->r_resources[resource];
24170109Sdougb		break;
24251231Ssheldonh	case RCTL_SUBJECT_TYPE_JAIL:
24351231Ssheldonh		available = rule->rr_amount -
24437Srgrimes		    cred->cr_prison->pr_prison_racct->prr_racct->
24545222Scracauer		        r_resources[resource];
24637Srgrimes		break;
24737Srgrimes	default:
24843197Sdillon		panic("rctl_compute_available: unknown per %d",
24943197Sdillon		    rule->rr_per);
25043197Sdillon	}
25151231Ssheldonh
25251231Ssheldonh	return (available);
25351231Ssheldonh}
25451231Ssheldonh
25551231Ssheldonh/*
25670109Sdougb * Return non-zero if allocating 'amount' by proc 'p' would exceed
25751231Ssheldonh * resource limit specified by 'rule'.
25851231Ssheldonh */
25951231Ssheldonhstatic int
26051231Ssheldonhrctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
2611692Sphk    int64_t amount)
26243803Sdillon{
26343197Sdillon	int64_t available;
26477974Ssheldonh
26577974Ssheldonh	rw_assert(&rctl_lock, RA_LOCKED);
26651231Ssheldonh
26751231Ssheldonh	available = rctl_available_resource(p, rule);
26851231Ssheldonh	if (available >= amount)
26951231Ssheldonh		return (0);
27051231Ssheldonh
27170109Sdougb	return (1);
27243803Sdillon}
27351231Ssheldonh
27451231Ssheldonh/*
27537Srgrimes * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
27643197Sdillon * to what it keeps allocated now.  Returns non-zero if the allocation should
27743197Sdillon * be denied, 0 otherwise.
27851231Ssheldonh */
27950357Ssheldonhint
28043197Sdillonrctl_enforce(struct proc *p, int resource, uint64_t amount)
28143197Sdillon{
28271014Sdougb	struct rctl_rule *rule;
28371014Sdougb	struct rctl_rule_link *link;
28471014Sdougb	struct sbuf sb;
28567179Sjwd	int should_deny = 0;
28667179Sjwd	char *buf;
28771014Sdougb	static int curtime = 0;
28871014Sdougb	static struct timeval lasttime;
28967179Sjwd
29071014Sdougb	rw_rlock(&rctl_lock);
29171014Sdougb
29271014Sdougb	/*
29367179Sjwd	 * There may be more than one matching rule; go through all of them.
29471014Sdougb	 * Denial should be done last, after logging and sending signals.
29567179Sjwd	 */
29667179Sjwd	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
29767179Sjwd		rule = link->rrl_rule;
29871014Sdougb		if (rule->rr_resource != resource)
29971014Sdougb			continue;
30071014Sdougb		if (!rctl_would_exceed(p, rule, amount)) {
30171014Sdougb			link->rrl_exceeded = 0;
30271014Sdougb			continue;
30371014Sdougb		}
30471014Sdougb
30571014Sdougb		switch (rule->rr_action) {
30671014Sdougb		case RCTL_ACTION_DENY:
30767179Sjwd			should_deny = 1;
3084091Sache			continue;
309872Sache		case RCTL_ACTION_LOG:
31064400Sbrian			/*
31164400Sbrian			 * If rrl_exceeded != 0, it means we've already
31264400Sbrian			 * logged a warning for this process.
31364400Sbrian			 */
31464400Sbrian			if (link->rrl_exceeded != 0)
31564400Sbrian				continue;
31664400Sbrian
31764400Sbrian			if (!ppsratecheck(&lasttime, &curtime, 10))
31864400Sbrian				continue;
31964449Sbrian
32064400Sbrian			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
32164449Sbrian			if (buf == NULL) {
32276430Sbrian				printf("rctl_enforce: out of memory\n");
32376430Sbrian				continue;
32464449Sbrian			}
32576430Sbrian			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
32676430Sbrian			rctl_rule_to_sbuf(&sb, rule);
32776430Sbrian			sbuf_finish(&sb);
32864400Sbrian			printf("rctl: rule \"%s\" matched by pid %d "
32964400Sbrian			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
33064400Sbrian			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
33164400Sbrian			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
33264400Sbrian			sbuf_delete(&sb);
33364400Sbrian			free(buf, M_RCTL);
33438237Sbrian			link->rrl_exceeded = 1;
33539384Sbrian			continue;
33671830Sbrian		case RCTL_ACTION_DEVCTL:
33739384Sbrian			if (link->rrl_exceeded != 0)
33839384Sbrian				continue;
33939384Sbrian
34039384Sbrian			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
34151231Ssheldonh			if (buf == NULL) {
34239384Sbrian				printf("rctl_enforce: out of memory\n");
34339384Sbrian				continue;
34438237Sbrian			}
34526450Sache			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
34638237Sbrian			sbuf_printf(&sb, "rule=");
34739384Sbrian			rctl_rule_to_sbuf(&sb, rule);
34839384Sbrian			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
34939384Sbrian			    p->p_pid, p->p_ucred->cr_ruid,
35039384Sbrian			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
35138237Sbrian			sbuf_finish(&sb);
35221197Sphk			devctl_notify_f("RCTL", "rule", "matched",
35317767Sjkh			    sbuf_data(&sb), M_NOWAIT);
35451231Ssheldonh			sbuf_delete(&sb);
35551231Ssheldonh			free(buf, M_RCTL);
35651231Ssheldonh			link->rrl_exceeded = 1;
35751231Ssheldonh			continue;
35851231Ssheldonh		default:
35975384Sru			if (link->rrl_exceeded != 0)
36070109Sdougb				continue;
36175384Sru
36251231Ssheldonh			KASSERT(rule->rr_action > 0 &&
36351231Ssheldonh			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
36451231Ssheldonh			    ("rctl_enforce: unknown action %d",
36517767Sjkh			     rule->rr_action));
36651231Ssheldonh
36751231Ssheldonh			/*
36851231Ssheldonh			 * We're using the fact that RCTL_ACTION_SIG* values
36945096Simp			 * are equal to their counterparts from sys/signal.h.
37045096Simp			 */
37145096Simp			psignal(p, rule->rr_action);
37251231Ssheldonh			link->rrl_exceeded = 1;
37351231Ssheldonh			continue;
37451231Ssheldonh		}
3757293Sjkh	}
3761675Sache
3771675Sache	rw_runlock(&rctl_lock);
37851231Ssheldonh
37951231Ssheldonh	if (should_deny) {
38051231Ssheldonh		/*
38114624Snate		 * Return fake error code; the caller should change it
38214596Snate		 * into one proper for the situation - EFSIZ, ENOMEM etc.
38314596Snate		 */
38451231Ssheldonh		return (EDOOFUS);
38551231Ssheldonh	}
38651231Ssheldonh
38725184Sjkh	return (0);
38825184Sjkh}
3897460Sjkh
3907460Sjkhuint64_t
39157398Sshinrctl_get_limit(struct proc *p, int resource)
39257398Sshin{
39357398Sshin	struct rctl_rule *rule;
39457398Sshin	struct rctl_rule_link *link;
39557398Sshin	uint64_t amount = UINT64_MAX;
39657398Sshin
39757398Sshin	rw_rlock(&rctl_lock);
39857398Sshin
39957398Sshin	/*
40077974Ssheldonh	 * There may be more than one matching rule; go through all of them.
40177974Ssheldonh	 * Denial should be done last, after logging and sending signals.
40277974Ssheldonh	 */
40377974Ssheldonh	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
40477974Ssheldonh		rule = link->rrl_rule;
40577974Ssheldonh		if (rule->rr_resource != resource)
40677974Ssheldonh			continue;
40777974Ssheldonh		if (rule->rr_action != RCTL_ACTION_DENY)
4087487Srgrimes			continue;
4097487Srgrimes		if (rule->rr_amount < amount)
41051231Ssheldonh			amount = rule->rr_amount;
41164893Ssheldonh	}
41264893Ssheldonh
41364893Ssheldonh	rw_runlock(&rctl_lock);
41464893Ssheldonh
41564893Ssheldonh	return (amount);
4167487Srgrimes}
41751231Ssheldonh
41851231Ssheldonhuint64_t
41938237Sbrianrctl_get_available(struct proc *p, int resource)
42038237Sbrian{
42138237Sbrian	struct rctl_rule *rule;
42231192Ssteve	struct rctl_rule_link *link;
42331192Ssteve	int64_t available, minavailable, allocated;
42431192Ssteve
42531192Ssteve	minavailable = INT64_MAX;
42631192Ssteve
42731192Ssteve	rw_rlock(&rctl_lock);
42831192Ssteve
42951231Ssheldonh	/*
43051231Ssheldonh	 * There may be more than one matching rule; go through all of them.
43170109Sdougb	 * Denial should be done last, after logging and sending signals.
43231192Ssteve	 */
43331192Ssteve	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
43431192Ssteve		rule = link->rrl_rule;
43551231Ssheldonh		if (rule->rr_resource != resource)
43651231Ssheldonh			continue;
43770109Sdougb		if (rule->rr_action != RCTL_ACTION_DENY)
43851231Ssheldonh			continue;
43951231Ssheldonh		available = rctl_available_resource(p, rule);
44031192Ssteve		if (available < minavailable)
44151231Ssheldonh			minavailable = available;
44238915Scracauer	}
44351231Ssheldonh
44438915Scracauer	rw_runlock(&rctl_lock);
44538915Scracauer
44651231Ssheldonh	/*
44751231Ssheldonh	 * XXX: Think about this _hard_.
4487487Srgrimes	 */
44970109Sdougb	allocated = p->p_racct->r_resources[resource];
45051231Ssheldonh	if (minavailable < INT64_MAX - allocated)
45151231Ssheldonh		minavailable += allocated;
45251231Ssheldonh	if (minavailable < 0)
4537259Sjkh		minavailable = 0;
45451231Ssheldonh	return (minavailable);
45551231Ssheldonh}
45624463Spst
45751231Ssheldonhstatic int
45875525Sphkrctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
45924463Spst{
46024463Spst
46124463Spst	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
46224463Spst		if (rule->rr_subject_type != filter->rr_subject_type)
46324463Spst			return (0);
46478905Sdd
46578905Sdd		switch (filter->rr_subject_type) {
46651231Ssheldonh		case RCTL_SUBJECT_TYPE_PROCESS:
46751231Ssheldonh			if (filter->rr_subject.rs_proc != NULL &&
46851231Ssheldonh			    rule->rr_subject.rs_proc !=
46925339Sjkh			    filter->rr_subject.rs_proc)
47025339Sjkh				return (0);
47167445Sphk			break;
47267040Sache		case RCTL_SUBJECT_TYPE_USER:
47367445Sphk			if (filter->rr_subject.rs_uip != NULL &&
47470109Sdougb			    rule->rr_subject.rs_uip !=
47567445Sphk			    filter->rr_subject.rs_uip)
47667445Sphk				return (0);
47767445Sphk			break;
47867040Sache		case RCTL_SUBJECT_TYPE_LOGINCLASS:
47951231Ssheldonh			if (filter->rr_subject.rs_loginclass != NULL &&
48032340Sjoerg			    rule->rr_subject.rs_loginclass !=
48132340Sjoerg			    filter->rr_subject.rs_loginclass)
48251231Ssheldonh				return (0);
48351231Ssheldonh			break;
48451231Ssheldonh		case RCTL_SUBJECT_TYPE_JAIL:
48551231Ssheldonh			if (filter->rr_subject.rs_prison_racct != NULL &&
48651231Ssheldonh			    rule->rr_subject.rs_prison_racct !=
48751231Ssheldonh			    filter->rr_subject.rs_prison_racct)
48874140Sdougb				return (0);
48970109Sdougb			break;
49074140Sdougb		default:
49151231Ssheldonh			panic("rctl_rule_matches: unknown subject type %d",
49251231Ssheldonh			    filter->rr_subject_type);
49351231Ssheldonh		}
49432340Sjoerg	}
49550357Ssheldonh
49639384Sbrian	if (filter->rr_resource != RACCT_UNDEFINED) {
497857Sdg		if (rule->rr_resource != filter->rr_resource)
49837Srgrimes			return (0);
49944818Sbillf	}
50051231Ssheldonh
50151231Ssheldonh	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
50251231Ssheldonh		if (rule->rr_action != filter->rr_action)
50351231Ssheldonh			return (0);
50451231Ssheldonh	}
50570109Sdougb
50651231Ssheldonh	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
50751231Ssheldonh		if (rule->rr_amount != filter->rr_amount)
50851231Ssheldonh			return (0);
50951231Ssheldonh	}
51044818Sbillf
51170109Sdougb	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
51251231Ssheldonh		if (rule->rr_per != filter->rr_per)
51351231Ssheldonh			return (0);
51451231Ssheldonh	}
51551231Ssheldonh
51644818Sbillf	return (1);
51750357Ssheldonh}
51839384Sbrian
5197477Sachestatic int
5207477Sachestr2value(const char *str, int *value, struct dict *table)
52151231Ssheldonh{
52251231Ssheldonh	int i;
52351231Ssheldonh
5247487Srgrimes	if (value == NULL)
5257487Srgrimes		return (EINVAL);
5267238Sache
5277238Sache	for (i = 0; table[i].d_name != NULL; i++) {
52851231Ssheldonh		if (strcasecmp(table[i].d_name, str) == 0) {
52951231Ssheldonh			*value =  table[i].d_value;
53051231Ssheldonh			return (0);
53170109Sdougb		}
53251231Ssheldonh	}
53351231Ssheldonh
53451231Ssheldonh	return (EINVAL);
53551231Ssheldonh}
53611992Sache
53751231Ssheldonhstatic int
53851231Ssheldonhstr2id(const char *str, id_t *value)
5397238Sache{
54027365Sjkh	char *end;
54127365Sjkh
54251231Ssheldonh	if (str == NULL)
54364520Sjdp		return (EINVAL);
54464520Sjdp
54564520Sjdp	*value = strtoul(str, &end, 10);
54664520Sjdp	if ((size_t)(end - str) != strlen(str))
54764520Sjdp		return (EINVAL);
54864520Sjdp
54938512Sgpalmer	return (0);
55051231Ssheldonh}
55151231Ssheldonh
/*
 * Parse 'str' as a non-negative decimal number and store it in '*value'.
 *
 * Returns 0 on success, EINVAL if 'str' is NULL or is followed by
 * characters that are not part of the number, and ERANGE if the parsed
 * number does not fit in a non-negative int64_t.  '*value' is written
 * only on success.  An empty string is accepted and parsed as 0.
 *
 * The range check is needed because strtoul() yields an unsigned long:
 * where that type is 64 bits wide, input such as "-1" or
 * "18446744073709551615" would otherwise be stored as a negative amount.
 * NOTE(review): where unsigned long is narrower than 64 bits, strtoul()
 * cannot represent the full int64_t range - confirm whether a 64-bit
 * parser should be used there.
 */
static int
str2int64(const char *str, int64_t *value)
{
	unsigned long parsed;
	char *end;

	if (str == NULL)
		return (EINVAL);

	parsed = strtoul(str, &end, 10);
	if (*end != '\0')
		return (EINVAL);

	/* Compare as unsigned so the check runs before any wraparound. */
	if ((uint64_t)parsed > (uint64_t)INT64_MAX)
		return (ERANGE);

	*value = (int64_t)parsed;
	return (0);
}
56643951Sjkh
56741648Sjb/*
56841648Sjb * Connect the rule to the racct, increasing refcount for the rule.
56950357Ssheldonh */
57051231Ssheldonhstatic void
57150357Ssheldonhrctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
57241648Sjb{
57341648Sjb	struct rctl_rule_link *link;
57470109Sdougb
57564520Sjdp	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
57651231Ssheldonh
57751231Ssheldonh	rctl_rule_acquire(rule);
57838512Sgpalmer	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
5797296Sjkh	link->rrl_rule = rule;
58017210Spst	link->rrl_exceeded = 0;
58117210Spst
58270109Sdougb	rw_wlock(&rctl_lock);
58351231Ssheldonh	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
58451231Ssheldonh	rw_wunlock(&rctl_lock);
58551231Ssheldonh}
58651231Ssheldonh
58778905Sddstatic int
58851231Ssheldonhrctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
58951231Ssheldonh{
59017210Spst	struct rctl_rule_link *link;
59151231Ssheldonh
59251231Ssheldonh	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
59351231Ssheldonh	rw_assert(&rctl_lock, RA_WLOCKED);
59451231Ssheldonh
59571340Sdougb	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
59651231Ssheldonh	if (link == NULL)
59751231Ssheldonh		return (ENOMEM);
59826727Spst	rctl_rule_acquire(rule);
59951231Ssheldonh	link->rrl_rule = rule;
60051231Ssheldonh	link->rrl_exceeded = 0;
60151231Ssheldonh
60251231Ssheldonh	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
60351231Ssheldonh	return (0);
60417210Spst}
60557572Smarkm
60657572Smarkm/*
60757572Smarkm * Remove limits for a rules matching the filter and release
60857572Smarkm * the refcounts for the rules, possibly freeing them.  Returns
60957572Smarkm * the number of limit structures removed.
61057572Smarkm */
61157572Smarkmstatic int
61257572Smarkmrctl_racct_remove_rules(struct racct *racct,
61357572Smarkm    const struct rctl_rule *filter)
61451231Ssheldonh{
61551231Ssheldonh	int removed = 0;
61642498Sn_hibma	struct rctl_rule_link *link, *linktmp;
61751231Ssheldonh
61851231Ssheldonh	rw_assert(&rctl_lock, RA_WLOCKED);
61942498Sn_hibma
62074198Speter	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
62174198Speter		if (!rctl_rule_matches(link->rrl_rule, filter))
62274198Speter			continue;
62374198Speter
62474198Speter		LIST_REMOVE(link, rrl_next);
62574198Speter		rctl_rule_release(link->rrl_rule);
62674198Speter		uma_zfree(rctl_rule_link_zone, link);
62774198Speter		removed++;
62874198Speter	}
62974198Speter	return (removed);
63074198Speter}
63174198Speter
63274198Speterstatic void
63374198Speterrctl_rule_acquire_subject(struct rctl_rule *rule)
63474198Speter{
63574198Speter
63674198Speter	switch (rule->rr_subject_type) {
63717161Spst	case RCTL_SUBJECT_TYPE_UNDEFINED:
63817161Spst	case RCTL_SUBJECT_TYPE_PROCESS:
63916671Spst		break;
64050612Simp	case RCTL_SUBJECT_TYPE_JAIL:
64119314Speter		if (rule->rr_subject.rs_prison_racct != NULL)
64250357Ssheldonh			prison_racct_hold(rule->rr_subject.rs_prison_racct);
64370109Sdougb		break;
64455511Speter	case RCTL_SUBJECT_TYPE_USER:
64519314Speter		if (rule->rr_subject.rs_uip != NULL)
64651231Ssheldonh			uihold(rule->rr_subject.rs_uip);
64719314Speter		break;
64819314Speter	case RCTL_SUBJECT_TYPE_LOGINCLASS:
64919314Speter		if (rule->rr_subject.rs_loginclass != NULL)
65019314Speter			loginclass_hold(rule->rr_subject.rs_loginclass);
65119314Speter		break;
65251231Ssheldonh	default:
65355451Speter		panic("rctl_rule_acquire_subject: unknown subject type %d",
65419314Speter		    rule->rr_subject_type);
65516671Spst	}
65619314Speter}
65719314Speter
65819314Speterstatic void
65919314Speterrctl_rule_release_subject(struct rctl_rule *rule)
66050357Ssheldonh{
66155511Speter
66219314Speter	switch (rule->rr_subject_type) {
66351231Ssheldonh	case RCTL_SUBJECT_TYPE_UNDEFINED:
66419314Speter	case RCTL_SUBJECT_TYPE_PROCESS:
66519314Speter		break;
66619314Speter	case RCTL_SUBJECT_TYPE_JAIL:
66719314Speter		if (rule->rr_subject.rs_prison_racct != NULL)
66819314Speter			prison_racct_free(rule->rr_subject.rs_prison_racct);
66919314Speter		break;
67055453Speter	case RCTL_SUBJECT_TYPE_USER:
67151231Ssheldonh		if (rule->rr_subject.rs_uip != NULL)
67255451Speter			uifree(rule->rr_subject.rs_uip);
67319314Speter		break;
67455451Speter	case RCTL_SUBJECT_TYPE_LOGINCLASS:
67519314Speter		if (rule->rr_subject.rs_loginclass != NULL)
67619314Speter			loginclass_free(rule->rr_subject.rs_loginclass);
67719314Speter		break;
67870109Sdougb	default:
67916671Spst		panic("rctl_rule_release_subject: unknown subject type %d",
68016671Spst		    rule->rr_subject_type);
68151231Ssheldonh	}
68251231Ssheldonh}
68370109Sdougb
68437899Snectarstruct rctl_rule *
68537899Snectarrctl_rule_alloc(int flags)
68637899Snectar{
68751231Ssheldonh	struct rctl_rule *rule;
68851231Ssheldonh
68951231Ssheldonh	rule = uma_zalloc(rctl_rule_zone, flags);
69051231Ssheldonh	if (rule == NULL)
69151231Ssheldonh		return (NULL);
69250357Ssheldonh	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
69350357Ssheldonh	rule->rr_subject.rs_proc = NULL;
69450357Ssheldonh	rule->rr_subject.rs_uip = NULL;
69550357Ssheldonh	rule->rr_subject.rs_loginclass = NULL;
69649451Speter	rule->rr_subject.rs_prison_racct = NULL;
69749451Speter	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
69850357Ssheldonh	rule->rr_resource = RACCT_UNDEFINED;
69949451Speter	rule->rr_action = RCTL_ACTION_UNDEFINED;
70051231Ssheldonh	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
70151231Ssheldonh	refcount_init(&rule->rr_refcount, 1);
70251231Ssheldonh
70351617Snsayer	return (rule);
70451617Snsayer}
70551617Snsayer
70651617Snsayerstruct rctl_rule *
70751617Snsayerrctl_rule_duplicate(const struct rctl_rule *rule, int flags)
70851617Snsayer{
70951617Snsayer	struct rctl_rule *copy;
71070856Sjhb
71170856Sjhb	copy = uma_zalloc(rctl_rule_zone, flags);
71270856Sjhb	if (copy == NULL)
71370856Sjhb		return (NULL);
71470856Sjhb	copy->rr_subject_type = rule->rr_subject_type;
71570856Sjhb	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
71651231Ssheldonh	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
71751231Ssheldonh	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
71851231Ssheldonh	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
71951231Ssheldonh	copy->rr_per = rule->rr_per;
72041704Sdillon	copy->rr_resource = rule->rr_resource;
72141704Sdillon	copy->rr_action = rule->rr_action;
72270109Sdougb	copy->rr_amount = rule->rr_amount;
72364684Sobrien	refcount_init(&copy->rr_refcount, 1);
72471121Sdes	rctl_rule_acquire_subject(copy);
72571121Sdes
72671121Sdes	return (copy);
72771121Sdes}
72871121Sdes
72971121Sdesvoid
73071121Sdesrctl_rule_acquire(struct rctl_rule *rule)
73171121Sdes{
73271121Sdes
73371121Sdes	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
73464684Sobrien
73564684Sobrien	refcount_acquire(&rule->rr_refcount);
73664684Sobrien}
73764684Sobrien
73864809Ssheldonhstatic void
73964809Ssheldonhrctl_rule_free(void *context, int pending)
74064809Ssheldonh{
74164809Ssheldonh	struct rctl_rule *rule;
74264809Ssheldonh
74364809Ssheldonh	rule = (struct rctl_rule *)context;
74464809Ssheldonh
74564684Sobrien	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
74664684Sobrien
74764684Sobrien	/*
74864684Sobrien	 * We don't need locking here; rule is guaranteed to be inaccessible.
74964684Sobrien	 */
75064684Sobrien
75164684Sobrien	rctl_rule_release_subject(rule);
75276553Speter	uma_zfree(rctl_rule_zone, rule);
75364684Sobrien}
75464684Sobrien
75564684Sobrienvoid
75670109Sdougbrctl_rule_release(struct rctl_rule *rule)
75764810Ssheldonh{
75841704Sdillon
75943951Sjkh	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
76043951Sjkh
76143951Sjkh	if (refcount_release(&rule->rr_refcount)) {
76241704Sdillon		/*
76341704Sdillon		 * rctl_rule_release() is often called when iterating
76451231Ssheldonh		 * over all the uidinfo structures in the system,
76551231Ssheldonh		 * holding uihashtbl_lock.  Since rctl_rule_free()
76651231Ssheldonh		 * might end up calling uifree(), this would lead
76759674Ssheldonh		 * to lock recursion.  Use taskqueue to avoid this.
76859674Ssheldonh		 */
76959674Ssheldonh		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
77051231Ssheldonh		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
77151231Ssheldonh	}
77251231Ssheldonh}
77351231Ssheldonh
77451231Ssheldonhstatic int
77551231Ssheldonhrctl_rule_fully_specified(const struct rctl_rule *rule)
77670109Sdougb{
77751231Ssheldonh
77843197Sdillon	switch (rule->rr_subject_type) {
77941704Sdillon	case RCTL_SUBJECT_TYPE_UNDEFINED:
78041704Sdillon		return (0);
78151617Snsayer	case RCTL_SUBJECT_TYPE_PROCESS:
78251617Snsayer		if (rule->rr_subject.rs_proc == NULL)
78351617Snsayer			return (0);
78451617Snsayer		break;
78551617Snsayer	case RCTL_SUBJECT_TYPE_USER:
78651617Snsayer		if (rule->rr_subject.rs_uip == NULL)
78751617Snsayer			return (0);
78879825Sroam		break;
78951617Snsayer	case RCTL_SUBJECT_TYPE_LOGINCLASS:
79051617Snsayer		if (rule->rr_subject.rs_loginclass == NULL)
79151617Snsayer			return (0);
79279825Sroam		break;
79351617Snsayer	case RCTL_SUBJECT_TYPE_JAIL:
79451617Snsayer		if (rule->rr_subject.rs_prison_racct == NULL)
79551617Snsayer			return (0);
79679825Sroam		break;
79779825Sroam	default:
79879825Sroam		panic("rctl_rule_fully_specified: unknown subject type %d",
79979825Sroam		    rule->rr_subject_type);
80079825Sroam	}
80179825Sroam	if (rule->rr_resource == RACCT_UNDEFINED)
80283897Sobrien		return (0);
80379825Sroam	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
80479825Sroam		return (0);
80579825Sroam	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
80670109Sdougb		return (0);
80751617Snsayer	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
80851617Snsayer		return (0);
80951617Snsayer
81053314Sache	return (1);
81153314Sache}
81253314Sache
81353314Sachestatic int
81437106Sjkoshyrctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
81537106Sjkoshy{
81651231Ssheldonh	int error = 0;
81751231Ssheldonh	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
81851231Ssheldonh	     *amountstr, *perstr;
81951231Ssheldonh	struct rctl_rule *rule;
82070109Sdougb	id_t id;
82151231Ssheldonh
82251231Ssheldonh	rule = rctl_rule_alloc(M_WAITOK);
82351231Ssheldonh
82451231Ssheldonh	subjectstr = strsep(&rulestr, ":");
82537106Sjkoshy	subject_idstr = strsep(&rulestr, ":");
82676946Sdd	resourcestr = strsep(&rulestr, ":");
82776946Sdd	actionstr = strsep(&rulestr, "=/");
82876946Sdd	amountstr = strsep(&rulestr, "/");
82976946Sdd	perstr = rulestr;
83076946Sdd
83176946Sdd	if (subjectstr == NULL || subjectstr[0] == '\0')
83276946Sdd		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
83376460Smckusick	else {
83470109Sdougb		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
83570109Sdougb		if (error != 0)
83637Srgrimes			goto out;
83770109Sdougb	}
83837Srgrimes
83970109Sdougb	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
840		rule->rr_subject.rs_proc = NULL;
841		rule->rr_subject.rs_uip = NULL;
842		rule->rr_subject.rs_loginclass = NULL;
843		rule->rr_subject.rs_prison_racct = NULL;
844	} else {
845		switch (rule->rr_subject_type) {
846		case RCTL_SUBJECT_TYPE_UNDEFINED:
847			error = EINVAL;
848			goto out;
849		case RCTL_SUBJECT_TYPE_PROCESS:
850			error = str2id(subject_idstr, &id);
851			if (error != 0)
852				goto out;
853			sx_assert(&allproc_lock, SA_LOCKED);
854			rule->rr_subject.rs_proc = pfind(id);
855			if (rule->rr_subject.rs_proc == NULL) {
856				error = ESRCH;
857				goto out;
858			}
859			PROC_UNLOCK(rule->rr_subject.rs_proc);
860			break;
861		case RCTL_SUBJECT_TYPE_USER:
862			error = str2id(subject_idstr, &id);
863			if (error != 0)
864				goto out;
865			rule->rr_subject.rs_uip = uifind(id);
866			break;
867		case RCTL_SUBJECT_TYPE_LOGINCLASS:
868			rule->rr_subject.rs_loginclass =
869			    loginclass_find(subject_idstr);
870			if (rule->rr_subject.rs_loginclass == NULL) {
871				error = ENAMETOOLONG;
872				goto out;
873			}
874			break;
875		case RCTL_SUBJECT_TYPE_JAIL:
876			rule->rr_subject.rs_prison_racct =
877			    prison_racct_find(subject_idstr);
878			if (rule->rr_subject.rs_prison_racct == NULL) {
879				error = ENAMETOOLONG;
880				goto out;
881			}
882			break;
883               default:
884                       panic("rctl_string_to_rule: unknown subject type %d",
885                           rule->rr_subject_type);
886               }
887	}
888
889	if (resourcestr == NULL || resourcestr[0] == '\0')
890		rule->rr_resource = RACCT_UNDEFINED;
891	else {
892		error = str2value(resourcestr, &rule->rr_resource,
893		    resourcenames);
894		if (error != 0)
895			goto out;
896	}
897
898	if (actionstr == NULL || actionstr[0] == '\0')
899		rule->rr_action = RCTL_ACTION_UNDEFINED;
900	else {
901		error = str2value(actionstr, &rule->rr_action, actionnames);
902		if (error != 0)
903			goto out;
904	}
905
906	if (amountstr == NULL || amountstr[0] == '\0')
907		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
908	else {
909		error = str2int64(amountstr, &rule->rr_amount);
910		if (error != 0)
911			goto out;
912		if (racct_is_in_thousands(rule->rr_resource))
913			rule->rr_amount *= 1000;
914	}
915
916	if (perstr == NULL || perstr[0] == '\0')
917		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
918	else {
919		error = str2value(perstr, &rule->rr_per, subjectnames);
920		if (error != 0)
921			goto out;
922	}
923
924out:
925	if (error == 0)
926		*rulep = rule;
927	else
928		rctl_rule_release(rule);
929
930	return (error);
931}
932
933/*
934 * Link a rule with all the subjects it applies to.
935 */
936int
937rctl_rule_add(struct rctl_rule *rule)
938{
939	struct proc *p;
940	struct ucred *cred;
941	struct uidinfo *uip;
942	struct prison *pr;
943	struct prison_racct *prr;
944	struct loginclass *lc;
945	struct rctl_rule *rule2;
946	int match;
947
948	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
949
950	/*
951	 * Some rules just don't make sense.  Note that the one below
952	 * cannot be rewritten using racct_is_deniable(); the RACCT_PCTCPU,
953	 * for example, is not deniable in the racct sense, but the
954	 * limit is enforced in a different way, so "deny" rules for %CPU
955	 * do make sense.
956	 */
957	if (rule->rr_action == RCTL_ACTION_DENY &&
958	    (rule->rr_resource == RACCT_CPU ||
959	    rule->rr_resource == RACCT_WALLCLOCK))
960		return (EOPNOTSUPP);
961
962	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
963	    racct_is_sloppy(rule->rr_resource))
964		return (EOPNOTSUPP);
965
966	/*
967	 * Make sure there are no duplicated rules.  Also, for the "deny"
968	 * rules, remove ones differing only by "amount".
969	 */
970	if (rule->rr_action == RCTL_ACTION_DENY) {
971		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
972		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
973		rctl_rule_remove(rule2);
974		rctl_rule_release(rule2);
975	} else
976		rctl_rule_remove(rule);
977
978	switch (rule->rr_subject_type) {
979	case RCTL_SUBJECT_TYPE_PROCESS:
980		p = rule->rr_subject.rs_proc;
981		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
982		/*
983		 * No resource limits for system processes.
984		 */
985		if (p->p_flag & P_SYSTEM)
986			return (EPERM);
987
988		rctl_racct_add_rule(p->p_racct, rule);
989		/*
990		 * In case of per-process rule, we don't have anything more
991		 * to do.
992		 */
993		return (0);
994
995	case RCTL_SUBJECT_TYPE_USER:
996		uip = rule->rr_subject.rs_uip;
997		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
998		rctl_racct_add_rule(uip->ui_racct, rule);
999		break;
1000
1001	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1002		lc = rule->rr_subject.rs_loginclass;
1003		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1004		rctl_racct_add_rule(lc->lc_racct, rule);
1005		break;
1006
1007	case RCTL_SUBJECT_TYPE_JAIL:
1008		prr = rule->rr_subject.rs_prison_racct;
1009		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1010		rctl_racct_add_rule(prr->prr_racct, rule);
1011		break;
1012
1013	default:
1014		panic("rctl_rule_add: unknown subject type %d",
1015		    rule->rr_subject_type);
1016	}
1017
1018	/*
1019	 * Now go through all the processes and add the new rule to the ones
1020	 * it applies to.
1021	 */
1022	sx_assert(&allproc_lock, SA_LOCKED);
1023	FOREACH_PROC_IN_SYSTEM(p) {
1024		if (p->p_flag & P_SYSTEM)
1025			continue;
1026		cred = p->p_ucred;
1027		switch (rule->rr_subject_type) {
1028		case RCTL_SUBJECT_TYPE_USER:
1029			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1030			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1031				break;
1032			continue;
1033		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1034			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1035				break;
1036			continue;
1037		case RCTL_SUBJECT_TYPE_JAIL:
1038			match = 0;
1039			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1040				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1041					match = 1;
1042					break;
1043				}
1044			}
1045			if (match)
1046				break;
1047			continue;
1048		default:
1049			panic("rctl_rule_add: unknown subject type %d",
1050			    rule->rr_subject_type);
1051		}
1052
1053		rctl_racct_add_rule(p->p_racct, rule);
1054	}
1055
1056	return (0);
1057}
1058
1059static void
1060rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1061{
1062	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1063	int found = 0;
1064
1065	rw_wlock(&rctl_lock);
1066	found += rctl_racct_remove_rules(racct, filter);
1067	rw_wunlock(&rctl_lock);
1068
1069	*((int *)arg3) += found;
1070}
1071
1072/*
1073 * Remove all rules that match the filter.
1074 */
1075int
1076rctl_rule_remove(struct rctl_rule *filter)
1077{
1078	int found = 0;
1079	struct proc *p;
1080
1081	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1082	    filter->rr_subject.rs_proc != NULL) {
1083		p = filter->rr_subject.rs_proc;
1084		rw_wlock(&rctl_lock);
1085		found = rctl_racct_remove_rules(p->p_racct, filter);
1086		rw_wunlock(&rctl_lock);
1087		if (found)
1088			return (0);
1089		return (ESRCH);
1090	}
1091
1092	loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1093	    (void *)&found);
1094	ui_racct_foreach(rctl_rule_remove_callback, filter,
1095	    (void *)&found);
1096	prison_racct_foreach(rctl_rule_remove_callback, filter,
1097	    (void *)&found);
1098
1099	sx_assert(&allproc_lock, SA_LOCKED);
1100	rw_wlock(&rctl_lock);
1101	FOREACH_PROC_IN_SYSTEM(p) {
1102		found += rctl_racct_remove_rules(p->p_racct, filter);
1103	}
1104	rw_wunlock(&rctl_lock);
1105
1106	if (found)
1107		return (0);
1108	return (ESRCH);
1109}
1110
1111/*
1112 * Appends a rule to the sbuf.
1113 */
1114static void
1115rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1116{
1117	int64_t amount;
1118
1119	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1120
1121	switch (rule->rr_subject_type) {
1122	case RCTL_SUBJECT_TYPE_PROCESS:
1123		if (rule->rr_subject.rs_proc == NULL)
1124			sbuf_printf(sb, ":");
1125		else
1126			sbuf_printf(sb, "%d:",
1127			    rule->rr_subject.rs_proc->p_pid);
1128		break;
1129	case RCTL_SUBJECT_TYPE_USER:
1130		if (rule->rr_subject.rs_uip == NULL)
1131			sbuf_printf(sb, ":");
1132		else
1133			sbuf_printf(sb, "%d:",
1134			    rule->rr_subject.rs_uip->ui_uid);
1135		break;
1136	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1137		if (rule->rr_subject.rs_loginclass == NULL)
1138			sbuf_printf(sb, ":");
1139		else
1140			sbuf_printf(sb, "%s:",
1141			    rule->rr_subject.rs_loginclass->lc_name);
1142		break;
1143	case RCTL_SUBJECT_TYPE_JAIL:
1144		if (rule->rr_subject.rs_prison_racct == NULL)
1145			sbuf_printf(sb, ":");
1146		else
1147			sbuf_printf(sb, "%s:",
1148			    rule->rr_subject.rs_prison_racct->prr_name);
1149		break;
1150	default:
1151		panic("rctl_rule_to_sbuf: unknown subject type %d",
1152		    rule->rr_subject_type);
1153	}
1154
1155	amount = rule->rr_amount;
1156	if (amount != RCTL_AMOUNT_UNDEFINED &&
1157	    racct_is_in_thousands(rule->rr_resource))
1158		amount /= 1000;
1159
1160	sbuf_printf(sb, "%s:%s=%jd",
1161	    rctl_resource_name(rule->rr_resource),
1162	    rctl_action_name(rule->rr_action),
1163	    amount);
1164
1165	if (rule->rr_per != rule->rr_subject_type)
1166		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1167}
1168
1169/*
1170 * Routine used by RCTL syscalls to read in input string.
1171 */
1172static int
1173rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1174{
1175	int error;
1176	char *str;
1177
1178	if (inbuflen <= 0)
1179		return (EINVAL);
1180
1181	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1182	error = copyinstr(inbufp, str, inbuflen, NULL);
1183	if (error != 0) {
1184		free(str, M_RCTL);
1185		return (error);
1186	}
1187
1188	*inputstr = str;
1189
1190	return (0);
1191}
1192
/*
 * Routine used by RCTL syscalls to write out output string.
 *
 * Finishes the sbuf and copies its contents, including the terminating
 * NUL, to the user buffer at outbufp.  The sbuf is always deleted before
 * returning, so the caller must not touch it afterwards.
 *
 * Returns 0 when outputsbuf is NULL (nothing to write) or the copy
 * succeeded, ERANGE when outbuflen is too small for the string plus its
 * terminator, or the error reported by copyout().
 */
static int
rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
{
	int error;

	if (outputsbuf == NULL)
		return (0);

	sbuf_finish(outputsbuf);
	if (outbuflen >= sbuf_len(outputsbuf) + 1)
		error = copyout(sbuf_data(outputsbuf), outbufp,
		    sbuf_len(outputsbuf) + 1);
	else
		error = ERANGE;
	sbuf_delete(outputsbuf);

	return (error);
}
1214
1215static struct sbuf *
1216rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1217{
1218	int i;
1219	int64_t amount;
1220	struct sbuf *sb;
1221
1222	sb = sbuf_new_auto();
1223	for (i = 0; i <= RACCT_MAX; i++) {
1224		if (sloppy == 0 && racct_is_sloppy(i))
1225			continue;
1226		amount = racct->r_resources[i];
1227		if (racct_is_in_thousands(i))
1228			amount /= 1000;
1229		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1230	}
1231	sbuf_setpos(sb, sbuf_len(sb) - 1);
1232	return (sb);
1233}
1234
1235int
1236rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1237{
1238	int error;
1239	char *inputstr;
1240	struct rctl_rule *filter;
1241	struct sbuf *outputsbuf = NULL;
1242	struct proc *p;
1243	struct uidinfo *uip;
1244	struct loginclass *lc;
1245	struct prison_racct *prr;
1246
1247	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1248	if (error != 0)
1249		return (error);
1250
1251	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1252	if (error != 0)
1253		return (error);
1254
1255	sx_slock(&allproc_lock);
1256	error = rctl_string_to_rule(inputstr, &filter);
1257	free(inputstr, M_RCTL);
1258	if (error != 0) {
1259		sx_sunlock(&allproc_lock);
1260		return (error);
1261	}
1262
1263	switch (filter->rr_subject_type) {
1264	case RCTL_SUBJECT_TYPE_PROCESS:
1265		p = filter->rr_subject.rs_proc;
1266		if (p == NULL) {
1267			error = EINVAL;
1268			goto out;
1269		}
1270		if (p->p_flag & P_SYSTEM) {
1271			error = EINVAL;
1272			goto out;
1273		}
1274		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1275		break;
1276	case RCTL_SUBJECT_TYPE_USER:
1277		uip = filter->rr_subject.rs_uip;
1278		if (uip == NULL) {
1279			error = EINVAL;
1280			goto out;
1281		}
1282		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1283		break;
1284	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1285		lc = filter->rr_subject.rs_loginclass;
1286		if (lc == NULL) {
1287			error = EINVAL;
1288			goto out;
1289		}
1290		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1291		break;
1292	case RCTL_SUBJECT_TYPE_JAIL:
1293		prr = filter->rr_subject.rs_prison_racct;
1294		if (prr == NULL) {
1295			error = EINVAL;
1296			goto out;
1297		}
1298		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1299		break;
1300	default:
1301		error = EINVAL;
1302	}
1303out:
1304	rctl_rule_release(filter);
1305	sx_sunlock(&allproc_lock);
1306	if (error != 0)
1307		return (error);
1308
1309	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1310
1311	return (error);
1312}
1313
1314static void
1315rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1316{
1317	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1318	struct rctl_rule_link *link;
1319	struct sbuf *sb = (struct sbuf *)arg3;
1320
1321	rw_rlock(&rctl_lock);
1322	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1323		if (!rctl_rule_matches(link->rrl_rule, filter))
1324			continue;
1325		rctl_rule_to_sbuf(sb, link->rrl_rule);
1326		sbuf_printf(sb, ",");
1327	}
1328	rw_runlock(&rctl_lock);
1329}
1330
1331int
1332rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1333{
1334	int error;
1335	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1336	char *inputstr, *buf;
1337	struct sbuf *sb;
1338	struct rctl_rule *filter;
1339	struct rctl_rule_link *link;
1340	struct proc *p;
1341
1342	error = priv_check(td, PRIV_RCTL_GET_RULES);
1343	if (error != 0)
1344		return (error);
1345
1346	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1347	if (error != 0)
1348		return (error);
1349
1350	sx_slock(&allproc_lock);
1351	error = rctl_string_to_rule(inputstr, &filter);
1352	free(inputstr, M_RCTL);
1353	if (error != 0) {
1354		sx_sunlock(&allproc_lock);
1355		return (error);
1356	}
1357
1358again:
1359	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1360	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1361	KASSERT(sb != NULL, ("sbuf_new failed"));
1362
1363	sx_assert(&allproc_lock, SA_LOCKED);
1364	FOREACH_PROC_IN_SYSTEM(p) {
1365		rw_rlock(&rctl_lock);
1366		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1367			/*
1368			 * Non-process rules will be added to the buffer later.
1369			 * Adding them here would result in duplicated output.
1370			 */
1371			if (link->rrl_rule->rr_subject_type !=
1372			    RCTL_SUBJECT_TYPE_PROCESS)
1373				continue;
1374			if (!rctl_rule_matches(link->rrl_rule, filter))
1375				continue;
1376			rctl_rule_to_sbuf(sb, link->rrl_rule);
1377			sbuf_printf(sb, ",");
1378		}
1379		rw_runlock(&rctl_lock);
1380	}
1381
1382	loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1383	ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1384	prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1385	if (sbuf_error(sb) == ENOMEM) {
1386		sbuf_delete(sb);
1387		free(buf, M_RCTL);
1388		bufsize *= 4;
1389		goto again;
1390	}
1391
1392	/*
1393	 * Remove trailing ",".
1394	 */
1395	if (sbuf_len(sb) > 0)
1396		sbuf_setpos(sb, sbuf_len(sb) - 1);
1397
1398	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1399
1400	rctl_rule_release(filter);
1401	sx_sunlock(&allproc_lock);
1402	free(buf, M_RCTL);
1403	return (error);
1404}
1405
1406int
1407rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1408{
1409	int error;
1410	size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1411	char *inputstr, *buf;
1412	struct sbuf *sb;
1413	struct rctl_rule *filter;
1414	struct rctl_rule_link *link;
1415
1416	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1417	if (error != 0)
1418		return (error);
1419
1420	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1421	if (error != 0)
1422		return (error);
1423
1424	sx_slock(&allproc_lock);
1425	error = rctl_string_to_rule(inputstr, &filter);
1426	free(inputstr, M_RCTL);
1427	if (error != 0) {
1428		sx_sunlock(&allproc_lock);
1429		return (error);
1430	}
1431
1432	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1433		rctl_rule_release(filter);
1434		sx_sunlock(&allproc_lock);
1435		return (EINVAL);
1436	}
1437	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1438		rctl_rule_release(filter);
1439		sx_sunlock(&allproc_lock);
1440		return (EOPNOTSUPP);
1441	}
1442	if (filter->rr_subject.rs_proc == NULL) {
1443		rctl_rule_release(filter);
1444		sx_sunlock(&allproc_lock);
1445		return (EINVAL);
1446	}
1447
1448again:
1449	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1450	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1451	KASSERT(sb != NULL, ("sbuf_new failed"));
1452
1453	rw_rlock(&rctl_lock);
1454	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1455	    rrl_next) {
1456		rctl_rule_to_sbuf(sb, link->rrl_rule);
1457		sbuf_printf(sb, ",");
1458	}
1459	rw_runlock(&rctl_lock);
1460	if (sbuf_error(sb) == ENOMEM) {
1461		sbuf_delete(sb);
1462		free(buf, M_RCTL);
1463		bufsize *= 4;
1464		goto again;
1465	}
1466
1467	/*
1468	 * Remove trailing ",".
1469	 */
1470	if (sbuf_len(sb) > 0)
1471		sbuf_setpos(sb, sbuf_len(sb) - 1);
1472
1473	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1474	rctl_rule_release(filter);
1475	sx_sunlock(&allproc_lock);
1476	free(buf, M_RCTL);
1477	return (error);
1478}
1479
1480int
1481rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1482{
1483	int error;
1484	struct rctl_rule *rule;
1485	char *inputstr;
1486
1487	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1488	if (error != 0)
1489		return (error);
1490
1491	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1492	if (error != 0)
1493		return (error);
1494
1495	sx_slock(&allproc_lock);
1496	error = rctl_string_to_rule(inputstr, &rule);
1497	free(inputstr, M_RCTL);
1498	if (error != 0) {
1499		sx_sunlock(&allproc_lock);
1500		return (error);
1501	}
1502	/*
1503	 * The 'per' part of a rule is optional.
1504	 */
1505	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1506	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1507		rule->rr_per = rule->rr_subject_type;
1508
1509	if (!rctl_rule_fully_specified(rule)) {
1510		error = EINVAL;
1511		goto out;
1512	}
1513
1514	error = rctl_rule_add(rule);
1515
1516out:
1517	rctl_rule_release(rule);
1518	sx_sunlock(&allproc_lock);
1519	return (error);
1520}
1521
1522int
1523rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1524{
1525	int error;
1526	struct rctl_rule *filter;
1527	char *inputstr;
1528
1529	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1530	if (error != 0)
1531		return (error);
1532
1533	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1534	if (error != 0)
1535		return (error);
1536
1537	sx_slock(&allproc_lock);
1538	error = rctl_string_to_rule(inputstr, &filter);
1539	free(inputstr, M_RCTL);
1540	if (error != 0) {
1541		sx_sunlock(&allproc_lock);
1542		return (error);
1543	}
1544
1545	error = rctl_rule_remove(filter);
1546	rctl_rule_release(filter);
1547	sx_sunlock(&allproc_lock);
1548
1549	return (error);
1550}
1551
1552/*
1553 * Update RCTL rule list after credential change.
1554 */
1555void
1556rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1557{
1558	int rulecnt, i;
1559	struct rctl_rule_link *link, *newlink;
1560	struct uidinfo *newuip;
1561	struct loginclass *newlc;
1562	struct prison_racct *newprr;
1563	LIST_HEAD(, rctl_rule_link) newrules;
1564
1565	newuip = newcred->cr_ruidinfo;
1566	newlc = newcred->cr_loginclass;
1567	newprr = newcred->cr_prison->pr_prison_racct;
1568
1569	LIST_INIT(&newrules);
1570
1571again:
1572	/*
1573	 * First, count the rules that apply to the process with new
1574	 * credentials.
1575	 */
1576	rulecnt = 0;
1577	rw_rlock(&rctl_lock);
1578	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1579		if (link->rrl_rule->rr_subject_type ==
1580		    RCTL_SUBJECT_TYPE_PROCESS)
1581			rulecnt++;
1582	}
1583	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1584		rulecnt++;
1585	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1586		rulecnt++;
1587	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1588		rulecnt++;
1589	rw_runlock(&rctl_lock);
1590
1591	/*
1592	 * Create temporary list.  We've dropped the rctl_lock in order
1593	 * to use M_WAITOK.
1594	 */
1595	for (i = 0; i < rulecnt; i++) {
1596		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1597		newlink->rrl_rule = NULL;
1598		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1599	}
1600
1601	newlink = LIST_FIRST(&newrules);
1602
1603	/*
1604	 * Assign rules to the newly allocated list entries.
1605	 */
1606	rw_wlock(&rctl_lock);
1607	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1608		if (link->rrl_rule->rr_subject_type ==
1609		    RCTL_SUBJECT_TYPE_PROCESS) {
1610			if (newlink == NULL)
1611				goto goaround;
1612			rctl_rule_acquire(link->rrl_rule);
1613			newlink->rrl_rule = link->rrl_rule;
1614			newlink = LIST_NEXT(newlink, rrl_next);
1615			rulecnt--;
1616		}
1617	}
1618
1619	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1620		if (newlink == NULL)
1621			goto goaround;
1622		rctl_rule_acquire(link->rrl_rule);
1623		newlink->rrl_rule = link->rrl_rule;
1624		newlink = LIST_NEXT(newlink, rrl_next);
1625		rulecnt--;
1626	}
1627
1628	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1629		if (newlink == NULL)
1630			goto goaround;
1631		rctl_rule_acquire(link->rrl_rule);
1632		newlink->rrl_rule = link->rrl_rule;
1633		newlink = LIST_NEXT(newlink, rrl_next);
1634		rulecnt--;
1635	}
1636
1637	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1638		if (newlink == NULL)
1639			goto goaround;
1640		rctl_rule_acquire(link->rrl_rule);
1641		newlink->rrl_rule = link->rrl_rule;
1642		newlink = LIST_NEXT(newlink, rrl_next);
1643		rulecnt--;
1644	}
1645
1646	if (rulecnt == 0) {
1647		/*
1648		 * Free the old rule list.
1649		 */
1650		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1651			link = LIST_FIRST(&p->p_racct->r_rule_links);
1652			LIST_REMOVE(link, rrl_next);
1653			rctl_rule_release(link->rrl_rule);
1654			uma_zfree(rctl_rule_link_zone, link);
1655		}
1656
1657		/*
1658		 * Replace lists and we're done.
1659		 *
1660		 * XXX: Is there any way to switch list heads instead
1661		 *      of iterating here?
1662		 */
1663		while (!LIST_EMPTY(&newrules)) {
1664			newlink = LIST_FIRST(&newrules);
1665			LIST_REMOVE(newlink, rrl_next);
1666			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1667			    newlink, rrl_next);
1668		}
1669
1670		rw_wunlock(&rctl_lock);
1671
1672		return;
1673	}
1674
1675goaround:
1676	rw_wunlock(&rctl_lock);
1677
1678	/*
1679	 * Rule list changed while we were not holding the rctl_lock.
1680	 * Free the new list and try again.
1681	 */
1682	while (!LIST_EMPTY(&newrules)) {
1683		newlink = LIST_FIRST(&newrules);
1684		LIST_REMOVE(newlink, rrl_next);
1685		if (newlink->rrl_rule != NULL)
1686			rctl_rule_release(newlink->rrl_rule);
1687		uma_zfree(rctl_rule_link_zone, newlink);
1688	}
1689
1690	goto again;
1691}
1692
1693/*
1694 * Assign RCTL rules to the newly created process.
1695 */
1696int
1697rctl_proc_fork(struct proc *parent, struct proc *child)
1698{
1699	int error;
1700	struct rctl_rule_link *link;
1701	struct rctl_rule *rule;
1702
1703	LIST_INIT(&child->p_racct->r_rule_links);
1704
1705	/*
1706	 * No limits for kernel processes.
1707	 */
1708	if (child->p_flag & P_SYSTEM)
1709		return (0);
1710
1711	/*
1712	 * Nothing to inherit from P_SYSTEM parents.
1713	 */
1714	if (parent->p_racct == NULL) {
1715		KASSERT(parent->p_flag & P_SYSTEM,
1716		    ("non-system process without racct; p = %p", parent));
1717		return (0);
1718	}
1719
1720	rw_wlock(&rctl_lock);
1721
1722	/*
1723	 * Go through limits applicable to the parent and assign them
1724	 * to the child.  Rules with 'process' subject have to be duplicated
1725	 * in order to make their rr_subject point to the new process.
1726	 */
1727	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1728		if (link->rrl_rule->rr_subject_type ==
1729		    RCTL_SUBJECT_TYPE_PROCESS) {
1730			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1731			if (rule == NULL)
1732				goto fail;
1733			KASSERT(rule->rr_subject.rs_proc == parent,
1734			    ("rule->rr_subject.rs_proc != parent"));
1735			rule->rr_subject.rs_proc = child;
1736			error = rctl_racct_add_rule_locked(child->p_racct,
1737			    rule);
1738			rctl_rule_release(rule);
1739			if (error != 0)
1740				goto fail;
1741		} else {
1742			error = rctl_racct_add_rule_locked(child->p_racct,
1743			    link->rrl_rule);
1744			if (error != 0)
1745				goto fail;
1746		}
1747	}
1748
1749	rw_wunlock(&rctl_lock);
1750	return (0);
1751
1752fail:
1753	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1754		link = LIST_FIRST(&child->p_racct->r_rule_links);
1755		LIST_REMOVE(link, rrl_next);
1756		rctl_rule_release(link->rrl_rule);
1757		uma_zfree(rctl_rule_link_zone, link);
1758	}
1759	rw_wunlock(&rctl_lock);
1760	return (EAGAIN);
1761}
1762
1763/*
1764 * Release rules attached to the racct.
1765 */
1766void
1767rctl_racct_release(struct racct *racct)
1768{
1769	struct rctl_rule_link *link;
1770
1771	rw_wlock(&rctl_lock);
1772	while (!LIST_EMPTY(&racct->r_rule_links)) {
1773		link = LIST_FIRST(&racct->r_rule_links);
1774		LIST_REMOVE(link, rrl_next);
1775		rctl_rule_release(link->rrl_rule);
1776		uma_zfree(rctl_rule_link_zone, link);
1777	}
1778	rw_wunlock(&rctl_lock);
1779}
1780
1781static void
1782rctl_init(void)
1783{
1784
1785	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1786	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1787	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1788	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1789	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1790}
1791
1792#else /* !RCTL */
1793
/*
 * Stub compiled when RCTL is not defined: the system call is not
 * implemented and always fails with ENOSYS.
 */
int
rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	(void)td;
	(void)uap;
	return (ENOSYS);
}
1800
/*
 * Stub compiled when RCTL is not defined: the system call is not
 * implemented and always fails with ENOSYS.
 */
int
rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	(void)td;
	(void)uap;
	return (ENOSYS);
}
1807
/*
 * Stub compiled when RCTL is not defined: the system call is not
 * implemented and always fails with ENOSYS.
 */
int
rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	(void)td;
	(void)uap;
	return (ENOSYS);
}
1814
/*
 * Stub compiled when RCTL is not defined: the system call is not
 * implemented and always fails with ENOSYS.
 */
int
rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	(void)td;
	(void)uap;
	return (ENOSYS);
}
1821
/*
 * Stub compiled when RCTL is not defined: the system call is not
 * implemented and always fails with ENOSYS.
 */
int
rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	(void)td;
	(void)uap;
	return (ENOSYS);
}
1828
1829#endif /* !RCTL */
1830