1220163Strasz/*-
2220163Strasz * Copyright (c) 2010 The FreeBSD Foundation
3220163Strasz * All rights reserved.
4220163Strasz *
5220163Strasz * This software was developed by Edward Tomasz Napierala under sponsorship
6220163Strasz * from the FreeBSD Foundation.
7220163Strasz *
8220163Strasz * Redistribution and use in source and binary forms, with or without
9220163Strasz * modification, are permitted provided that the following conditions
10220163Strasz * are met:
11220163Strasz * 1. Redistributions of source code must retain the above copyright
12220163Strasz *    notice, this list of conditions and the following disclaimer.
13220163Strasz * 2. Redistributions in binary form must reproduce the above copyright
14220163Strasz *    notice, this list of conditions and the following disclaimer in the
15220163Strasz *    documentation and/or other materials provided with the distribution.
16220163Strasz *
17220163Strasz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18220163Strasz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19220163Strasz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20220163Strasz * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21220163Strasz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22220163Strasz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23220163Strasz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24220163Strasz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25220163Strasz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26220163Strasz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27220163Strasz * SUCH DAMAGE.
28220163Strasz *
29220163Strasz * $FreeBSD: stable/11/sys/kern/kern_rctl.c 335536 2018-06-22 09:18:38Z avg $
30220163Strasz */
31220163Strasz
32220163Strasz#include <sys/cdefs.h>
33220163Strasz__FBSDID("$FreeBSD: stable/11/sys/kern/kern_rctl.c 335536 2018-06-22 09:18:38Z avg $");
34220163Strasz
35220163Strasz#include <sys/param.h>
36220163Strasz#include <sys/bus.h>
37220163Strasz#include <sys/malloc.h>
38220163Strasz#include <sys/queue.h>
39220163Strasz#include <sys/refcount.h>
40220163Strasz#include <sys/jail.h>
41220163Strasz#include <sys/kernel.h>
42220163Strasz#include <sys/limits.h>
43220163Strasz#include <sys/loginclass.h>
44220163Strasz#include <sys/priv.h>
45220163Strasz#include <sys/proc.h>
46220163Strasz#include <sys/racct.h>
47220163Strasz#include <sys/rctl.h>
48220163Strasz#include <sys/resourcevar.h>
49220163Strasz#include <sys/sx.h>
50220163Strasz#include <sys/sysent.h>
51220163Strasz#include <sys/sysproto.h>
52220163Strasz#include <sys/systm.h>
53220163Strasz#include <sys/types.h>
54220163Strasz#include <sys/eventhandler.h>
55220163Strasz#include <sys/lock.h>
56220163Strasz#include <sys/mutex.h>
57220163Strasz#include <sys/rwlock.h>
58220163Strasz#include <sys/sbuf.h>
59220163Strasz#include <sys/taskqueue.h>
60220163Strasz#include <sys/tree.h>
61220163Strasz#include <vm/uma.h>
62220163Strasz
63220163Strasz#ifdef RCTL
64220163Strasz#ifndef RACCT
65220163Strasz#error "The RCTL option requires the RACCT option"
66220163Strasz#endif
67220163Strasz
68220163StraszFEATURE(rctl, "Resource Limits");
69220163Strasz
/*
 * HRF_* flag values.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/*
 * Buffer size limits, in bytes.  The products are parenthesized so that the
 * macros expand to a single value regardless of the operators surrounding
 * them (e.g. 'x % RCTL_MAX_INBUFSIZE' or 'x / RCTL_MAX_OUTBUFSIZE').
 */
#define	RCTL_MAX_INBUFSIZE	(4 * 1024)
#define	RCTL_MAX_OUTBUFSIZE	(16 * 1024 * 1024)
#define	RCTL_LOG_BUFSIZE	128

/*
 * Amount subtracted from the available %CPU by rctl_pcpu_available().
 */
#define	RCTL_PCPU_SHIFT		(10 * 1000000)
79242139Strasz
80297633Straszstatic unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
81297510Straszstatic int rctl_log_rate_limit = 10;
82297510Straszstatic int rctl_devctl_rate_limit = 10;
83290551Strasz
84297722Strasz/*
85297722Strasz * Values below are initialized in rctl_init().
86297722Strasz */
87297722Straszstatic int rctl_throttle_min = -1;
88297722Straszstatic int rctl_throttle_max = -1;
89297722Straszstatic int rctl_throttle_pct = -1;
90297722Straszstatic int rctl_throttle_pct2 = -1;
91297722Strasz
92297722Straszstatic int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
93297722Straszstatic int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
94297722Straszstatic int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
95297722Straszstatic int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
96297722Strasz
97290551StraszSYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits");
98290551StraszSYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
99290551Strasz    &rctl_maxbufsize, 0, "Maximum output buffer size");
100297510StraszSYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
101297510Strasz    &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
102297722StraszSYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
103297510Strasz    &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
104297722StraszSYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
105297722Strasz    CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_min_sysctl, "IU",
106297722Strasz    "Shortest throttling duration, in hz");
107297722StraszTUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
108297722StraszSYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
109297722Strasz    CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_max_sysctl, "IU",
110297722Strasz    "Longest throttling duration, in hz");
111297722StraszTUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
112297722StraszSYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
113297722Strasz    CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct_sysctl, "IU",
114297633Strasz    "Throttling penalty for process consumption, in percent");
115297722StraszTUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
116297722StraszSYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
117297722Strasz    CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct2_sysctl, "IU",
118297633Strasz    "Throttling penalty for container consumption, in percent");
119297722StraszTUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
120290551Strasz
121220163Strasz/*
122220163Strasz * 'rctl_rule_link' connects a rule with every racct it's related to.
123220163Strasz * For example, rule 'user:X:openfiles:deny=N/process' is linked
124220163Strasz * with uidinfo for user X, and to each process of that user.
125220163Strasz */
126220163Straszstruct rctl_rule_link {
127220163Strasz	LIST_ENTRY(rctl_rule_link)	rrl_next;
128220163Strasz	struct rctl_rule		*rrl_rule;
129220163Strasz	int				rrl_exceeded;
130220163Strasz};
131220163Strasz
/*
 * One entry of a name <-> numeric value table.  The tables built from it
 * are terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		d_value;	/* matching numeric constant */
};
136220163Strasz
137220163Straszstatic struct dict subjectnames[] = {
138220163Strasz	{ "process", RCTL_SUBJECT_TYPE_PROCESS },
139220163Strasz	{ "user", RCTL_SUBJECT_TYPE_USER },
140220163Strasz	{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
141220163Strasz	{ "jail", RCTL_SUBJECT_TYPE_JAIL },
142220163Strasz	{ NULL, -1 }};
143220163Strasz
144220163Straszstatic struct dict resourcenames[] = {
145224036Strasz	{ "cputime", RACCT_CPU },
146224036Strasz	{ "datasize", RACCT_DATA },
147224036Strasz	{ "stacksize", RACCT_STACK },
148224036Strasz	{ "coredumpsize", RACCT_CORE },
149224036Strasz	{ "memoryuse", RACCT_RSS },
150224036Strasz	{ "memorylocked", RACCT_MEMLOCK },
151224036Strasz	{ "maxproc", RACCT_NPROC },
152224036Strasz	{ "openfiles", RACCT_NOFILE },
153224036Strasz	{ "vmemoryuse", RACCT_VMEM },
154224036Strasz	{ "pseudoterminals", RACCT_NPTS },
155224036Strasz	{ "swapuse", RACCT_SWAP },
156220163Strasz	{ "nthr", RACCT_NTHR },
157220163Strasz	{ "msgqqueued", RACCT_MSGQQUEUED },
158220163Strasz	{ "msgqsize", RACCT_MSGQSIZE },
159220163Strasz	{ "nmsgq", RACCT_NMSGQ },
160220163Strasz	{ "nsem", RACCT_NSEM },
161220163Strasz	{ "nsemop", RACCT_NSEMOP },
162220163Strasz	{ "nshm", RACCT_NSHM },
163220163Strasz	{ "shmsize", RACCT_SHMSIZE },
164220163Strasz	{ "wallclock", RACCT_WALLCLOCK },
165242139Strasz	{ "pcpu", RACCT_PCTCPU },
166297633Strasz	{ "readbps", RACCT_READBPS },
167297633Strasz	{ "writebps", RACCT_WRITEBPS },
168297633Strasz	{ "readiops", RACCT_READIOPS },
169297633Strasz	{ "writeiops", RACCT_WRITEIOPS },
170220163Strasz	{ NULL, -1 }};
171220163Strasz
172220163Straszstatic struct dict actionnames[] = {
173220163Strasz	{ "sighup", RCTL_ACTION_SIGHUP },
174220163Strasz	{ "sigint", RCTL_ACTION_SIGINT },
175220163Strasz	{ "sigquit", RCTL_ACTION_SIGQUIT },
176220163Strasz	{ "sigill", RCTL_ACTION_SIGILL },
177220163Strasz	{ "sigtrap", RCTL_ACTION_SIGTRAP },
178220163Strasz	{ "sigabrt", RCTL_ACTION_SIGABRT },
179220163Strasz	{ "sigemt", RCTL_ACTION_SIGEMT },
180220163Strasz	{ "sigfpe", RCTL_ACTION_SIGFPE },
181220163Strasz	{ "sigkill", RCTL_ACTION_SIGKILL },
182220163Strasz	{ "sigbus", RCTL_ACTION_SIGBUS },
183220163Strasz	{ "sigsegv", RCTL_ACTION_SIGSEGV },
184220163Strasz	{ "sigsys", RCTL_ACTION_SIGSYS },
185220163Strasz	{ "sigpipe", RCTL_ACTION_SIGPIPE },
186220163Strasz	{ "sigalrm", RCTL_ACTION_SIGALRM },
187220163Strasz	{ "sigterm", RCTL_ACTION_SIGTERM },
188220163Strasz	{ "sigurg", RCTL_ACTION_SIGURG },
189220163Strasz	{ "sigstop", RCTL_ACTION_SIGSTOP },
190220163Strasz	{ "sigtstp", RCTL_ACTION_SIGTSTP },
191220163Strasz	{ "sigchld", RCTL_ACTION_SIGCHLD },
192220163Strasz	{ "sigttin", RCTL_ACTION_SIGTTIN },
193220163Strasz	{ "sigttou", RCTL_ACTION_SIGTTOU },
194220163Strasz	{ "sigio", RCTL_ACTION_SIGIO },
195220163Strasz	{ "sigxcpu", RCTL_ACTION_SIGXCPU },
196220163Strasz	{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
197220163Strasz	{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
198220163Strasz	{ "sigprof", RCTL_ACTION_SIGPROF },
199220163Strasz	{ "sigwinch", RCTL_ACTION_SIGWINCH },
200220163Strasz	{ "siginfo", RCTL_ACTION_SIGINFO },
201220163Strasz	{ "sigusr1", RCTL_ACTION_SIGUSR1 },
202220163Strasz	{ "sigusr2", RCTL_ACTION_SIGUSR2 },
203220163Strasz	{ "sigthr", RCTL_ACTION_SIGTHR },
204220163Strasz	{ "deny", RCTL_ACTION_DENY },
205220163Strasz	{ "log", RCTL_ACTION_LOG },
206220163Strasz	{ "devctl", RCTL_ACTION_DEVCTL },
207297633Strasz	{ "throttle", RCTL_ACTION_THROTTLE },
208220163Strasz	{ NULL, -1 }};
209220163Strasz
210220163Straszstatic void rctl_init(void);
211220163StraszSYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
212220163Strasz
213298045Straszstatic uma_zone_t rctl_rule_zone;
214220163Straszstatic uma_zone_t rctl_rule_link_zone;
215220163Strasz
216220163Straszstatic int rctl_rule_fully_specified(const struct rctl_rule *rule);
217220163Straszstatic void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
218220163Strasz
219227293Sedstatic MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
220220163Strasz
221297722Straszstatic int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
222297722Strasz{
223298045Strasz	int error, val = rctl_throttle_min;
224297722Strasz
225297722Strasz	error = sysctl_handle_int(oidp, &val, 0, req);
226297722Strasz	if (error || !req->newptr)
227297722Strasz		return (error);
228297722Strasz	if (val < 1 || val > rctl_throttle_max)
229297722Strasz		return (EINVAL);
230297722Strasz
231298414Strasz	RACCT_LOCK();
232297722Strasz	rctl_throttle_min = val;
233298414Strasz	RACCT_UNLOCK();
234297722Strasz
235297722Strasz	return (0);
236297722Strasz}
237297722Strasz
238297722Straszstatic int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
239297722Strasz{
240298045Strasz	int error, val = rctl_throttle_max;
241297722Strasz
242297722Strasz	error = sysctl_handle_int(oidp, &val, 0, req);
243297722Strasz	if (error || !req->newptr)
244297722Strasz		return (error);
245297722Strasz	if (val < rctl_throttle_min)
246297722Strasz		return (EINVAL);
247297722Strasz
248298414Strasz	RACCT_LOCK();
249297722Strasz	rctl_throttle_max = val;
250298414Strasz	RACCT_UNLOCK();
251297722Strasz
252297722Strasz	return (0);
253297722Strasz}
254297722Strasz
255297722Straszstatic int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
256297722Strasz{
257298045Strasz	int error, val = rctl_throttle_pct;
258297722Strasz
259297722Strasz	error = sysctl_handle_int(oidp, &val, 0, req);
260297722Strasz	if (error || !req->newptr)
261297722Strasz		return (error);
262297722Strasz	if (val < 0)
263297722Strasz		return (EINVAL);
264297722Strasz
265298414Strasz	RACCT_LOCK();
266297722Strasz	rctl_throttle_pct = val;
267298414Strasz	RACCT_UNLOCK();
268297722Strasz
269297722Strasz	return (0);
270297722Strasz}
271297722Strasz
272297722Straszstatic int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
273297722Strasz{
274298045Strasz	int error, val = rctl_throttle_pct2;
275297722Strasz
276297722Strasz	error = sysctl_handle_int(oidp, &val, 0, req);
277297722Strasz	if (error || !req->newptr)
278297722Strasz		return (error);
279297722Strasz	if (val < 0)
280297722Strasz		return (EINVAL);
281297722Strasz
282298414Strasz	RACCT_LOCK();
283297722Strasz	rctl_throttle_pct2 = val;
284298414Strasz	RACCT_UNLOCK();
285297722Strasz
286297722Strasz	return (0);
287297722Strasz}
288297722Strasz
289220163Straszstatic const char *
290220163Straszrctl_subject_type_name(int subject)
291220163Strasz{
292220163Strasz	int i;
293220163Strasz
294220163Strasz	for (i = 0; subjectnames[i].d_name != NULL; i++) {
295220163Strasz		if (subjectnames[i].d_value == subject)
296220163Strasz			return (subjectnames[i].d_name);
297220163Strasz	}
298220163Strasz
299220163Strasz	panic("rctl_subject_type_name: unknown subject type %d", subject);
300220163Strasz}
301220163Strasz
302220163Straszstatic const char *
303220163Straszrctl_action_name(int action)
304220163Strasz{
305220163Strasz	int i;
306220163Strasz
307220163Strasz	for (i = 0; actionnames[i].d_name != NULL; i++) {
308220163Strasz		if (actionnames[i].d_value == action)
309220163Strasz			return (actionnames[i].d_name);
310220163Strasz	}
311220163Strasz
312220163Strasz	panic("rctl_action_name: unknown action %d", action);
313220163Strasz}
314220163Strasz
315220163Straszconst char *
316220163Straszrctl_resource_name(int resource)
317220163Strasz{
318220163Strasz	int i;
319220163Strasz
320220163Strasz	for (i = 0; resourcenames[i].d_name != NULL; i++) {
321220163Strasz		if (resourcenames[i].d_value == resource)
322220163Strasz			return (resourcenames[i].d_name);
323220163Strasz	}
324220163Strasz
325220163Strasz	panic("rctl_resource_name: unknown resource %d", resource);
326220163Strasz}
327220163Strasz
328297465Straszstatic struct racct *
329297465Straszrctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
330220163Strasz{
331220163Strasz	struct ucred *cred = p->p_ucred;
332220163Strasz
333282213Strasz	ASSERT_RACCT_ENABLED();
334298414Strasz	RACCT_LOCK_ASSERT();
335220163Strasz
336220163Strasz	switch (rule->rr_per) {
337220163Strasz	case RCTL_SUBJECT_TYPE_PROCESS:
338297465Strasz		return (p->p_racct);
339220163Strasz	case RCTL_SUBJECT_TYPE_USER:
340297465Strasz		return (cred->cr_ruidinfo->ui_racct);
341220163Strasz	case RCTL_SUBJECT_TYPE_LOGINCLASS:
342297465Strasz		return (cred->cr_loginclass->lc_racct);
343220163Strasz	case RCTL_SUBJECT_TYPE_JAIL:
344297465Strasz		return (cred->cr_prison->pr_prison_racct->prr_racct);
345220163Strasz	default:
346297465Strasz		panic("%s: unknown per %d", __func__, rule->rr_per);
347220163Strasz	}
348297465Strasz}
349220163Strasz
350297465Strasz/*
351297465Strasz * Return the amount of resource that can be allocated by 'p' before
352297465Strasz * hitting 'rule'.
353297465Strasz */
354297465Straszstatic int64_t
355297465Straszrctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
356297465Strasz{
357298045Strasz	const struct racct *racct;
358297465Strasz	int64_t available;
359297465Strasz
360297465Strasz	ASSERT_RACCT_ENABLED();
361298414Strasz	RACCT_LOCK_ASSERT();
362297465Strasz
363297465Strasz	racct = rctl_proc_rule_to_racct(p, rule);
364297465Strasz	available = rule->rr_amount - racct->r_resources[rule->rr_resource];
365297465Strasz
366220163Strasz	return (available);
367220163Strasz}
368220163Strasz
369220163Strasz/*
370297633Strasz * Called every second for proc, uidinfo, loginclass, and jail containers.
371297633Strasz * If the limit isn't exceeded, it decreases the usage amount to zero.
372297633Strasz * Otherwise, it decreases it by the value of the limit.  This way
373297633Strasz * resource consumption exceeding the limit "carries over" to the next
374297633Strasz * period.
375220163Strasz */
376297633Straszvoid
377297633Straszrctl_throttle_decay(struct racct *racct, int resource)
378220163Strasz{
379297633Strasz	struct rctl_rule *rule;
380297633Strasz	struct rctl_rule_link *link;
381297633Strasz	int64_t minavailable;
382220163Strasz
383282213Strasz	ASSERT_RACCT_ENABLED();
384298414Strasz	RACCT_LOCK_ASSERT();
385282213Strasz
386297633Strasz	minavailable = INT64_MAX;
387220163Strasz
388297633Strasz	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
389297633Strasz		rule = link->rrl_rule;
390297633Strasz
391297633Strasz		if (rule->rr_resource != resource)
392297633Strasz			continue;
393297633Strasz		if (rule->rr_action != RCTL_ACTION_THROTTLE)
394297633Strasz			continue;
395297633Strasz
396297633Strasz		if (rule->rr_amount < minavailable)
397297633Strasz			minavailable = rule->rr_amount;
398297633Strasz	}
399297633Strasz
400297633Strasz	if (racct->r_resources[resource] < minavailable) {
401297633Strasz		racct->r_resources[resource] = 0;
402297633Strasz	} else {
403297633Strasz		/*
404297633Strasz		 * Cap utilization counter at ten times the limit.  Otherwise,
405297633Strasz		 * if we changed the rule lowering the allowed amount, it could
406297633Strasz		 * take unreasonably long time for the accumulated resource
407297633Strasz		 * usage to drop.
408297633Strasz		 */
409297633Strasz		if (racct->r_resources[resource] > minavailable * 10)
410297633Strasz			racct->r_resources[resource] = minavailable * 10;
411297633Strasz
412297633Strasz		racct->r_resources[resource] -= minavailable;
413297633Strasz	}
414220163Strasz}
415220163Strasz
416220163Strasz/*
417292162Strasz * Special version of rctl_get_available() for the %CPU resource.
418242139Strasz * We slightly cheat here and return less than we normally would.
419242139Strasz */
420242139Straszint64_t
421242139Straszrctl_pcpu_available(const struct proc *p) {
422242139Strasz	struct rctl_rule *rule;
423242139Strasz	struct rctl_rule_link *link;
424242139Strasz	int64_t available, minavailable, limit;
425242139Strasz
426282213Strasz	ASSERT_RACCT_ENABLED();
427298414Strasz	RACCT_LOCK_ASSERT();
428282213Strasz
429242139Strasz	minavailable = INT64_MAX;
430242139Strasz	limit = 0;
431242139Strasz
432242139Strasz	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
433242139Strasz		rule = link->rrl_rule;
434242139Strasz		if (rule->rr_resource != RACCT_PCTCPU)
435242139Strasz			continue;
436242139Strasz		if (rule->rr_action != RCTL_ACTION_DENY)
437242139Strasz			continue;
438242139Strasz		available = rctl_available_resource(p, rule);
439242139Strasz		if (available < minavailable) {
440242139Strasz			minavailable = available;
441242139Strasz			limit = rule->rr_amount;
442242139Strasz		}
443242139Strasz	}
444242139Strasz
445242139Strasz	/*
446242139Strasz	 * Return slightly less than actual value of the available
447298819Spfg	 * %cpu resource.  This makes %cpu throttling more aggressive
448242139Strasz	 * and lets us act sooner than the limits are already exceeded.
449242139Strasz	 */
450242139Strasz	if (limit != 0) {
451242139Strasz		if (limit > 2 * RCTL_PCPU_SHIFT)
452242139Strasz			minavailable -= RCTL_PCPU_SHIFT;
453242139Strasz		else
454242139Strasz			minavailable -= (limit / 2);
455242139Strasz	}
456242139Strasz
457242139Strasz	return (minavailable);
458242139Strasz}
459242139Strasz
/*
 * Saturating unsigned addition: returns a + b, or UINT64_MAX if the sum
 * does not fit in 64 bits.
 */
static uint64_t
xadd(uint64_t a, uint64_t b)
{

	/* The sum overflows exactly when 'a' exceeds the room left by 'b'. */
	if (a > UINT64_MAX - b)
		return (UINT64_MAX);

	return (a + b);
}
475297633Strasz
/*
 * Saturating unsigned multiplication: returns a * b, or UINT64_MAX if the
 * product does not fit in 64 bits.
 */
static uint64_t
xmul(uint64_t a, uint64_t b)
{

	/* A zero factor cannot overflow; it is tested first to avoid x / 0. */
	if (b == 0 || a <= UINT64_MAX / b)
		return (a * b);

	return (UINT64_MAX);
}
485297633Strasz
486242139Strasz/*
487220163Strasz * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
488220163Strasz * to what it keeps allocated now.  Returns non-zero if the allocation should
489220163Strasz * be denied, 0 otherwise.
490220163Strasz */
491220163Straszint
492220163Straszrctl_enforce(struct proc *p, int resource, uint64_t amount)
493220163Strasz{
494297510Strasz	static struct timeval log_lasttime, devctl_lasttime;
495297510Strasz	static int log_curtime = 0, devctl_curtime = 0;
496220163Strasz	struct rctl_rule *rule;
497220163Strasz	struct rctl_rule_link *link;
498220163Strasz	struct sbuf sb;
499298045Strasz	char *buf;
500297633Strasz	int64_t available;
501297633Strasz	uint64_t sleep_ms, sleep_ratio;
502220163Strasz	int should_deny = 0;
503220163Strasz
504282213Strasz	ASSERT_RACCT_ENABLED();
505298414Strasz	RACCT_LOCK_ASSERT();
506282213Strasz
507220163Strasz	/*
508220163Strasz	 * There may be more than one matching rule; go through all of them.
509220163Strasz	 * Denial should be done last, after logging and sending signals.
510220163Strasz	 */
511220163Strasz	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
512220163Strasz		rule = link->rrl_rule;
513220163Strasz		if (rule->rr_resource != resource)
514220163Strasz			continue;
515297633Strasz
516297633Strasz		available = rctl_available_resource(p, rule);
517297633Strasz		if (available >= (int64_t)amount) {
518220163Strasz			link->rrl_exceeded = 0;
519220163Strasz			continue;
520220163Strasz		}
521220163Strasz
522220163Strasz		switch (rule->rr_action) {
523220163Strasz		case RCTL_ACTION_DENY:
524220163Strasz			should_deny = 1;
525220163Strasz			continue;
526220163Strasz		case RCTL_ACTION_LOG:
527220163Strasz			/*
528220163Strasz			 * If rrl_exceeded != 0, it means we've already
529220163Strasz			 * logged a warning for this process.
530220163Strasz			 */
531220163Strasz			if (link->rrl_exceeded != 0)
532220163Strasz				continue;
533220163Strasz
534225940Strasz			/*
535225940Strasz			 * If the process state is not fully initialized yet,
536225940Strasz			 * we can't access most of the required fields, e.g.
537225940Strasz			 * p->p_comm.  This happens when called from fork1().
538225940Strasz			 * Ignore this rule for now; it will be processed just
539225940Strasz			 * after fork, when called from racct_proc_fork_done().
540225940Strasz			 */
541225940Strasz			if (p->p_state != PRS_NORMAL)
542225940Strasz				continue;
543225940Strasz
544297510Strasz			if (!ppsratecheck(&log_lasttime, &log_curtime,
545297510Strasz			    rctl_log_rate_limit))
546220163Strasz				continue;
547220163Strasz
548220163Strasz			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
549220163Strasz			if (buf == NULL) {
550220163Strasz				printf("rctl_enforce: out of memory\n");
551220163Strasz				continue;
552220163Strasz			}
553220163Strasz			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
554220163Strasz			rctl_rule_to_sbuf(&sb, rule);
555220163Strasz			sbuf_finish(&sb);
556220163Strasz			printf("rctl: rule \"%s\" matched by pid %d "
557220163Strasz			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
558220163Strasz			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
559221362Strasz			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
560220163Strasz			sbuf_delete(&sb);
561220163Strasz			free(buf, M_RCTL);
562220163Strasz			link->rrl_exceeded = 1;
563220163Strasz			continue;
564220163Strasz		case RCTL_ACTION_DEVCTL:
565220163Strasz			if (link->rrl_exceeded != 0)
566220163Strasz				continue;
567220163Strasz
568225940Strasz			if (p->p_state != PRS_NORMAL)
569225940Strasz				continue;
570297633Strasz
571297510Strasz			if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
572297510Strasz			    rctl_devctl_rate_limit))
573297510Strasz				continue;
574297510Strasz
575220163Strasz			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
576220163Strasz			if (buf == NULL) {
577220163Strasz				printf("rctl_enforce: out of memory\n");
578220163Strasz				continue;
579220163Strasz			}
580220163Strasz			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
581220163Strasz			sbuf_printf(&sb, "rule=");
582220163Strasz			rctl_rule_to_sbuf(&sb, rule);
583220163Strasz			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
584220163Strasz			    p->p_pid, p->p_ucred->cr_ruid,
585221362Strasz			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
586220163Strasz			sbuf_finish(&sb);
587220163Strasz			devctl_notify_f("RCTL", "rule", "matched",
588220163Strasz			    sbuf_data(&sb), M_NOWAIT);
589220163Strasz			sbuf_delete(&sb);
590220163Strasz			free(buf, M_RCTL);
591220163Strasz			link->rrl_exceeded = 1;
592220163Strasz			continue;
593297633Strasz		case RCTL_ACTION_THROTTLE:
594297633Strasz			if (p->p_state != PRS_NORMAL)
595297633Strasz				continue;
596297633Strasz
597297633Strasz			/*
598297633Strasz			 * Make the process sleep for a fraction of second
599297633Strasz			 * proportional to the ratio of process' resource
600297633Strasz			 * utilization compared to the limit.  The point is
601297633Strasz			 * to penalize resource hogs: processes that consume
602297633Strasz			 * more of the available resources sleep for longer.
603297633Strasz			 *
604297633Strasz			 * We're trying to defer division until the very end,
605297633Strasz			 * to minimize the rounding effects.  The following
606297633Strasz			 * calculation could have been written in a clearer
607297633Strasz			 * way like this:
608297633Strasz			 *
609297633Strasz			 * sleep_ms = hz * p->p_racct->r_resources[resource] /
610297633Strasz			 *     rule->rr_amount;
611297633Strasz			 * sleep_ms *= rctl_throttle_pct / 100;
612297633Strasz			 * if (sleep_ms < rctl_throttle_min)
613297633Strasz			 *         sleep_ms = rctl_throttle_min;
614297633Strasz			 *
615297633Strasz			 */
616297633Strasz			sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
617297633Strasz			sleep_ms = xmul(sleep_ms,  rctl_throttle_pct) / 100;
618297633Strasz			if (sleep_ms < rctl_throttle_min * rule->rr_amount)
619297633Strasz				sleep_ms = rctl_throttle_min * rule->rr_amount;
620297633Strasz
621297633Strasz			/*
622297633Strasz			 * Multiply that by the ratio of the resource
623297633Strasz			 * consumption for the container compared to the limit,
624297633Strasz			 * squared.  In other words, a process in a container
625297633Strasz			 * that is two times over the limit will be throttled
626297633Strasz			 * four times as much for hitting the same rule.  The
627297633Strasz			 * point is to penalize processes more if the container
628297633Strasz			 * itself (eg certain UID or jail) is above the limit.
629297633Strasz			 */
630297633Strasz			if (available < 0)
631297633Strasz				sleep_ratio = -available / rule->rr_amount;
632297633Strasz			else
633297633Strasz				sleep_ratio = 0;
634297633Strasz			sleep_ratio = xmul(sleep_ratio, sleep_ratio);
635297633Strasz			sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
636297633Strasz			sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
637297633Strasz
638297633Strasz			/*
639297633Strasz			 * Finally the division.
640297633Strasz			 */
641297633Strasz			sleep_ms /= rule->rr_amount;
642297633Strasz
643297633Strasz			if (sleep_ms > rctl_throttle_max)
644297633Strasz				sleep_ms = rctl_throttle_max;
645297633Strasz#if 0
646298267Strasz			printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
647297633Strasz			   __func__, p->p_pid, p->p_comm,
648297633Strasz			   p->p_racct->r_resources[resource],
649298267Strasz			   rule->rr_amount, (uintmax_t)sleep_ms,
650298267Strasz			   (uintmax_t)sleep_ratio, (intmax_t)available);
651297633Strasz#endif
652297633Strasz
653297633Strasz			KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
654297633Strasz			    __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
655297633Strasz			racct_proc_throttle(p, sleep_ms);
656297633Strasz			continue;
657220163Strasz		default:
658220163Strasz			if (link->rrl_exceeded != 0)
659220163Strasz				continue;
660220163Strasz
661225940Strasz			if (p->p_state != PRS_NORMAL)
662225940Strasz				continue;
663225940Strasz
664220163Strasz			KASSERT(rule->rr_action > 0 &&
665220163Strasz			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
666220163Strasz			    ("rctl_enforce: unknown action %d",
667220163Strasz			     rule->rr_action));
668220163Strasz
669220163Strasz			/*
670220163Strasz			 * We're using the fact that RCTL_ACTION_SIG* values
671220163Strasz			 * are equal to their counterparts from sys/signal.h.
672220163Strasz			 */
673225617Skmacy			kern_psignal(p, rule->rr_action);
674220163Strasz			link->rrl_exceeded = 1;
675220163Strasz			continue;
676220163Strasz		}
677220163Strasz	}
678220163Strasz
679220163Strasz	if (should_deny) {
680220163Strasz		/*
681220163Strasz		 * Return fake error code; the caller should change it
682220163Strasz		 * into one proper for the situation - EFSIZ, ENOMEM etc.
683220163Strasz		 */
684220163Strasz		return (EDOOFUS);
685220163Strasz	}
686220163Strasz
687220163Strasz	return (0);
688220163Strasz}
689220163Strasz
690220163Straszuint64_t
691220163Straszrctl_get_limit(struct proc *p, int resource)
692220163Strasz{
693220163Strasz	struct rctl_rule *rule;
694220163Strasz	struct rctl_rule_link *link;
695220163Strasz	uint64_t amount = UINT64_MAX;
696220163Strasz
697282213Strasz	ASSERT_RACCT_ENABLED();
698298414Strasz	RACCT_LOCK_ASSERT();
699282213Strasz
700220163Strasz	/*
701220163Strasz	 * There may be more than one matching rule; go through all of them.
702220163Strasz	 * Denial should be done last, after logging and sending signals.
703220163Strasz	 */
704220163Strasz	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
705220163Strasz		rule = link->rrl_rule;
706220163Strasz		if (rule->rr_resource != resource)
707220163Strasz			continue;
708220163Strasz		if (rule->rr_action != RCTL_ACTION_DENY)
709220163Strasz			continue;
710220163Strasz		if (rule->rr_amount < amount)
711220163Strasz			amount = rule->rr_amount;
712220163Strasz	}
713220163Strasz
714220163Strasz	return (amount);
715220163Strasz}
716220163Strasz
717220163Straszuint64_t
718220163Straszrctl_get_available(struct proc *p, int resource)
719220163Strasz{
720220163Strasz	struct rctl_rule *rule;
721220163Strasz	struct rctl_rule_link *link;
722220163Strasz	int64_t available, minavailable, allocated;
723220163Strasz
724220163Strasz	minavailable = INT64_MAX;
725220163Strasz
726282213Strasz	ASSERT_RACCT_ENABLED();
727298414Strasz	RACCT_LOCK_ASSERT();
728282213Strasz
729220163Strasz	/*
730220163Strasz	 * There may be more than one matching rule; go through all of them.
731220163Strasz	 * Denial should be done last, after logging and sending signals.
732220163Strasz	 */
733220163Strasz	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
734220163Strasz		rule = link->rrl_rule;
735220163Strasz		if (rule->rr_resource != resource)
736220163Strasz			continue;
737220163Strasz		if (rule->rr_action != RCTL_ACTION_DENY)
738220163Strasz			continue;
739220163Strasz		available = rctl_available_resource(p, rule);
740220163Strasz		if (available < minavailable)
741220163Strasz			minavailable = available;
742220163Strasz	}
743220163Strasz
744220163Strasz	/*
745220163Strasz	 * XXX: Think about this _hard_.
746220163Strasz	 */
747220163Strasz	allocated = p->p_racct->r_resources[resource];
748220163Strasz	if (minavailable < INT64_MAX - allocated)
749220163Strasz		minavailable += allocated;
750220163Strasz	if (minavailable < 0)
751220163Strasz		minavailable = 0;
752298414Strasz
753220163Strasz	return (minavailable);
754220163Strasz}
755220163Strasz
756220163Straszstatic int
757220163Straszrctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
758220163Strasz{
759220163Strasz
760282213Strasz	ASSERT_RACCT_ENABLED();
761282213Strasz
762220163Strasz	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
763220163Strasz		if (rule->rr_subject_type != filter->rr_subject_type)
764220163Strasz			return (0);
765220163Strasz
766220163Strasz		switch (filter->rr_subject_type) {
767220163Strasz		case RCTL_SUBJECT_TYPE_PROCESS:
768220163Strasz			if (filter->rr_subject.rs_proc != NULL &&
769220163Strasz			    rule->rr_subject.rs_proc !=
770220163Strasz			    filter->rr_subject.rs_proc)
771220163Strasz				return (0);
772220163Strasz			break;
773220163Strasz		case RCTL_SUBJECT_TYPE_USER:
774220163Strasz			if (filter->rr_subject.rs_uip != NULL &&
775220163Strasz			    rule->rr_subject.rs_uip !=
776220163Strasz			    filter->rr_subject.rs_uip)
777220163Strasz				return (0);
778220163Strasz			break;
779220163Strasz		case RCTL_SUBJECT_TYPE_LOGINCLASS:
780220527Strasz			if (filter->rr_subject.rs_loginclass != NULL &&
781220527Strasz			    rule->rr_subject.rs_loginclass !=
782220527Strasz			    filter->rr_subject.rs_loginclass)
783220163Strasz				return (0);
784220163Strasz			break;
785220163Strasz		case RCTL_SUBJECT_TYPE_JAIL:
786221362Strasz			if (filter->rr_subject.rs_prison_racct != NULL &&
787221362Strasz			    rule->rr_subject.rs_prison_racct !=
788221362Strasz			    filter->rr_subject.rs_prison_racct)
789220163Strasz				return (0);
790220163Strasz			break;
791220163Strasz		default:
792220163Strasz			panic("rctl_rule_matches: unknown subject type %d",
793220163Strasz			    filter->rr_subject_type);
794220163Strasz		}
795220163Strasz	}
796220163Strasz
797220163Strasz	if (filter->rr_resource != RACCT_UNDEFINED) {
798220163Strasz		if (rule->rr_resource != filter->rr_resource)
799220163Strasz			return (0);
800220163Strasz	}
801220163Strasz
802220163Strasz	if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
803220163Strasz		if (rule->rr_action != filter->rr_action)
804220163Strasz			return (0);
805220163Strasz	}
806220163Strasz
807220163Strasz	if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
808220163Strasz		if (rule->rr_amount != filter->rr_amount)
809220163Strasz			return (0);
810220163Strasz	}
811220163Strasz
812220163Strasz	if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
813220163Strasz		if (rule->rr_per != filter->rr_per)
814220163Strasz			return (0);
815220163Strasz	}
816220163Strasz
817220163Strasz	return (1);
818220163Strasz}
819220163Strasz
820220163Straszstatic int
821220163Straszstr2value(const char *str, int *value, struct dict *table)
822220163Strasz{
823220163Strasz	int i;
824220163Strasz
825220163Strasz	if (value == NULL)
826220163Strasz		return (EINVAL);
827220163Strasz
828220163Strasz	for (i = 0; table[i].d_name != NULL; i++) {
829220163Strasz		if (strcasecmp(table[i].d_name, str) == 0) {
830220163Strasz			*value =  table[i].d_value;
831220163Strasz			return (0);
832220163Strasz		}
833220163Strasz	}
834220163Strasz
835220163Strasz	return (EINVAL);
836220163Strasz}
837220163Strasz
838220163Straszstatic int
839220163Straszstr2id(const char *str, id_t *value)
840220163Strasz{
841220163Strasz	char *end;
842220163Strasz
843220163Strasz	if (str == NULL)
844220163Strasz		return (EINVAL);
845220163Strasz
846220163Strasz	*value = strtoul(str, &end, 10);
847220163Strasz	if ((size_t)(end - str) != strlen(str))
848220163Strasz		return (EINVAL);
849220163Strasz
850220163Strasz	return (0);
851220163Strasz}
852220163Strasz
/*
 * Parse 'str' as a base-10 number and store the result in '*value'.
 * The whole string must be consumed by the conversion.
 *
 * Returns 0 on success; EINVAL when 'str' is NULL, contains no digits
 * at all (e.g. is empty), or has trailing characters that are not part
 * of the number; ERANGE when the converted value is negative once
 * stored as int64_t.  Note that '*value' is written before the input
 * is validated.
 */
static int
str2int64(const char *str, int64_t *value)
{
	char *end;

	if (str == NULL)
		return (EINVAL);

	*value = strtoul(str, &end, 10);

	/*
	 * end == str means nothing was converted; without this check an
	 * empty string would be silently accepted as 0.
	 */
	if (end == str || *end != '\0')
		return (EINVAL);

	if (*value < 0)
		return (ERANGE);

	return (0);
}
870220163Strasz
871220163Strasz/*
872220163Strasz * Connect the rule to the racct, increasing refcount for the rule.
873220163Strasz */
874220163Straszstatic void
875220163Straszrctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
876220163Strasz{
877220163Strasz	struct rctl_rule_link *link;
878220163Strasz
879282213Strasz	ASSERT_RACCT_ENABLED();
880220163Strasz	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
881220163Strasz
882220163Strasz	rctl_rule_acquire(rule);
883220163Strasz	link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
884220163Strasz	link->rrl_rule = rule;
885220163Strasz	link->rrl_exceeded = 0;
886220163Strasz
887298414Strasz	RACCT_LOCK();
888220163Strasz	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
889298414Strasz	RACCT_UNLOCK();
890220163Strasz}
891220163Strasz
892220163Straszstatic int
893220163Straszrctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
894220163Strasz{
895220163Strasz	struct rctl_rule_link *link;
896220163Strasz
897282213Strasz	ASSERT_RACCT_ENABLED();
898220163Strasz	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
899298414Strasz	RACCT_LOCK_ASSERT();
900220163Strasz
901220163Strasz	link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
902220163Strasz	if (link == NULL)
903220163Strasz		return (ENOMEM);
904220163Strasz	rctl_rule_acquire(rule);
905220163Strasz	link->rrl_rule = rule;
906220163Strasz	link->rrl_exceeded = 0;
907220163Strasz
908220163Strasz	LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
909298414Strasz
910220163Strasz	return (0);
911220163Strasz}
912220163Strasz
913220163Strasz/*
914220163Strasz * Remove limits for a rules matching the filter and release
915220163Strasz * the refcounts for the rules, possibly freeing them.  Returns
916220163Strasz * the number of limit structures removed.
917220163Strasz */
918220163Straszstatic int
919220163Straszrctl_racct_remove_rules(struct racct *racct,
920220163Strasz    const struct rctl_rule *filter)
921220163Strasz{
922298045Strasz	struct rctl_rule_link *link, *linktmp;
923220163Strasz	int removed = 0;
924220163Strasz
925282213Strasz	ASSERT_RACCT_ENABLED();
926298414Strasz	RACCT_LOCK_ASSERT();
927220163Strasz
928220163Strasz	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
929220163Strasz		if (!rctl_rule_matches(link->rrl_rule, filter))
930220163Strasz			continue;
931220163Strasz
932220163Strasz		LIST_REMOVE(link, rrl_next);
933220163Strasz		rctl_rule_release(link->rrl_rule);
934220163Strasz		uma_zfree(rctl_rule_link_zone, link);
935220163Strasz		removed++;
936220163Strasz	}
937220163Strasz	return (removed);
938220163Strasz}
939220163Strasz
940220163Straszstatic void
941220163Straszrctl_rule_acquire_subject(struct rctl_rule *rule)
942220163Strasz{
943220163Strasz
944282213Strasz	ASSERT_RACCT_ENABLED();
945282213Strasz
946220163Strasz	switch (rule->rr_subject_type) {
947220163Strasz	case RCTL_SUBJECT_TYPE_UNDEFINED:
948220163Strasz	case RCTL_SUBJECT_TYPE_PROCESS:
949221362Strasz		break;
950220163Strasz	case RCTL_SUBJECT_TYPE_JAIL:
951221362Strasz		if (rule->rr_subject.rs_prison_racct != NULL)
952221362Strasz			prison_racct_hold(rule->rr_subject.rs_prison_racct);
953220163Strasz		break;
954220163Strasz	case RCTL_SUBJECT_TYPE_USER:
955220163Strasz		if (rule->rr_subject.rs_uip != NULL)
956220163Strasz			uihold(rule->rr_subject.rs_uip);
957220163Strasz		break;
958220163Strasz	case RCTL_SUBJECT_TYPE_LOGINCLASS:
959220527Strasz		if (rule->rr_subject.rs_loginclass != NULL)
960220527Strasz			loginclass_hold(rule->rr_subject.rs_loginclass);
961220163Strasz		break;
962220163Strasz	default:
963220163Strasz		panic("rctl_rule_acquire_subject: unknown subject type %d",
964220163Strasz		    rule->rr_subject_type);
965220163Strasz	}
966220163Strasz}
967220163Strasz
968220163Straszstatic void
969220163Straszrctl_rule_release_subject(struct rctl_rule *rule)
970220163Strasz{
971220163Strasz
972282213Strasz	ASSERT_RACCT_ENABLED();
973282213Strasz
974220163Strasz	switch (rule->rr_subject_type) {
975220163Strasz	case RCTL_SUBJECT_TYPE_UNDEFINED:
976220163Strasz	case RCTL_SUBJECT_TYPE_PROCESS:
977221362Strasz		break;
978220163Strasz	case RCTL_SUBJECT_TYPE_JAIL:
979221362Strasz		if (rule->rr_subject.rs_prison_racct != NULL)
980221362Strasz			prison_racct_free(rule->rr_subject.rs_prison_racct);
981220163Strasz		break;
982220163Strasz	case RCTL_SUBJECT_TYPE_USER:
983220163Strasz		if (rule->rr_subject.rs_uip != NULL)
984220163Strasz			uifree(rule->rr_subject.rs_uip);
985220163Strasz		break;
986220163Strasz	case RCTL_SUBJECT_TYPE_LOGINCLASS:
987220527Strasz		if (rule->rr_subject.rs_loginclass != NULL)
988220527Strasz			loginclass_free(rule->rr_subject.rs_loginclass);
989220163Strasz		break;
990220163Strasz	default:
991220163Strasz		panic("rctl_rule_release_subject: unknown subject type %d",
992220163Strasz		    rule->rr_subject_type);
993220163Strasz	}
994220163Strasz}
995220163Strasz
996220163Straszstruct rctl_rule *
997220163Straszrctl_rule_alloc(int flags)
998220163Strasz{
999220163Strasz	struct rctl_rule *rule;
1000220163Strasz
1001282213Strasz	ASSERT_RACCT_ENABLED();
1002282213Strasz
1003220163Strasz	rule = uma_zalloc(rctl_rule_zone, flags);
1004220163Strasz	if (rule == NULL)
1005220163Strasz		return (NULL);
1006220163Strasz	rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1007220163Strasz	rule->rr_subject.rs_proc = NULL;
1008220163Strasz	rule->rr_subject.rs_uip = NULL;
1009220527Strasz	rule->rr_subject.rs_loginclass = NULL;
1010221362Strasz	rule->rr_subject.rs_prison_racct = NULL;
1011220163Strasz	rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1012220163Strasz	rule->rr_resource = RACCT_UNDEFINED;
1013220163Strasz	rule->rr_action = RCTL_ACTION_UNDEFINED;
1014220163Strasz	rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1015220163Strasz	refcount_init(&rule->rr_refcount, 1);
1016220163Strasz
1017220163Strasz	return (rule);
1018220163Strasz}
1019220163Strasz
1020220163Straszstruct rctl_rule *
1021220163Straszrctl_rule_duplicate(const struct rctl_rule *rule, int flags)
1022220163Strasz{
1023220163Strasz	struct rctl_rule *copy;
1024220163Strasz
1025282213Strasz	ASSERT_RACCT_ENABLED();
1026282213Strasz
1027220163Strasz	copy = uma_zalloc(rctl_rule_zone, flags);
1028220163Strasz	if (copy == NULL)
1029220163Strasz		return (NULL);
1030220163Strasz	copy->rr_subject_type = rule->rr_subject_type;
1031220163Strasz	copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
1032220163Strasz	copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
1033220527Strasz	copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
1034221362Strasz	copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
1035220163Strasz	copy->rr_per = rule->rr_per;
1036220163Strasz	copy->rr_resource = rule->rr_resource;
1037220163Strasz	copy->rr_action = rule->rr_action;
1038220163Strasz	copy->rr_amount = rule->rr_amount;
1039220163Strasz	refcount_init(&copy->rr_refcount, 1);
1040220163Strasz	rctl_rule_acquire_subject(copy);
1041220163Strasz
1042220163Strasz	return (copy);
1043220163Strasz}
1044220163Strasz
1045220163Straszvoid
1046220163Straszrctl_rule_acquire(struct rctl_rule *rule)
1047220163Strasz{
1048220163Strasz
1049282213Strasz	ASSERT_RACCT_ENABLED();
1050220163Strasz	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1051220163Strasz
1052220163Strasz	refcount_acquire(&rule->rr_refcount);
1053220163Strasz}
1054220163Strasz
1055220163Straszstatic void
1056220163Straszrctl_rule_free(void *context, int pending)
1057220163Strasz{
1058220163Strasz	struct rctl_rule *rule;
1059220163Strasz
1060220163Strasz	rule = (struct rctl_rule *)context;
1061220163Strasz
1062282213Strasz	ASSERT_RACCT_ENABLED();
1063220163Strasz	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
1064220163Strasz
1065220163Strasz	/*
1066220163Strasz	 * We don't need locking here; rule is guaranteed to be inaccessible.
1067220163Strasz	 */
1068220163Strasz
1069220163Strasz	rctl_rule_release_subject(rule);
1070220163Strasz	uma_zfree(rctl_rule_zone, rule);
1071220163Strasz}
1072220163Strasz
1073220163Straszvoid
1074220163Straszrctl_rule_release(struct rctl_rule *rule)
1075220163Strasz{
1076220163Strasz
1077282213Strasz	ASSERT_RACCT_ENABLED();
1078220163Strasz	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1079220163Strasz
1080220163Strasz	if (refcount_release(&rule->rr_refcount)) {
1081220163Strasz		/*
1082220163Strasz		 * rctl_rule_release() is often called when iterating
1083220163Strasz		 * over all the uidinfo structures in the system,
1084220163Strasz		 * holding uihashtbl_lock.  Since rctl_rule_free()
1085220163Strasz		 * might end up calling uifree(), this would lead
1086220163Strasz		 * to lock recursion.  Use taskqueue to avoid this.
1087220163Strasz		 */
1088220163Strasz		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
1089220163Strasz		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
1090220163Strasz	}
1091220163Strasz}
1092220163Strasz
1093220163Straszstatic int
1094220163Straszrctl_rule_fully_specified(const struct rctl_rule *rule)
1095220163Strasz{
1096220163Strasz
1097282213Strasz	ASSERT_RACCT_ENABLED();
1098282213Strasz
1099220163Strasz	switch (rule->rr_subject_type) {
1100220163Strasz	case RCTL_SUBJECT_TYPE_UNDEFINED:
1101220163Strasz		return (0);
1102220163Strasz	case RCTL_SUBJECT_TYPE_PROCESS:
1103220163Strasz		if (rule->rr_subject.rs_proc == NULL)
1104220163Strasz			return (0);
1105220163Strasz		break;
1106220163Strasz	case RCTL_SUBJECT_TYPE_USER:
1107220163Strasz		if (rule->rr_subject.rs_uip == NULL)
1108220163Strasz			return (0);
1109220163Strasz		break;
1110220163Strasz	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1111220527Strasz		if (rule->rr_subject.rs_loginclass == NULL)
1112220163Strasz			return (0);
1113220163Strasz		break;
1114220163Strasz	case RCTL_SUBJECT_TYPE_JAIL:
1115221362Strasz		if (rule->rr_subject.rs_prison_racct == NULL)
1116220163Strasz			return (0);
1117220163Strasz		break;
1118220163Strasz	default:
1119220163Strasz		panic("rctl_rule_fully_specified: unknown subject type %d",
1120220163Strasz		    rule->rr_subject_type);
1121220163Strasz	}
1122220163Strasz	if (rule->rr_resource == RACCT_UNDEFINED)
1123220163Strasz		return (0);
1124220163Strasz	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
1125220163Strasz		return (0);
1126220163Strasz	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
1127220163Strasz		return (0);
1128220163Strasz	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
1129220163Strasz		return (0);
1130220163Strasz
1131220163Strasz	return (1);
1132220163Strasz}
1133220163Strasz
1134220163Straszstatic int
1135220163Straszrctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
1136220163Strasz{
1137298045Strasz	struct rctl_rule *rule;
1138220163Strasz	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
1139220163Strasz	     *amountstr, *perstr;
1140220163Strasz	id_t id;
1141298045Strasz	int error = 0;
1142220163Strasz
1143282213Strasz	ASSERT_RACCT_ENABLED();
1144282213Strasz
1145220163Strasz	rule = rctl_rule_alloc(M_WAITOK);
1146220163Strasz
1147220163Strasz	subjectstr = strsep(&rulestr, ":");
1148220163Strasz	subject_idstr = strsep(&rulestr, ":");
1149220163Strasz	resourcestr = strsep(&rulestr, ":");
1150220163Strasz	actionstr = strsep(&rulestr, "=/");
1151220163Strasz	amountstr = strsep(&rulestr, "/");
1152220163Strasz	perstr = rulestr;
1153220163Strasz
1154220163Strasz	if (subjectstr == NULL || subjectstr[0] == '\0')
1155220163Strasz		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1156220163Strasz	else {
1157220163Strasz		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
1158220163Strasz		if (error != 0)
1159220163Strasz			goto out;
1160220163Strasz	}
1161220163Strasz
1162220163Strasz	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
1163220163Strasz		rule->rr_subject.rs_proc = NULL;
1164220163Strasz		rule->rr_subject.rs_uip = NULL;
1165220527Strasz		rule->rr_subject.rs_loginclass = NULL;
1166221362Strasz		rule->rr_subject.rs_prison_racct = NULL;
1167220163Strasz	} else {
1168220163Strasz		switch (rule->rr_subject_type) {
1169220163Strasz		case RCTL_SUBJECT_TYPE_UNDEFINED:
1170220163Strasz			error = EINVAL;
1171220163Strasz			goto out;
1172220163Strasz		case RCTL_SUBJECT_TYPE_PROCESS:
1173220163Strasz			error = str2id(subject_idstr, &id);
1174220163Strasz			if (error != 0)
1175220163Strasz				goto out;
1176220163Strasz			sx_assert(&allproc_lock, SA_LOCKED);
1177220163Strasz			rule->rr_subject.rs_proc = pfind(id);
1178220163Strasz			if (rule->rr_subject.rs_proc == NULL) {
1179220163Strasz				error = ESRCH;
1180220163Strasz				goto out;
1181220163Strasz			}
1182220163Strasz			PROC_UNLOCK(rule->rr_subject.rs_proc);
1183220163Strasz			break;
1184220163Strasz		case RCTL_SUBJECT_TYPE_USER:
1185220163Strasz			error = str2id(subject_idstr, &id);
1186220163Strasz			if (error != 0)
1187220163Strasz				goto out;
1188220163Strasz			rule->rr_subject.rs_uip = uifind(id);
1189220163Strasz			break;
1190220163Strasz		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1191220527Strasz			rule->rr_subject.rs_loginclass =
1192220163Strasz			    loginclass_find(subject_idstr);
1193220527Strasz			if (rule->rr_subject.rs_loginclass == NULL) {
1194220163Strasz				error = ENAMETOOLONG;
1195220163Strasz				goto out;
1196220163Strasz			}
1197220163Strasz			break;
1198220163Strasz		case RCTL_SUBJECT_TYPE_JAIL:
1199221362Strasz			rule->rr_subject.rs_prison_racct =
1200221362Strasz			    prison_racct_find(subject_idstr);
1201221362Strasz			if (rule->rr_subject.rs_prison_racct == NULL) {
1202221362Strasz				error = ENAMETOOLONG;
1203221362Strasz				goto out;
1204220163Strasz			}
1205220163Strasz			break;
1206220163Strasz               default:
1207220163Strasz                       panic("rctl_string_to_rule: unknown subject type %d",
1208220163Strasz                           rule->rr_subject_type);
1209220163Strasz               }
1210220163Strasz	}
1211220163Strasz
1212220163Strasz	if (resourcestr == NULL || resourcestr[0] == '\0')
1213220163Strasz		rule->rr_resource = RACCT_UNDEFINED;
1214220163Strasz	else {
1215220163Strasz		error = str2value(resourcestr, &rule->rr_resource,
1216220163Strasz		    resourcenames);
1217220163Strasz		if (error != 0)
1218220163Strasz			goto out;
1219220163Strasz	}
1220220163Strasz
1221220163Strasz	if (actionstr == NULL || actionstr[0] == '\0')
1222220163Strasz		rule->rr_action = RCTL_ACTION_UNDEFINED;
1223220163Strasz	else {
1224220163Strasz		error = str2value(actionstr, &rule->rr_action, actionnames);
1225220163Strasz		if (error != 0)
1226220163Strasz			goto out;
1227220163Strasz	}
1228220163Strasz
1229220163Strasz	if (amountstr == NULL || amountstr[0] == '\0')
1230220163Strasz		rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1231220163Strasz	else {
1232220163Strasz		error = str2int64(amountstr, &rule->rr_amount);
1233220163Strasz		if (error != 0)
1234220163Strasz			goto out;
1235297458Strasz		if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
1236297458Strasz			if (rule->rr_amount > INT64_MAX / 1000000) {
1237297458Strasz				error = ERANGE;
1238297458Strasz				goto out;
1239297458Strasz			}
1240225371Strasz			rule->rr_amount *= 1000000;
1241297458Strasz		}
1242220163Strasz	}
1243220163Strasz
1244220163Strasz	if (perstr == NULL || perstr[0] == '\0')
1245220163Strasz		rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1246220163Strasz	else {
1247220163Strasz		error = str2value(perstr, &rule->rr_per, subjectnames);
1248220163Strasz		if (error != 0)
1249220163Strasz			goto out;
1250220163Strasz	}
1251220163Strasz
1252220163Straszout:
1253220163Strasz	if (error == 0)
1254220163Strasz		*rulep = rule;
1255220163Strasz	else
1256220163Strasz		rctl_rule_release(rule);
1257220163Strasz
1258220163Strasz	return (error);
1259220163Strasz}
1260220163Strasz
1261220163Strasz/*
1262220163Strasz * Link a rule with all the subjects it applies to.
1263220163Strasz */
1264220163Straszint
1265220163Straszrctl_rule_add(struct rctl_rule *rule)
1266220163Strasz{
1267220163Strasz	struct proc *p;
1268220163Strasz	struct ucred *cred;
1269220163Strasz	struct uidinfo *uip;
1270220163Strasz	struct prison *pr;
1271221362Strasz	struct prison_racct *prr;
1272220163Strasz	struct loginclass *lc;
1273220163Strasz	struct rctl_rule *rule2;
1274220163Strasz	int match;
1275220163Strasz
1276282213Strasz	ASSERT_RACCT_ENABLED();
1277220163Strasz	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1278220163Strasz
1279220163Strasz	/*
1280297633Strasz	 * Some rules just don't make sense, like "deny" rule for an undeniable
1281297633Strasz	 * resource.  The exception are the RSS and %CPU resources - they are
1282297633Strasz	 * not deniable in the racct sense, but the limit is enforced in
1283297633Strasz	 * a different way.
1284220163Strasz	 */
1285220163Strasz	if (rule->rr_action == RCTL_ACTION_DENY &&
1286297633Strasz	    !RACCT_IS_DENIABLE(rule->rr_resource) &&
1287297633Strasz	    rule->rr_resource != RACCT_RSS &&
1288297633Strasz	    rule->rr_resource != RACCT_PCTCPU) {
1289220163Strasz		return (EOPNOTSUPP);
1290297633Strasz	}
1291220163Strasz
1292297633Strasz	if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1293297633Strasz	    !RACCT_IS_DECAYING(rule->rr_resource)) {
1294297633Strasz		return (EOPNOTSUPP);
1295297633Strasz	}
1296297633Strasz
1297297633Strasz	if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1298297633Strasz	    rule->rr_resource == RACCT_PCTCPU) {
1299297633Strasz		return (EOPNOTSUPP);
1300297633Strasz	}
1301297633Strasz
1302220163Strasz	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1303297633Strasz	    RACCT_IS_SLOPPY(rule->rr_resource)) {
1304220163Strasz		return (EOPNOTSUPP);
1305297633Strasz	}
1306220163Strasz
1307220163Strasz	/*
1308220163Strasz	 * Make sure there are no duplicated rules.  Also, for the "deny"
1309220163Strasz	 * rules, remove ones differing only by "amount".
1310220163Strasz	 */
1311220163Strasz	if (rule->rr_action == RCTL_ACTION_DENY) {
1312220163Strasz		rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1313220163Strasz		rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1314220163Strasz		rctl_rule_remove(rule2);
1315220163Strasz		rctl_rule_release(rule2);
1316220163Strasz	} else
1317220163Strasz		rctl_rule_remove(rule);
1318220163Strasz
1319220163Strasz	switch (rule->rr_subject_type) {
1320220163Strasz	case RCTL_SUBJECT_TYPE_PROCESS:
1321220163Strasz		p = rule->rr_subject.rs_proc;
1322220163Strasz		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1323220163Strasz
1324220163Strasz		rctl_racct_add_rule(p->p_racct, rule);
1325220163Strasz		/*
1326220163Strasz		 * In case of per-process rule, we don't have anything more
1327220163Strasz		 * to do.
1328220163Strasz		 */
1329220163Strasz		return (0);
1330220163Strasz
1331220163Strasz	case RCTL_SUBJECT_TYPE_USER:
1332220163Strasz		uip = rule->rr_subject.rs_uip;
1333220163Strasz		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1334220163Strasz		rctl_racct_add_rule(uip->ui_racct, rule);
1335220163Strasz		break;
1336220163Strasz
1337220163Strasz	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1338220527Strasz		lc = rule->rr_subject.rs_loginclass;
1339220163Strasz		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1340220163Strasz		rctl_racct_add_rule(lc->lc_racct, rule);
1341220163Strasz		break;
1342220163Strasz
1343220163Strasz	case RCTL_SUBJECT_TYPE_JAIL:
1344221362Strasz		prr = rule->rr_subject.rs_prison_racct;
1345221362Strasz		KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1346221362Strasz		rctl_racct_add_rule(prr->prr_racct, rule);
1347220163Strasz		break;
1348220163Strasz
1349220163Strasz	default:
1350220163Strasz		panic("rctl_rule_add: unknown subject type %d",
1351220163Strasz		    rule->rr_subject_type);
1352220163Strasz	}
1353220163Strasz
1354220163Strasz	/*
1355220163Strasz	 * Now go through all the processes and add the new rule to the ones
1356220163Strasz	 * it applies to.
1357220163Strasz	 */
1358220163Strasz	sx_assert(&allproc_lock, SA_LOCKED);
1359220163Strasz	FOREACH_PROC_IN_SYSTEM(p) {
1360220163Strasz		cred = p->p_ucred;
1361220163Strasz		switch (rule->rr_subject_type) {
1362220163Strasz		case RCTL_SUBJECT_TYPE_USER:
1363220163Strasz			if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1364220163Strasz			    cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1365220163Strasz				break;
1366220163Strasz			continue;
1367220163Strasz		case RCTL_SUBJECT_TYPE_LOGINCLASS:
1368220527Strasz			if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1369220163Strasz				break;
1370220163Strasz			continue;
1371220163Strasz		case RCTL_SUBJECT_TYPE_JAIL:
1372220163Strasz			match = 0;
1373220163Strasz			for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1374221362Strasz				if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1375220163Strasz					match = 1;
1376220163Strasz					break;
1377220163Strasz				}
1378220163Strasz			}
1379220163Strasz			if (match)
1380220163Strasz				break;
1381220163Strasz			continue;
1382220163Strasz		default:
1383220163Strasz			panic("rctl_rule_add: unknown subject type %d",
1384220163Strasz			    rule->rr_subject_type);
1385220163Strasz		}
1386220163Strasz
1387220163Strasz		rctl_racct_add_rule(p->p_racct, rule);
1388220163Strasz	}
1389220163Strasz
1390220163Strasz	return (0);
1391220163Strasz}
1392220163Strasz
/*
 * "Pre" hook handed to the *_racct_foreach() iterators by
 * rctl_rule_remove(): takes the racct lock.
 */
static void
rctl_rule_pre_callback(void)
{

	RACCT_LOCK();
}
1399290857Strasz
/*
 * "Post" hook handed to the *_racct_foreach() iterators by
 * rctl_rule_remove(): drops the racct lock.
 */
static void
rctl_rule_post_callback(void)
{

	RACCT_UNLOCK();
}
1406290857Strasz
/*
 * Per-racct callback used by rctl_rule_remove().  'arg2' is the filter
 * rule and 'arg3' points at an int counter that is incremented by the
 * number of rule links removed from 'racct'.  The racct lock must be
 * held.
 */
static void
rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
{
	struct rctl_rule *filter = arg2;
	int *foundp = arg3;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

	*foundp += rctl_racct_remove_rules(racct, filter);
}
1420220163Strasz
1421220163Strasz/*
1422220163Strasz * Remove all rules that match the filter.
1423220163Strasz */
1424220163Straszint
1425220163Straszrctl_rule_remove(struct rctl_rule *filter)
1426220163Strasz{
1427298045Strasz	struct proc *p;
1428220163Strasz	int found = 0;
1429220163Strasz
1430282213Strasz	ASSERT_RACCT_ENABLED();
1431282213Strasz
1432220163Strasz	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1433220163Strasz	    filter->rr_subject.rs_proc != NULL) {
1434220163Strasz		p = filter->rr_subject.rs_proc;
1435298414Strasz		RACCT_LOCK();
1436220163Strasz		found = rctl_racct_remove_rules(p->p_racct, filter);
1437298414Strasz		RACCT_UNLOCK();
1438220163Strasz		if (found)
1439220163Strasz			return (0);
1440220163Strasz		return (ESRCH);
1441220163Strasz	}
1442220163Strasz
1443290857Strasz	loginclass_racct_foreach(rctl_rule_remove_callback,
1444290857Strasz	    rctl_rule_pre_callback, rctl_rule_post_callback,
1445290857Strasz	    filter, (void *)&found);
1446290857Strasz	ui_racct_foreach(rctl_rule_remove_callback,
1447290857Strasz	    rctl_rule_pre_callback, rctl_rule_post_callback,
1448290857Strasz	    filter, (void *)&found);
1449290857Strasz	prison_racct_foreach(rctl_rule_remove_callback,
1450290857Strasz	    rctl_rule_pre_callback, rctl_rule_post_callback,
1451290857Strasz	    filter, (void *)&found);
1452220163Strasz
1453220163Strasz	sx_assert(&allproc_lock, SA_LOCKED);
1454298414Strasz	RACCT_LOCK();
1455220163Strasz	FOREACH_PROC_IN_SYSTEM(p) {
1456220163Strasz		found += rctl_racct_remove_rules(p->p_racct, filter);
1457220163Strasz	}
1458298414Strasz	RACCT_UNLOCK();
1459220163Strasz
1460220163Strasz	if (found)
1461220163Strasz		return (0);
1462220163Strasz	return (ESRCH);
1463220163Strasz}
1464220163Strasz
1465220163Strasz/*
1466220163Strasz * Appends a rule to the sbuf.
1467220163Strasz */
1468220163Straszstatic void
1469220163Straszrctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1470220163Strasz{
1471220163Strasz	int64_t amount;
1472220163Strasz
1473282213Strasz	ASSERT_RACCT_ENABLED();
1474282213Strasz
1475220163Strasz	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1476220163Strasz
1477220163Strasz	switch (rule->rr_subject_type) {
1478220163Strasz	case RCTL_SUBJECT_TYPE_PROCESS:
1479220163Strasz		if (rule->rr_subject.rs_proc == NULL)
1480220163Strasz			sbuf_printf(sb, ":");
1481220163Strasz		else
1482220163Strasz			sbuf_printf(sb, "%d:",
1483220163Strasz			    rule->rr_subject.rs_proc->p_pid);
1484220163Strasz		break;
1485220163Strasz	case RCTL_SUBJECT_TYPE_USER:
1486220163Strasz		if (rule->rr_subject.rs_uip == NULL)
1487220163Strasz			sbuf_printf(sb, ":");
1488220163Strasz		else
1489220163Strasz			sbuf_printf(sb, "%d:",
1490220163Strasz			    rule->rr_subject.rs_uip->ui_uid);
1491220163Strasz		break;
1492220163Strasz	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1493220527Strasz		if (rule->rr_subject.rs_loginclass == NULL)
1494220163Strasz			sbuf_printf(sb, ":");
1495220163Strasz		else
1496220163Strasz			sbuf_printf(sb, "%s:",
1497220527Strasz			    rule->rr_subject.rs_loginclass->lc_name);
1498220163Strasz		break;
1499220163Strasz	case RCTL_SUBJECT_TYPE_JAIL:
1500221362Strasz		if (rule->rr_subject.rs_prison_racct == NULL)
1501220163Strasz			sbuf_printf(sb, ":");
1502220163Strasz		else
1503220163Strasz			sbuf_printf(sb, "%s:",
1504221362Strasz			    rule->rr_subject.rs_prison_racct->prr_name);
1505220163Strasz		break;
1506220163Strasz	default:
1507220163Strasz		panic("rctl_rule_to_sbuf: unknown subject type %d",
1508220163Strasz		    rule->rr_subject_type);
1509220163Strasz	}
1510220163Strasz
1511220163Strasz	amount = rule->rr_amount;
1512220163Strasz	if (amount != RCTL_AMOUNT_UNDEFINED &&
1513224036Strasz	    RACCT_IS_IN_MILLIONS(rule->rr_resource))
1514224036Strasz		amount /= 1000000;
1515220163Strasz
1516220163Strasz	sbuf_printf(sb, "%s:%s=%jd",
1517220163Strasz	    rctl_resource_name(rule->rr_resource),
1518220163Strasz	    rctl_action_name(rule->rr_action),
1519220163Strasz	    amount);
1520220163Strasz
1521220163Strasz	if (rule->rr_per != rule->rr_subject_type)
1522220163Strasz		sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1523220163Strasz}
1524220163Strasz
1525220163Strasz/*
1526220163Strasz * Routine used by RCTL syscalls to read in input string.
1527220163Strasz */
1528220163Straszstatic int
1529220163Straszrctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1530220163Strasz{
1531298045Strasz	char *str;
1532220163Strasz	int error;
1533220163Strasz
1534282213Strasz	ASSERT_RACCT_ENABLED();
1535282213Strasz
1536220163Strasz	if (inbuflen <= 0)
1537220163Strasz		return (EINVAL);
1538290552Strasz	if (inbuflen > RCTL_MAX_INBUFSIZE)
1539234380Strasz		return (E2BIG);
1540220163Strasz
1541220163Strasz	str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1542220163Strasz	error = copyinstr(inbufp, str, inbuflen, NULL);
1543220163Strasz	if (error != 0) {
1544220163Strasz		free(str, M_RCTL);
1545220163Strasz		return (error);
1546220163Strasz	}
1547220163Strasz
1548220163Strasz	*inputstr = str;
1549220163Strasz
1550220163Strasz	return (0);
1551220163Strasz}
1552220163Strasz
/*
 * Copy the output of an RCTL syscall out to userspace.
 *
 * 'outputsbuf' is finished, copied (including the terminating NUL) to
 * the user buffer 'outbufp' of size 'outbuflen', and then deleted; the
 * sbuf is always consumed, whatever the outcome.  A NULL 'outputsbuf'
 * is a no-op that returns 0.
 *
 * Returns 0 on success, the error reported by sbuf_finish() if the
 * sbuf could not be finished, ERANGE if the user buffer is too small,
 * or the error from copyout().
 */
static int
rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
{
	int error;

	ASSERT_RACCT_ENABLED();

	if (outputsbuf == NULL)
		return (0);

	/*
	 * Do not ignore a failed sbuf: its length and contents are not
	 * meaningful, and reporting success would hand the caller
	 * truncated or empty output.
	 */
	error = sbuf_finish(outputsbuf);
	if (error != 0) {
		sbuf_delete(outputsbuf);
		return (error);
	}

	if (outbuflen < sbuf_len(outputsbuf) + 1) {
		sbuf_delete(outputsbuf);
		return (ERANGE);
	}
	error = copyout(sbuf_data(outputsbuf), outbufp,
	    sbuf_len(outputsbuf) + 1);
	sbuf_delete(outputsbuf);
	return (error);
}
1576220163Strasz
1577220163Straszstatic struct sbuf *
1578220163Straszrctl_racct_to_sbuf(struct racct *racct, int sloppy)
1579220163Strasz{
1580298045Strasz	struct sbuf *sb;
1581298045Strasz	int64_t amount;
1582220163Strasz	int i;
1583220163Strasz
1584282213Strasz	ASSERT_RACCT_ENABLED();
1585282213Strasz
1586220163Strasz	sb = sbuf_new_auto();
1587220163Strasz	for (i = 0; i <= RACCT_MAX; i++) {
1588223844Strasz		if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1589220163Strasz			continue;
1590298414Strasz		RACCT_LOCK();
1591220163Strasz		amount = racct->r_resources[i];
1592298414Strasz		RACCT_UNLOCK();
1593224036Strasz		if (RACCT_IS_IN_MILLIONS(i))
1594225371Strasz			amount /= 1000000;
1595220163Strasz		sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1596220163Strasz	}
1597220163Strasz	sbuf_setpos(sb, sbuf_len(sb) - 1);
1598220163Strasz	return (sb);
1599220163Strasz}
1600220163Strasz
1601220163Straszint
1602225617Skmacysys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1603220163Strasz{
1604220163Strasz	struct rctl_rule *filter;
1605220163Strasz	struct sbuf *outputsbuf = NULL;
1606220163Strasz	struct proc *p;
1607220163Strasz	struct uidinfo *uip;
1608220163Strasz	struct loginclass *lc;
1609221362Strasz	struct prison_racct *prr;
1610298045Strasz	char *inputstr;
1611298045Strasz	int error;
1612220163Strasz
1613282213Strasz	if (!racct_enable)
1614282213Strasz		return (ENOSYS);
1615282213Strasz
1616220527Strasz	error = priv_check(td, PRIV_RCTL_GET_RACCT);
1617220163Strasz	if (error != 0)
1618220163Strasz		return (error);
1619220163Strasz
1620220163Strasz	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1621220163Strasz	if (error != 0)
1622220163Strasz		return (error);
1623220163Strasz
1624220163Strasz	sx_slock(&allproc_lock);
1625220163Strasz	error = rctl_string_to_rule(inputstr, &filter);
1626220163Strasz	free(inputstr, M_RCTL);
1627220163Strasz	if (error != 0) {
1628220163Strasz		sx_sunlock(&allproc_lock);
1629220163Strasz		return (error);
1630220163Strasz	}
1631220163Strasz
1632220163Strasz	switch (filter->rr_subject_type) {
1633220163Strasz	case RCTL_SUBJECT_TYPE_PROCESS:
1634220163Strasz		p = filter->rr_subject.rs_proc;
1635220163Strasz		if (p == NULL) {
1636220163Strasz			error = EINVAL;
1637220163Strasz			goto out;
1638220163Strasz		}
1639220163Strasz		outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1640220163Strasz		break;
1641220163Strasz	case RCTL_SUBJECT_TYPE_USER:
1642220163Strasz		uip = filter->rr_subject.rs_uip;
1643220163Strasz		if (uip == NULL) {
1644220163Strasz			error = EINVAL;
1645220163Strasz			goto out;
1646220163Strasz		}
1647220163Strasz		outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1648220163Strasz		break;
1649220163Strasz	case RCTL_SUBJECT_TYPE_LOGINCLASS:
1650220527Strasz		lc = filter->rr_subject.rs_loginclass;
1651220163Strasz		if (lc == NULL) {
1652220163Strasz			error = EINVAL;
1653220163Strasz			goto out;
1654220163Strasz		}
1655220163Strasz		outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1656220163Strasz		break;
1657220163Strasz	case RCTL_SUBJECT_TYPE_JAIL:
1658221362Strasz		prr = filter->rr_subject.rs_prison_racct;
1659221362Strasz		if (prr == NULL) {
1660220163Strasz			error = EINVAL;
1661220163Strasz			goto out;
1662220163Strasz		}
1663221362Strasz		outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1664220163Strasz		break;
1665220163Strasz	default:
1666220163Strasz		error = EINVAL;
1667220163Strasz	}
1668220163Straszout:
1669220163Strasz	rctl_rule_release(filter);
1670220163Strasz	sx_sunlock(&allproc_lock);
1671220163Strasz	if (error != 0)
1672220163Strasz		return (error);
1673220163Strasz
1674220163Strasz	error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1675220163Strasz
1676220163Strasz	return (error);
1677220163Strasz}
1678220163Strasz
1679220163Straszstatic void
1680220163Straszrctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1681220163Strasz{
1682220163Strasz	struct rctl_rule *filter = (struct rctl_rule *)arg2;
1683220163Strasz	struct rctl_rule_link *link;
1684220163Strasz	struct sbuf *sb = (struct sbuf *)arg3;
1685220163Strasz
1686282213Strasz	ASSERT_RACCT_ENABLED();
1687298414Strasz	RACCT_LOCK_ASSERT();
1688282213Strasz
1689220163Strasz	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1690220163Strasz		if (!rctl_rule_matches(link->rrl_rule, filter))
1691220163Strasz			continue;
1692220163Strasz		rctl_rule_to_sbuf(sb, link->rrl_rule);
1693220163Strasz		sbuf_printf(sb, ",");
1694220163Strasz	}
1695220163Strasz}
1696220163Strasz
1697220163Straszint
1698225617Skmacysys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1699220163Strasz{
1700220163Strasz	struct sbuf *sb;
1701220163Strasz	struct rctl_rule *filter;
1702220163Strasz	struct rctl_rule_link *link;
1703220163Strasz	struct proc *p;
1704298045Strasz	char *inputstr, *buf;
1705298045Strasz	size_t bufsize;
1706298045Strasz	int error;
1707220163Strasz
1708282213Strasz	if (!racct_enable)
1709282213Strasz		return (ENOSYS);
1710282213Strasz
1711220163Strasz	error = priv_check(td, PRIV_RCTL_GET_RULES);
1712220163Strasz	if (error != 0)
1713220163Strasz		return (error);
1714220163Strasz
1715220163Strasz	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1716220163Strasz	if (error != 0)
1717220163Strasz		return (error);
1718220163Strasz
1719220163Strasz	sx_slock(&allproc_lock);
1720220163Strasz	error = rctl_string_to_rule(inputstr, &filter);
1721220163Strasz	free(inputstr, M_RCTL);
1722220163Strasz	if (error != 0) {
1723220163Strasz		sx_sunlock(&allproc_lock);
1724220163Strasz		return (error);
1725220163Strasz	}
1726220163Strasz
1727290551Strasz	bufsize = uap->outbuflen;
1728290551Strasz	if (bufsize > rctl_maxbufsize) {
1729290551Strasz		sx_sunlock(&allproc_lock);
1730290551Strasz		return (E2BIG);
1731290551Strasz	}
1732290551Strasz
1733220163Strasz	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1734220163Strasz	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1735220163Strasz	KASSERT(sb != NULL, ("sbuf_new failed"));
1736220163Strasz
1737220163Strasz	FOREACH_PROC_IN_SYSTEM(p) {
1738298414Strasz		RACCT_LOCK();
1739220163Strasz		LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1740220163Strasz			/*
1741220163Strasz			 * Non-process rules will be added to the buffer later.
1742220163Strasz			 * Adding them here would result in duplicated output.
1743220163Strasz			 */
1744220163Strasz			if (link->rrl_rule->rr_subject_type !=
1745220163Strasz			    RCTL_SUBJECT_TYPE_PROCESS)
1746220163Strasz				continue;
1747220163Strasz			if (!rctl_rule_matches(link->rrl_rule, filter))
1748220163Strasz				continue;
1749220163Strasz			rctl_rule_to_sbuf(sb, link->rrl_rule);
1750220163Strasz			sbuf_printf(sb, ",");
1751220163Strasz		}
1752298414Strasz		RACCT_UNLOCK();
1753220163Strasz	}
1754220163Strasz
1755290857Strasz	loginclass_racct_foreach(rctl_get_rules_callback,
1756290857Strasz	    rctl_rule_pre_callback, rctl_rule_post_callback,
1757290857Strasz	    filter, sb);
1758290857Strasz	ui_racct_foreach(rctl_get_rules_callback,
1759290857Strasz	    rctl_rule_pre_callback, rctl_rule_post_callback,
1760290857Strasz	    filter, sb);
1761290857Strasz	prison_racct_foreach(rctl_get_rules_callback,
1762290857Strasz	    rctl_rule_pre_callback, rctl_rule_post_callback,
1763290857Strasz	    filter, sb);
1764220163Strasz	if (sbuf_error(sb) == ENOMEM) {
1765290551Strasz		error = ERANGE;
1766290551Strasz		goto out;
1767220163Strasz	}
1768220163Strasz
1769220163Strasz	/*
1770220163Strasz	 * Remove trailing ",".
1771220163Strasz	 */
1772220163Strasz	if (sbuf_len(sb) > 0)
1773220163Strasz		sbuf_setpos(sb, sbuf_len(sb) - 1);
1774220163Strasz
1775220163Strasz	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1776290551Straszout:
1777220163Strasz	rctl_rule_release(filter);
1778220163Strasz	sx_sunlock(&allproc_lock);
1779220163Strasz	free(buf, M_RCTL);
1780220163Strasz	return (error);
1781220163Strasz}
1782220163Strasz
1783220163Straszint
1784225617Skmacysys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1785220163Strasz{
1786220163Strasz	struct sbuf *sb;
1787220163Strasz	struct rctl_rule *filter;
1788220163Strasz	struct rctl_rule_link *link;
1789298045Strasz	char *inputstr, *buf;
1790298045Strasz	size_t bufsize;
1791298045Strasz	int error;
1792220163Strasz
1793282213Strasz	if (!racct_enable)
1794282213Strasz		return (ENOSYS);
1795282213Strasz
1796220163Strasz	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1797220163Strasz	if (error != 0)
1798220163Strasz		return (error);
1799220163Strasz
1800220163Strasz	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1801220163Strasz	if (error != 0)
1802220163Strasz		return (error);
1803220163Strasz
1804220163Strasz	sx_slock(&allproc_lock);
1805220163Strasz	error = rctl_string_to_rule(inputstr, &filter);
1806220163Strasz	free(inputstr, M_RCTL);
1807220163Strasz	if (error != 0) {
1808220163Strasz		sx_sunlock(&allproc_lock);
1809220163Strasz		return (error);
1810220163Strasz	}
1811220163Strasz
1812220163Strasz	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1813220163Strasz		rctl_rule_release(filter);
1814220163Strasz		sx_sunlock(&allproc_lock);
1815220163Strasz		return (EINVAL);
1816220163Strasz	}
1817220163Strasz	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1818220163Strasz		rctl_rule_release(filter);
1819220163Strasz		sx_sunlock(&allproc_lock);
1820220163Strasz		return (EOPNOTSUPP);
1821220163Strasz	}
1822220163Strasz	if (filter->rr_subject.rs_proc == NULL) {
1823220163Strasz		rctl_rule_release(filter);
1824220163Strasz		sx_sunlock(&allproc_lock);
1825220163Strasz		return (EINVAL);
1826220163Strasz	}
1827220163Strasz
1828290551Strasz	bufsize = uap->outbuflen;
1829290551Strasz	if (bufsize > rctl_maxbufsize) {
1830290551Strasz		rctl_rule_release(filter);
1831290551Strasz		sx_sunlock(&allproc_lock);
1832290551Strasz		return (E2BIG);
1833290551Strasz	}
1834290551Strasz
1835220163Strasz	buf = malloc(bufsize, M_RCTL, M_WAITOK);
1836220163Strasz	sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1837220163Strasz	KASSERT(sb != NULL, ("sbuf_new failed"));
1838220163Strasz
1839298414Strasz	RACCT_LOCK();
1840220163Strasz	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1841220163Strasz	    rrl_next) {
1842220163Strasz		rctl_rule_to_sbuf(sb, link->rrl_rule);
1843220163Strasz		sbuf_printf(sb, ",");
1844220163Strasz	}
1845298414Strasz	RACCT_UNLOCK();
1846220163Strasz	if (sbuf_error(sb) == ENOMEM) {
1847290551Strasz		error = ERANGE;
1848298330Scem		sbuf_delete(sb);
1849290551Strasz		goto out;
1850220163Strasz	}
1851220163Strasz
1852220163Strasz	/*
1853220163Strasz	 * Remove trailing ",".
1854220163Strasz	 */
1855220163Strasz	if (sbuf_len(sb) > 0)
1856220163Strasz		sbuf_setpos(sb, sbuf_len(sb) - 1);
1857220163Strasz
1858220163Strasz	error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1859290551Straszout:
1860220163Strasz	rctl_rule_release(filter);
1861220163Strasz	sx_sunlock(&allproc_lock);
1862220163Strasz	free(buf, M_RCTL);
1863220163Strasz	return (error);
1864220163Strasz}
1865220163Strasz
1866220163Straszint
1867225617Skmacysys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1868220163Strasz{
1869220163Strasz	struct rctl_rule *rule;
1870220163Strasz	char *inputstr;
1871298045Strasz	int error;
1872220163Strasz
1873282213Strasz	if (!racct_enable)
1874282213Strasz		return (ENOSYS);
1875282213Strasz
1876220163Strasz	error = priv_check(td, PRIV_RCTL_ADD_RULE);
1877220163Strasz	if (error != 0)
1878220163Strasz		return (error);
1879220163Strasz
1880220163Strasz	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1881220163Strasz	if (error != 0)
1882220163Strasz		return (error);
1883220163Strasz
1884220163Strasz	sx_slock(&allproc_lock);
1885220163Strasz	error = rctl_string_to_rule(inputstr, &rule);
1886220163Strasz	free(inputstr, M_RCTL);
1887220163Strasz	if (error != 0) {
1888220163Strasz		sx_sunlock(&allproc_lock);
1889220163Strasz		return (error);
1890220163Strasz	}
1891220163Strasz	/*
1892220163Strasz	 * The 'per' part of a rule is optional.
1893220163Strasz	 */
1894220163Strasz	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1895220163Strasz	    rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1896220163Strasz		rule->rr_per = rule->rr_subject_type;
1897220163Strasz
1898220163Strasz	if (!rctl_rule_fully_specified(rule)) {
1899220163Strasz		error = EINVAL;
1900220163Strasz		goto out;
1901220163Strasz	}
1902220163Strasz
1903220163Strasz	error = rctl_rule_add(rule);
1904220163Strasz
1905220163Straszout:
1906220163Strasz	rctl_rule_release(rule);
1907220163Strasz	sx_sunlock(&allproc_lock);
1908220163Strasz	return (error);
1909220163Strasz}
1910220163Strasz
1911220163Straszint
1912225617Skmacysys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1913220163Strasz{
1914220163Strasz	struct rctl_rule *filter;
1915220163Strasz	char *inputstr;
1916298045Strasz	int error;
1917220163Strasz
1918282213Strasz	if (!racct_enable)
1919282213Strasz		return (ENOSYS);
1920282213Strasz
1921220163Strasz	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1922220163Strasz	if (error != 0)
1923220163Strasz		return (error);
1924220163Strasz
1925220163Strasz	error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1926220163Strasz	if (error != 0)
1927220163Strasz		return (error);
1928220163Strasz
1929220163Strasz	sx_slock(&allproc_lock);
1930220163Strasz	error = rctl_string_to_rule(inputstr, &filter);
1931220163Strasz	free(inputstr, M_RCTL);
1932220163Strasz	if (error != 0) {
1933220163Strasz		sx_sunlock(&allproc_lock);
1934220163Strasz		return (error);
1935220163Strasz	}
1936220163Strasz
1937220163Strasz	error = rctl_rule_remove(filter);
1938220163Strasz	rctl_rule_release(filter);
1939220163Strasz	sx_sunlock(&allproc_lock);
1940220163Strasz
1941220163Strasz	return (error);
1942220163Strasz}
1943220163Strasz
1944220163Strasz/*
1945220163Strasz * Update RCTL rule list after credential change.
1946220163Strasz */
1947220163Straszvoid
1948220163Straszrctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1949220163Strasz{
1950298045Strasz	LIST_HEAD(, rctl_rule_link) newrules;
1951220163Strasz	struct rctl_rule_link *link, *newlink;
1952220163Strasz	struct uidinfo *newuip;
1953220163Strasz	struct loginclass *newlc;
1954221362Strasz	struct prison_racct *newprr;
1955298045Strasz	int rulecnt, i;
1956220163Strasz
1957335536Savg	if (!racct_enable)
1958335536Savg		return;
1959282213Strasz
1960335536Savg	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
1961335536Savg
1962220163Strasz	newuip = newcred->cr_ruidinfo;
1963220163Strasz	newlc = newcred->cr_loginclass;
1964221362Strasz	newprr = newcred->cr_prison->pr_prison_racct;
1965335536Savg
1966220163Strasz	LIST_INIT(&newrules);
1967220163Strasz
1968220163Straszagain:
1969220163Strasz	/*
1970220163Strasz	 * First, count the rules that apply to the process with new
1971220163Strasz	 * credentials.
1972220163Strasz	 */
1973220163Strasz	rulecnt = 0;
1974298414Strasz	RACCT_LOCK();
1975220163Strasz	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1976220163Strasz		if (link->rrl_rule->rr_subject_type ==
1977220163Strasz		    RCTL_SUBJECT_TYPE_PROCESS)
1978220163Strasz			rulecnt++;
1979220163Strasz	}
1980220163Strasz	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1981220163Strasz		rulecnt++;
1982220163Strasz	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1983220163Strasz		rulecnt++;
1984221362Strasz	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1985220163Strasz		rulecnt++;
1986298414Strasz	RACCT_UNLOCK();
1987220163Strasz
1988220163Strasz	/*
1989220163Strasz	 * Create temporary list.  We've dropped the rctl_lock in order
1990220163Strasz	 * to use M_WAITOK.
1991220163Strasz	 */
1992220163Strasz	for (i = 0; i < rulecnt; i++) {
1993220163Strasz		newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1994220163Strasz		newlink->rrl_rule = NULL;
1995294792Strasz		newlink->rrl_exceeded = 0;
1996220163Strasz		LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1997220163Strasz	}
1998220163Strasz
1999220163Strasz	newlink = LIST_FIRST(&newrules);
2000220163Strasz
2001220163Strasz	/*
2002220163Strasz	 * Assign rules to the newly allocated list entries.
2003220163Strasz	 */
2004298414Strasz	RACCT_LOCK();
2005220163Strasz	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
2006220163Strasz		if (link->rrl_rule->rr_subject_type ==
2007220163Strasz		    RCTL_SUBJECT_TYPE_PROCESS) {
2008220163Strasz			if (newlink == NULL)
2009220163Strasz				goto goaround;
2010220163Strasz			rctl_rule_acquire(link->rrl_rule);
2011220163Strasz			newlink->rrl_rule = link->rrl_rule;
2012294792Strasz			newlink->rrl_exceeded = link->rrl_exceeded;
2013220163Strasz			newlink = LIST_NEXT(newlink, rrl_next);
2014220163Strasz			rulecnt--;
2015220163Strasz		}
2016220163Strasz	}
2017220163Strasz
2018220163Strasz	LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
2019220163Strasz		if (newlink == NULL)
2020220163Strasz			goto goaround;
2021220163Strasz		rctl_rule_acquire(link->rrl_rule);
2022220163Strasz		newlink->rrl_rule = link->rrl_rule;
2023294792Strasz		newlink->rrl_exceeded = link->rrl_exceeded;
2024220163Strasz		newlink = LIST_NEXT(newlink, rrl_next);
2025220163Strasz		rulecnt--;
2026220163Strasz	}
2027220163Strasz
2028220163Strasz	LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
2029220163Strasz		if (newlink == NULL)
2030220163Strasz			goto goaround;
2031220163Strasz		rctl_rule_acquire(link->rrl_rule);
2032220163Strasz		newlink->rrl_rule = link->rrl_rule;
2033294792Strasz		newlink->rrl_exceeded = link->rrl_exceeded;
2034220163Strasz		newlink = LIST_NEXT(newlink, rrl_next);
2035220163Strasz		rulecnt--;
2036220163Strasz	}
2037220163Strasz
2038221362Strasz	LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
2039220163Strasz		if (newlink == NULL)
2040220163Strasz			goto goaround;
2041220163Strasz		rctl_rule_acquire(link->rrl_rule);
2042220163Strasz		newlink->rrl_rule = link->rrl_rule;
2043294792Strasz		newlink->rrl_exceeded = link->rrl_exceeded;
2044220163Strasz		newlink = LIST_NEXT(newlink, rrl_next);
2045220163Strasz		rulecnt--;
2046220163Strasz	}
2047220163Strasz
2048220163Strasz	if (rulecnt == 0) {
2049220163Strasz		/*
2050220163Strasz		 * Free the old rule list.
2051220163Strasz		 */
2052220163Strasz		while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
2053220163Strasz			link = LIST_FIRST(&p->p_racct->r_rule_links);
2054220163Strasz			LIST_REMOVE(link, rrl_next);
2055220163Strasz			rctl_rule_release(link->rrl_rule);
2056220163Strasz			uma_zfree(rctl_rule_link_zone, link);
2057220163Strasz		}
2058220163Strasz
2059220163Strasz		/*
2060220163Strasz		 * Replace lists and we're done.
2061220163Strasz		 *
2062220163Strasz		 * XXX: Is there any way to switch list heads instead
2063220163Strasz		 *      of iterating here?
2064220163Strasz		 */
2065220163Strasz		while (!LIST_EMPTY(&newrules)) {
2066220163Strasz			newlink = LIST_FIRST(&newrules);
2067220163Strasz			LIST_REMOVE(newlink, rrl_next);
2068220163Strasz			LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
2069220163Strasz			    newlink, rrl_next);
2070220163Strasz		}
2071220163Strasz
2072298414Strasz		RACCT_UNLOCK();
2073220163Strasz
2074220163Strasz		return;
2075220163Strasz	}
2076220163Strasz
2077220163Straszgoaround:
2078298414Strasz	RACCT_UNLOCK();
2079220163Strasz
2080220163Strasz	/*
2081220163Strasz	 * Rule list changed while we were not holding the rctl_lock.
2082220163Strasz	 * Free the new list and try again.
2083220163Strasz	 */
2084220163Strasz	while (!LIST_EMPTY(&newrules)) {
2085220163Strasz		newlink = LIST_FIRST(&newrules);
2086220163Strasz		LIST_REMOVE(newlink, rrl_next);
2087220163Strasz		if (newlink->rrl_rule != NULL)
2088220163Strasz			rctl_rule_release(newlink->rrl_rule);
2089220163Strasz		uma_zfree(rctl_rule_link_zone, newlink);
2090220163Strasz	}
2091220163Strasz
2092220163Strasz	goto again;
2093220163Strasz}
2094220163Strasz
2095220163Strasz/*
2096220163Strasz * Assign RCTL rules to the newly created process.
2097220163Strasz */
2098220163Straszint
2099220163Straszrctl_proc_fork(struct proc *parent, struct proc *child)
2100220163Strasz{
2101298045Strasz	struct rctl_rule *rule;
2102298045Strasz	struct rctl_rule_link *link;
2103220163Strasz	int error;
2104220163Strasz
2105282213Strasz	ASSERT_RACCT_ENABLED();
2106298414Strasz	RACCT_LOCK_ASSERT();
2107234383Strasz	KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
2108220163Strasz
2109298414Strasz	LIST_INIT(&child->p_racct->r_rule_links);
2110220163Strasz
2111220163Strasz	/*
2112220163Strasz	 * Go through limits applicable to the parent and assign them
2113220163Strasz	 * to the child.  Rules with 'process' subject have to be duplicated
2114220163Strasz	 * in order to make their rr_subject point to the new process.
2115220163Strasz	 */
2116220163Strasz	LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
2117220163Strasz		if (link->rrl_rule->rr_subject_type ==
2118220163Strasz		    RCTL_SUBJECT_TYPE_PROCESS) {
2119220163Strasz			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
2120220163Strasz			if (rule == NULL)
2121220163Strasz				goto fail;
2122220163Strasz			KASSERT(rule->rr_subject.rs_proc == parent,
2123220163Strasz			    ("rule->rr_subject.rs_proc != parent"));
2124220163Strasz			rule->rr_subject.rs_proc = child;
2125220163Strasz			error = rctl_racct_add_rule_locked(child->p_racct,
2126220163Strasz			    rule);
2127220163Strasz			rctl_rule_release(rule);
2128220163Strasz			if (error != 0)
2129220163Strasz				goto fail;
2130220163Strasz		} else {
2131220163Strasz			error = rctl_racct_add_rule_locked(child->p_racct,
2132220163Strasz			    link->rrl_rule);
2133220163Strasz			if (error != 0)
2134220163Strasz				goto fail;
2135220163Strasz		}
2136220163Strasz	}
2137220163Strasz
2138220163Strasz	return (0);
2139220163Strasz
2140220163Straszfail:
2141220163Strasz	while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
2142220163Strasz		link = LIST_FIRST(&child->p_racct->r_rule_links);
2143220163Strasz		LIST_REMOVE(link, rrl_next);
2144220163Strasz		rctl_rule_release(link->rrl_rule);
2145220163Strasz		uma_zfree(rctl_rule_link_zone, link);
2146220163Strasz	}
2147298414Strasz
2148220163Strasz	return (EAGAIN);
2149220163Strasz}
2150220163Strasz
2151220163Strasz/*
2152220163Strasz * Release rules attached to the racct.
2153220163Strasz */
2154220163Straszvoid
2155220163Straszrctl_racct_release(struct racct *racct)
2156220163Strasz{
2157220163Strasz	struct rctl_rule_link *link;
2158220163Strasz
2159282213Strasz	ASSERT_RACCT_ENABLED();
2160298414Strasz	RACCT_LOCK_ASSERT();
2161282213Strasz
2162220163Strasz	while (!LIST_EMPTY(&racct->r_rule_links)) {
2163220163Strasz		link = LIST_FIRST(&racct->r_rule_links);
2164220163Strasz		LIST_REMOVE(link, rrl_next);
2165220163Strasz		rctl_rule_release(link->rrl_rule);
2166220163Strasz		uma_zfree(rctl_rule_link_zone, link);
2167220163Strasz	}
2168220163Strasz}
2169220163Strasz
2170220163Straszstatic void
2171220163Straszrctl_init(void)
2172220163Strasz{
2173220163Strasz
2174282213Strasz	if (!racct_enable)
2175282213Strasz		return;
2176282213Strasz
2177298045Strasz	rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
2178298050Strasz	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2179220163Strasz	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
2180220163Strasz	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
2181298050Strasz	    UMA_ALIGN_PTR, 0);
2182297633Strasz
2183297722Strasz	/*
2184297722Strasz	 * Set default values, making sure not to overwrite the ones
2185297722Strasz	 * fetched from tunables.  Most of those could be set at the
2186297722Strasz	 * declaration, except for the rctl_throttle_max - we cannot
2187297722Strasz	 * set it there due to hz not being compile time constant.
2188297722Strasz	 */
2189297722Strasz	if (rctl_throttle_min < 1)
2190297633Strasz		rctl_throttle_min = 1;
2191297722Strasz	if (rctl_throttle_max < rctl_throttle_min)
2192297633Strasz		rctl_throttle_max = 2 * hz;
2193297722Strasz	if (rctl_throttle_pct < 0)
2194297633Strasz		rctl_throttle_pct = 100;
2195297722Strasz	if (rctl_throttle_pct2 < 0)
2196297633Strasz		rctl_throttle_pct2 = 100;
2197220163Strasz}
2198220163Strasz
2199220163Strasz#else /* !RCTL */
2200220163Strasz
/*
 * Stub for kernels built without RCTL: always fails with ENOSYS.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{
	return (ENOSYS);
}
2207220163Strasz
/*
 * Stub for kernels built without RCTL: always fails with ENOSYS.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{
	return (ENOSYS);
}
2214220163Strasz
/*
 * Stub for kernels built without RCTL: always fails with ENOSYS.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{
	return (ENOSYS);
}
2221220163Strasz
/*
 * Stub for kernels built without RCTL: always fails with ENOSYS.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{
	return (ENOSYS);
}
2228220163Strasz
/*
 * Stub for kernels built without RCTL: always fails with ENOSYS.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{
	return (ENOSYS);
}
2235220163Strasz
2236220163Strasz#endif /* !RCTL */
2237