kern_racct.c revision 225981
1220137Strasz/*-
2220137Strasz * Copyright (c) 2010 The FreeBSD Foundation
3220137Strasz * All rights reserved.
4220137Strasz *
5220137Strasz * This software was developed by Edward Tomasz Napierala under sponsorship
6220137Strasz * from the FreeBSD Foundation.
7220137Strasz *
8220137Strasz * Redistribution and use in source and binary forms, with or without
9220137Strasz * modification, are permitted provided that the following conditions
10220137Strasz * are met:
11220137Strasz * 1. Redistributions of source code must retain the above copyright
12220137Strasz *    notice, this list of conditions and the following disclaimer.
13220137Strasz * 2. Redistributions in binary form must reproduce the above copyright
14220137Strasz *    notice, this list of conditions and the following disclaimer in the
15220137Strasz *    documentation and/or other materials provided with the distribution.
16220137Strasz *
17220137Strasz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18220137Strasz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19220137Strasz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20220137Strasz * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21220137Strasz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22220137Strasz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23220137Strasz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24220137Strasz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25220137Strasz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26220137Strasz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27220137Strasz * SUCH DAMAGE.
28220137Strasz *
29220137Strasz * $FreeBSD: head/sys/kern/kern_racct.c 225981 2011-10-04 14:56:33Z trasz $
30220137Strasz */
31220137Strasz
32220137Strasz#include <sys/cdefs.h>
33220137Strasz__FBSDID("$FreeBSD: head/sys/kern/kern_racct.c 225981 2011-10-04 14:56:33Z trasz $");
34220137Strasz
35220137Strasz#include "opt_kdtrace.h"
36220137Strasz
37220137Strasz#include <sys/param.h>
38220137Strasz#include <sys/eventhandler.h>
39220137Strasz#include <sys/param.h>
40220137Strasz#include <sys/jail.h>
41220137Strasz#include <sys/kernel.h>
42220137Strasz#include <sys/kthread.h>
43220137Strasz#include <sys/lock.h>
44220137Strasz#include <sys/loginclass.h>
45220137Strasz#include <sys/malloc.h>
46220137Strasz#include <sys/mutex.h>
47220137Strasz#include <sys/proc.h>
48220137Strasz#include <sys/racct.h>
49220137Strasz#include <sys/resourcevar.h>
50220137Strasz#include <sys/sbuf.h>
51220137Strasz#include <sys/sched.h>
52220137Strasz#include <sys/sdt.h>
53220137Strasz#include <sys/sx.h>
54220137Strasz#include <sys/sysent.h>
55220137Strasz#include <sys/sysproto.h>
56220137Strasz#include <sys/systm.h>
57220137Strasz#include <sys/umtx.h>
58220137Strasz
59220137Strasz#ifdef RCTL
60220137Strasz#include <sys/rctl.h>
61220137Strasz#endif
62220137Strasz
63220137Strasz#ifdef RACCT
64220137Strasz
65220137StraszFEATURE(racct, "Resource Accounting");
66220137Strasz
67220137Straszstatic struct mtx racct_lock;
68220137StraszMTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
69220137Strasz
70220137Straszstatic uma_zone_t racct_zone;
71220137Strasz
72220137Straszstatic void racct_sub_racct(struct racct *dest, const struct racct *src);
73220137Straszstatic void racct_sub_cred_locked(struct ucred *cred, int resource,
74220137Strasz		uint64_t amount);
75220137Straszstatic void racct_add_cred_locked(struct ucred *cred, int resource,
76220137Strasz		uint64_t amount);
77220137Strasz
78220137StraszSDT_PROVIDER_DEFINE(racct);
79220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
80220137Strasz    "uint64_t");
81220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
82220137Strasz    "struct proc *", "int", "uint64_t");
83220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
84220137Strasz    "int", "uint64_t");
85220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
86220137Strasz    "int", "uint64_t");
87220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
88220137Strasz    "uint64_t");
89220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
90220137Strasz    "struct proc *", "int", "uint64_t");
91220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
92220137Strasz    "uint64_t");
93220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
94220137Strasz    "int", "uint64_t");
95220137StraszSDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
96220137StraszSDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
97220137StraszSDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
98220137Strasz    "struct racct *");
99220137StraszSDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
100220137Strasz    "struct racct *", "struct racct *");
101220137StraszSDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
102220137Strasz    "struct racct *");
103220137Strasz
104220137Straszint racct_types[] = {
105220137Strasz	[RACCT_CPU] =
106224036Strasz		RACCT_IN_MILLIONS,
107220137Strasz	[RACCT_DATA] =
108220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
109220137Strasz	[RACCT_STACK] =
110220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
111220137Strasz	[RACCT_CORE] =
112220137Strasz		RACCT_DENIABLE,
113220137Strasz	[RACCT_RSS] =
114220137Strasz		RACCT_RECLAIMABLE,
115220137Strasz	[RACCT_MEMLOCK] =
116220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE,
117220137Strasz	[RACCT_NPROC] =
118220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE,
119220137Strasz	[RACCT_NOFILE] =
120220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
121220137Strasz	[RACCT_VMEM] =
122220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
123220137Strasz	[RACCT_NPTS] =
124220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
125220137Strasz	[RACCT_SWAP] =
126220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
127220137Strasz	[RACCT_NTHR] =
128220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE,
129220137Strasz	[RACCT_MSGQQUEUED] =
130220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
131220137Strasz	[RACCT_MSGQSIZE] =
132220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
133220137Strasz	[RACCT_NMSGQ] =
134220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
135220137Strasz	[RACCT_NSEM] =
136220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
137220137Strasz	[RACCT_NSEMOP] =
138220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
139220137Strasz	[RACCT_NSHM] =
140220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
141220137Strasz	[RACCT_SHMSIZE] =
142220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
143220137Strasz	[RACCT_WALLCLOCK] =
144224036Strasz		RACCT_IN_MILLIONS };
145220137Strasz
146220137Straszstatic void
147220137Straszracct_add_racct(struct racct *dest, const struct racct *src)
148220137Strasz{
149220137Strasz	int i;
150220137Strasz
151220137Strasz	mtx_assert(&racct_lock, MA_OWNED);
152220137Strasz
153220137Strasz	/*
154220137Strasz	 * Update resource usage in dest.
155220137Strasz	 */
156220137Strasz	for (i = 0; i <= RACCT_MAX; i++) {
157220137Strasz		KASSERT(dest->r_resources[i] >= 0,
158220137Strasz		    ("racct propagation meltdown: dest < 0"));
159220137Strasz		KASSERT(src->r_resources[i] >= 0,
160220137Strasz		    ("racct propagation meltdown: src < 0"));
161220137Strasz		dest->r_resources[i] += src->r_resources[i];
162220137Strasz	}
163220137Strasz}
164220137Strasz
165220137Straszstatic void
166220137Straszracct_sub_racct(struct racct *dest, const struct racct *src)
167220137Strasz{
168220137Strasz	int i;
169220137Strasz
170220137Strasz	mtx_assert(&racct_lock, MA_OWNED);
171220137Strasz
172220137Strasz	/*
173220137Strasz	 * Update resource usage in dest.
174220137Strasz	 */
175220137Strasz	for (i = 0; i <= RACCT_MAX; i++) {
176223844Strasz		if (!RACCT_IS_SLOPPY(i)) {
177220137Strasz			KASSERT(dest->r_resources[i] >= 0,
178220137Strasz			    ("racct propagation meltdown: dest < 0"));
179220137Strasz			KASSERT(src->r_resources[i] >= 0,
180220137Strasz			    ("racct propagation meltdown: src < 0"));
181220137Strasz			KASSERT(src->r_resources[i] <= dest->r_resources[i],
182220137Strasz			    ("racct propagation meltdown: src > dest"));
183220137Strasz		}
184223844Strasz		if (RACCT_IS_RECLAIMABLE(i)) {
185220137Strasz			dest->r_resources[i] -= src->r_resources[i];
186220137Strasz			if (dest->r_resources[i] < 0) {
187223844Strasz				KASSERT(RACCT_IS_SLOPPY(i),
188220137Strasz				    ("racct_sub_racct: usage < 0"));
189220137Strasz				dest->r_resources[i] = 0;
190220137Strasz			}
191220137Strasz		}
192220137Strasz	}
193220137Strasz}
194220137Strasz
195220137Straszvoid
196220137Straszracct_create(struct racct **racctp)
197220137Strasz{
198220137Strasz
199220137Strasz	SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
200220137Strasz
201220137Strasz	KASSERT(*racctp == NULL, ("racct already allocated"));
202220137Strasz
203220137Strasz	*racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
204220137Strasz}
205220137Strasz
206220137Straszstatic void
207220137Straszracct_destroy_locked(struct racct **racctp)
208220137Strasz{
209220137Strasz	int i;
210220137Strasz	struct racct *racct;
211220137Strasz
212220137Strasz	SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
213220137Strasz
214220137Strasz	mtx_assert(&racct_lock, MA_OWNED);
215220137Strasz	KASSERT(racctp != NULL, ("NULL racctp"));
216220137Strasz	KASSERT(*racctp != NULL, ("NULL racct"));
217220137Strasz
218220137Strasz	racct = *racctp;
219220137Strasz
220220137Strasz	for (i = 0; i <= RACCT_MAX; i++) {
221223844Strasz		if (RACCT_IS_SLOPPY(i))
222220137Strasz			continue;
223223844Strasz		if (!RACCT_IS_RECLAIMABLE(i))
224220137Strasz			continue;
225220137Strasz		KASSERT(racct->r_resources[i] == 0,
226220137Strasz		    ("destroying non-empty racct: "
227220137Strasz		    "%ju allocated for resource %d\n",
228220137Strasz		    racct->r_resources[i], i));
229220137Strasz	}
230220137Strasz	uma_zfree(racct_zone, racct);
231220137Strasz	*racctp = NULL;
232220137Strasz}
233220137Strasz
234220137Straszvoid
235220137Straszracct_destroy(struct racct **racct)
236220137Strasz{
237220137Strasz
238220137Strasz	mtx_lock(&racct_lock);
239220137Strasz	racct_destroy_locked(racct);
240220137Strasz	mtx_unlock(&racct_lock);
241220137Strasz}
242220137Strasz
243220137Strasz/*
244220137Strasz * Increase consumption of 'resource' by 'amount' for 'racct'
245220137Strasz * and all its parents.  Differently from other cases, 'amount' here
246220137Strasz * may be less than zero.
247220137Strasz */
248220137Straszstatic void
249220137Straszracct_alloc_resource(struct racct *racct, int resource,
250220137Strasz    uint64_t amount)
251220137Strasz{
252220137Strasz
253220137Strasz	mtx_assert(&racct_lock, MA_OWNED);
254220137Strasz	KASSERT(racct != NULL, ("NULL racct"));
255220137Strasz
256220137Strasz	racct->r_resources[resource] += amount;
257220137Strasz	if (racct->r_resources[resource] < 0) {
258223844Strasz		KASSERT(RACCT_IS_SLOPPY(resource),
259220137Strasz		    ("racct_alloc_resource: usage < 0"));
260220137Strasz		racct->r_resources[resource] = 0;
261220137Strasz	}
262220137Strasz}
263220137Strasz
264225944Straszstatic int
265225944Straszracct_add_locked(struct proc *p, int resource, uint64_t amount)
266220137Strasz{
267220137Strasz#ifdef RCTL
268220137Strasz	int error;
269220137Strasz#endif
270220137Strasz
271220137Strasz	if (p->p_flag & P_SYSTEM)
272220137Strasz		return (0);
273220137Strasz
274220137Strasz	SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
275220137Strasz
276220137Strasz	/*
277220137Strasz	 * We need proc lock to dereference p->p_ucred.
278220137Strasz	 */
279220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
280220137Strasz
281220137Strasz#ifdef RCTL
282220137Strasz	error = rctl_enforce(p, resource, amount);
283223844Strasz	if (error && RACCT_IS_DENIABLE(resource)) {
284220137Strasz		SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
285220137Strasz		    amount, 0, 0);
286220137Strasz		return (error);
287220137Strasz	}
288220137Strasz#endif
289220137Strasz	racct_alloc_resource(p->p_racct, resource, amount);
290220137Strasz	racct_add_cred_locked(p->p_ucred, resource, amount);
291220137Strasz
292220137Strasz	return (0);
293220137Strasz}
294220137Strasz
295225944Strasz/*
296225944Strasz * Increase allocation of 'resource' by 'amount' for process 'p'.
297225944Strasz * Return 0 if it's below limits, or errno, if it's not.
298225944Strasz */
299225944Straszint
300225944Straszracct_add(struct proc *p, int resource, uint64_t amount)
301225944Strasz{
302225944Strasz	int error;
303225944Strasz
304225944Strasz	mtx_lock(&racct_lock);
305225944Strasz	error = racct_add_locked(p, resource, amount);
306225944Strasz	mtx_unlock(&racct_lock);
307225944Strasz	return (error);
308225944Strasz}
309225944Strasz
310220137Straszstatic void
311220137Straszracct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
312220137Strasz{
313220137Strasz	struct prison *pr;
314220137Strasz
315220137Strasz	SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
316220137Strasz	    0, 0);
317220137Strasz
318220137Strasz	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
319220137Strasz	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
320221362Strasz		racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
321221362Strasz		    amount);
322220137Strasz	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
323220137Strasz}
324220137Strasz
325220137Strasz/*
326220137Strasz * Increase allocation of 'resource' by 'amount' for credential 'cred'.
327220137Strasz * Doesn't check for limits and never fails.
328220137Strasz *
329220137Strasz * XXX: Shouldn't this ever return an error?
330220137Strasz */
331220137Straszvoid
332220137Straszracct_add_cred(struct ucred *cred, int resource, uint64_t amount)
333220137Strasz{
334220137Strasz
335220137Strasz	mtx_lock(&racct_lock);
336220137Strasz	racct_add_cred_locked(cred, resource, amount);
337220137Strasz	mtx_unlock(&racct_lock);
338220137Strasz}
339220137Strasz
340220137Strasz/*
341220137Strasz * Increase allocation of 'resource' by 'amount' for process 'p'.
342220137Strasz * Doesn't check for limits and never fails.
343220137Strasz */
344220137Straszvoid
345220137Straszracct_add_force(struct proc *p, int resource, uint64_t amount)
346220137Strasz{
347220137Strasz
348220137Strasz	if (p->p_flag & P_SYSTEM)
349220137Strasz		return;
350220137Strasz
351220137Strasz	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
352220137Strasz
353220137Strasz	/*
354220137Strasz	 * We need proc lock to dereference p->p_ucred.
355220137Strasz	 */
356220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
357220137Strasz
358220137Strasz	mtx_lock(&racct_lock);
359220137Strasz	racct_alloc_resource(p->p_racct, resource, amount);
360220137Strasz	mtx_unlock(&racct_lock);
361220137Strasz	racct_add_cred(p->p_ucred, resource, amount);
362220137Strasz}
363220137Strasz
364220137Straszstatic int
365220137Straszracct_set_locked(struct proc *p, int resource, uint64_t amount)
366220137Strasz{
367220137Strasz	int64_t diff;
368220137Strasz#ifdef RCTL
369220137Strasz	int error;
370220137Strasz#endif
371220137Strasz
372220137Strasz	if (p->p_flag & P_SYSTEM)
373220137Strasz		return (0);
374220137Strasz
375220137Strasz	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
376220137Strasz
377220137Strasz	/*
378220137Strasz	 * We need proc lock to dereference p->p_ucred.
379220137Strasz	 */
380220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
381220137Strasz
382220137Strasz	diff = amount - p->p_racct->r_resources[resource];
383220137Strasz#ifdef notyet
384223844Strasz	KASSERT(diff >= 0 || RACCT_IS_RECLAIMABLE(resource),
385220137Strasz	    ("racct_set: usage of non-reclaimable resource %d dropping",
386220137Strasz	     resource));
387220137Strasz#endif
388220137Strasz#ifdef RCTL
389220137Strasz	if (diff > 0) {
390220137Strasz		error = rctl_enforce(p, resource, diff);
391223844Strasz		if (error && RACCT_IS_DENIABLE(resource)) {
392220137Strasz			SDT_PROBE(racct, kernel, rusage, set_failure, p,
393220137Strasz			    resource, amount, 0, 0);
394220137Strasz			return (error);
395220137Strasz		}
396220137Strasz	}
397220137Strasz#endif
398220137Strasz	racct_alloc_resource(p->p_racct, resource, diff);
399220137Strasz	if (diff > 0)
400220137Strasz		racct_add_cred_locked(p->p_ucred, resource, diff);
401220137Strasz	else if (diff < 0)
402220137Strasz		racct_sub_cred_locked(p->p_ucred, resource, -diff);
403220137Strasz
404220137Strasz	return (0);
405220137Strasz}
406220137Strasz
407220137Strasz/*
408220137Strasz * Set allocation of 'resource' to 'amount' for process 'p'.
409220137Strasz * Return 0 if it's below limits, or errno, if it's not.
410220137Strasz *
411220137Strasz * Note that decreasing the allocation always returns 0,
412220137Strasz * even if it's above the limit.
413220137Strasz */
414220137Straszint
415220137Straszracct_set(struct proc *p, int resource, uint64_t amount)
416220137Strasz{
417220137Strasz	int error;
418220137Strasz
419220137Strasz	mtx_lock(&racct_lock);
420220137Strasz	error = racct_set_locked(p, resource, amount);
421220137Strasz	mtx_unlock(&racct_lock);
422220137Strasz	return (error);
423220137Strasz}
424220137Strasz
425220137Straszvoid
426220137Straszracct_set_force(struct proc *p, int resource, uint64_t amount)
427220137Strasz{
428220137Strasz	int64_t diff;
429220137Strasz
430220137Strasz	if (p->p_flag & P_SYSTEM)
431220137Strasz		return;
432220137Strasz
433220137Strasz	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
434220137Strasz
435220137Strasz	/*
436220137Strasz	 * We need proc lock to dereference p->p_ucred.
437220137Strasz	 */
438220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
439220137Strasz
440220137Strasz	mtx_lock(&racct_lock);
441220137Strasz	diff = amount - p->p_racct->r_resources[resource];
442220137Strasz	racct_alloc_resource(p->p_racct, resource, diff);
443220137Strasz	if (diff > 0)
444220137Strasz		racct_add_cred_locked(p->p_ucred, resource, diff);
445220137Strasz	else if (diff < 0)
446220137Strasz		racct_sub_cred_locked(p->p_ucred, resource, -diff);
447220137Strasz	mtx_unlock(&racct_lock);
448220137Strasz}
449220137Strasz
/*
 * Returns amount of 'resource' the process 'p' can keep allocated.
 * Allocating more than that would be denied, unless the resource
 * is marked undeniable.  The amount already allocated is not taken
 * into account.
 *
 * Without RCTL there are no limits, so UINT64_MAX is returned.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{
	uint64_t limit;

#ifdef RCTL
	limit = rctl_get_limit(p, resource);
#else
	limit = UINT64_MAX;
#endif
	return (limit);
}
466220137Strasz
/*
 * Returns amount of 'resource' the process 'p' can still allocate.
 * Allocating more than that would be denied, unless the resource
 * is marked undeniable.  The amount already allocated is taken into
 * account.
 *
 * Without RCTL there are no limits, so UINT64_MAX is returned.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{
	uint64_t avail;

#ifdef RCTL
	avail = rctl_get_available(p, resource);
#else
	avail = UINT64_MAX;
#endif
	return (avail);
}
483220137Strasz
484220137Strasz/*
485220137Strasz * Decrease allocation of 'resource' by 'amount' for process 'p'.
486220137Strasz */
487220137Straszvoid
488220137Straszracct_sub(struct proc *p, int resource, uint64_t amount)
489220137Strasz{
490220137Strasz
491220137Strasz	if (p->p_flag & P_SYSTEM)
492220137Strasz		return;
493220137Strasz
494220137Strasz	SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
495220137Strasz
496220137Strasz	/*
497220137Strasz	 * We need proc lock to dereference p->p_ucred.
498220137Strasz	 */
499220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
500223844Strasz	KASSERT(RACCT_IS_RECLAIMABLE(resource),
501220137Strasz	    ("racct_sub: called for non-reclaimable resource %d", resource));
502220137Strasz
503220137Strasz	mtx_lock(&racct_lock);
504220137Strasz	KASSERT(amount <= p->p_racct->r_resources[resource],
505220137Strasz	    ("racct_sub: freeing %ju of resource %d, which is more "
506220137Strasz	     "than allocated %jd for %s (pid %d)", amount, resource,
507220137Strasz	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
508220137Strasz
509220137Strasz	racct_alloc_resource(p->p_racct, resource, -amount);
510220137Strasz	racct_sub_cred_locked(p->p_ucred, resource, amount);
511220137Strasz	mtx_unlock(&racct_lock);
512220137Strasz}
513220137Strasz
514220137Straszstatic void
515220137Straszracct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
516220137Strasz{
517220137Strasz	struct prison *pr;
518220137Strasz
519220137Strasz	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
520220137Strasz	    0, 0);
521220137Strasz
522220137Strasz#ifdef notyet
523223844Strasz	KASSERT(RACCT_IS_RECLAIMABLE(resource),
524220137Strasz	    ("racct_sub_cred: called for non-reclaimable resource %d",
525220137Strasz	     resource));
526220137Strasz#endif
527220137Strasz
528220137Strasz	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
529220137Strasz	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
530221362Strasz		racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
531221362Strasz		    -amount);
532220137Strasz	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
533220137Strasz}
534220137Strasz
535220137Strasz/*
536220137Strasz * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
537220137Strasz */
538220137Straszvoid
539220137Straszracct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
540220137Strasz{
541220137Strasz
542220137Strasz	mtx_lock(&racct_lock);
543220137Strasz	racct_sub_cred_locked(cred, resource, amount);
544220137Strasz	mtx_unlock(&racct_lock);
545220137Strasz}
546220137Strasz
547220137Strasz/*
548220137Strasz * Inherit resource usage information from the parent process.
549220137Strasz */
550220137Straszint
551220137Straszracct_proc_fork(struct proc *parent, struct proc *child)
552220137Strasz{
553220137Strasz	int i, error = 0;
554220137Strasz
555220137Strasz	/*
556220137Strasz	 * Create racct for the child process.
557220137Strasz	 */
558220137Strasz	racct_create(&child->p_racct);
559220137Strasz
560220137Strasz	/*
561220137Strasz	 * No resource accounting for kernel processes.
562220137Strasz	 */
563220137Strasz	if (child->p_flag & P_SYSTEM)
564220137Strasz		return (0);
565220137Strasz
566220137Strasz	PROC_LOCK(parent);
567220137Strasz	PROC_LOCK(child);
568220137Strasz	mtx_lock(&racct_lock);
569220137Strasz
570225981Strasz#ifdef RCTL
571225981Strasz	error = rctl_proc_fork(parent, child);
572225981Strasz	if (error != 0)
573225981Strasz		goto out;
574225981Strasz#endif
575225981Strasz
576220137Strasz	/*
577220137Strasz	 * Inherit resource usage.
578220137Strasz	 */
579220137Strasz	for (i = 0; i <= RACCT_MAX; i++) {
580220137Strasz		if (parent->p_racct->r_resources[i] == 0 ||
581223844Strasz		    !RACCT_IS_INHERITABLE(i))
582220137Strasz			continue;
583220137Strasz
584220137Strasz		error = racct_set_locked(child, i,
585220137Strasz		    parent->p_racct->r_resources[i]);
586225938Strasz		if (error != 0)
587220137Strasz			goto out;
588220137Strasz	}
589220137Strasz
590225944Strasz	error = racct_add_locked(child, RACCT_NPROC, 1);
591225944Strasz	error += racct_add_locked(child, RACCT_NTHR, 1);
592225944Strasz
593220137Straszout:
594220137Strasz	mtx_unlock(&racct_lock);
595220137Strasz	PROC_UNLOCK(child);
596220137Strasz	PROC_UNLOCK(parent);
597220137Strasz
598220137Strasz	return (error);
599220137Strasz}
600220137Strasz
/*
 * Called at the end of fork1(), to handle rules that require the process
 * to be fully initialized.
 *
 * With RCTL compiled in, rctl_enforce() is invoked for RACCT_NPROC and
 * RACCT_NTHR with an amount of 0 and its result is discarded.  Without
 * RCTL this function does nothing.
 */
void
racct_proc_fork_done(struct proc *child)
{

#ifdef RCTL
	PROC_LOCK(child);
	mtx_lock(&racct_lock);

	/* Return values are intentionally not examined. */
	rctl_enforce(child, RACCT_NPROC, 0);
	rctl_enforce(child, RACCT_NTHR, 0);

	mtx_unlock(&racct_lock);
	PROC_UNLOCK(child);
#endif
}
618225940Strasz
619225940Straszvoid
620220137Straszracct_proc_exit(struct proc *p)
621220137Strasz{
622225364Strasz	int i;
623220137Strasz	uint64_t runtime;
624220137Strasz
625220137Strasz	PROC_LOCK(p);
626220137Strasz	/*
627220137Strasz	 * We don't need to calculate rux, proc_reap() has already done this.
628220137Strasz	 */
629220137Strasz	runtime = cputick2usec(p->p_rux.rux_runtime);
630220137Strasz#ifdef notyet
631220137Strasz	KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
632220137Strasz#else
633220137Strasz	if (runtime < p->p_prev_runtime)
634220137Strasz		runtime = p->p_prev_runtime;
635220137Strasz#endif
636225364Strasz	mtx_lock(&racct_lock);
637225364Strasz	racct_set_locked(p, RACCT_CPU, runtime);
638220137Strasz
639225364Strasz	for (i = 0; i <= RACCT_MAX; i++) {
640225364Strasz		if (p->p_racct->r_resources[i] == 0)
641225364Strasz			continue;
642225364Strasz	    	if (!RACCT_IS_RECLAIMABLE(i))
643225364Strasz			continue;
644225364Strasz		racct_set_locked(p, i, 0);
645225364Strasz	}
646225364Strasz
647225364Strasz	mtx_unlock(&racct_lock);
648220137Strasz	PROC_UNLOCK(p);
649220137Strasz
650220137Strasz#ifdef RCTL
651220137Strasz	rctl_racct_release(p->p_racct);
652220137Strasz#endif
653220137Strasz	racct_destroy(&p->p_racct);
654220137Strasz}
655220137Strasz
656220137Strasz/*
657220137Strasz * Called after credentials change, to move resource utilisation
658220137Strasz * between raccts.
659220137Strasz */
660220137Straszvoid
661220137Straszracct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
662220137Strasz    struct ucred *newcred)
663220137Strasz{
664220137Strasz	struct uidinfo *olduip, *newuip;
665220137Strasz	struct loginclass *oldlc, *newlc;
666220137Strasz	struct prison *oldpr, *newpr, *pr;
667220137Strasz
668220137Strasz	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
669220137Strasz
670220137Strasz	newuip = newcred->cr_ruidinfo;
671220137Strasz	olduip = oldcred->cr_ruidinfo;
672220137Strasz	newlc = newcred->cr_loginclass;
673220137Strasz	oldlc = oldcred->cr_loginclass;
674220137Strasz	newpr = newcred->cr_prison;
675220137Strasz	oldpr = oldcred->cr_prison;
676220137Strasz
677220137Strasz	mtx_lock(&racct_lock);
678220137Strasz	if (newuip != olduip) {
679220137Strasz		racct_sub_racct(olduip->ui_racct, p->p_racct);
680220137Strasz		racct_add_racct(newuip->ui_racct, p->p_racct);
681220137Strasz	}
682220137Strasz	if (newlc != oldlc) {
683220137Strasz		racct_sub_racct(oldlc->lc_racct, p->p_racct);
684220137Strasz		racct_add_racct(newlc->lc_racct, p->p_racct);
685220137Strasz	}
686220137Strasz	if (newpr != oldpr) {
687220137Strasz		for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
688221362Strasz			racct_sub_racct(pr->pr_prison_racct->prr_racct,
689221362Strasz			    p->p_racct);
690220137Strasz		for (pr = newpr; pr != NULL; pr = pr->pr_parent)
691221362Strasz			racct_add_racct(pr->pr_prison_racct->prr_racct,
692221362Strasz			    p->p_racct);
693220137Strasz	}
694220137Strasz	mtx_unlock(&racct_lock);
695220137Strasz
696220137Strasz#ifdef RCTL
697220137Strasz	rctl_proc_ucred_changed(p, newcred);
698220137Strasz#endif
699220137Strasz}
700220137Strasz
701220137Straszstatic void
702220137Straszracctd(void)
703220137Strasz{
704220137Strasz	struct thread *td;
705220137Strasz	struct proc *p;
706220137Strasz	struct timeval wallclock;
707220137Strasz	uint64_t runtime;
708220137Strasz
709220137Strasz	for (;;) {
710220137Strasz		sx_slock(&allproc_lock);
711220137Strasz
712220137Strasz		FOREACH_PROC_IN_SYSTEM(p) {
713220137Strasz			if (p->p_state != PRS_NORMAL)
714220137Strasz				continue;
715220137Strasz			if (p->p_flag & P_SYSTEM)
716220137Strasz				continue;
717220137Strasz
718220137Strasz			microuptime(&wallclock);
719220137Strasz			timevalsub(&wallclock, &p->p_stats->p_start);
720220137Strasz			PROC_LOCK(p);
721220137Strasz			PROC_SLOCK(p);
722220137Strasz			FOREACH_THREAD_IN_PROC(p, td) {
723220137Strasz				ruxagg(p, td);
724220137Strasz				thread_lock(td);
725220137Strasz				thread_unlock(td);
726220137Strasz			}
727220137Strasz			runtime = cputick2usec(p->p_rux.rux_runtime);
728220137Strasz			PROC_SUNLOCK(p);
729220137Strasz#ifdef notyet
730220137Strasz			KASSERT(runtime >= p->p_prev_runtime,
731220137Strasz			    ("runtime < p_prev_runtime"));
732220137Strasz#else
733220137Strasz			if (runtime < p->p_prev_runtime)
734220137Strasz				runtime = p->p_prev_runtime;
735220137Strasz#endif
736220137Strasz			p->p_prev_runtime = runtime;
737220137Strasz			mtx_lock(&racct_lock);
738220137Strasz			racct_set_locked(p, RACCT_CPU, runtime);
739220137Strasz			racct_set_locked(p, RACCT_WALLCLOCK,
740220137Strasz			    wallclock.tv_sec * 1000000 + wallclock.tv_usec);
741220137Strasz			mtx_unlock(&racct_lock);
742220137Strasz			PROC_UNLOCK(p);
743220137Strasz		}
744220137Strasz		sx_sunlock(&allproc_lock);
745220137Strasz		pause("-", hz);
746220137Strasz	}
747220137Strasz}
748220137Strasz
749220137Straszstatic struct kproc_desc racctd_kp = {
750220137Strasz	"racctd",
751220137Strasz	racctd,
752220137Strasz	NULL
753220137Strasz};
754220137StraszSYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
755220137Strasz
756220137Straszstatic void
757220137Straszracct_init(void)
758220137Strasz{
759220137Strasz
760220137Strasz	racct_zone = uma_zcreate("racct", sizeof(struct racct),
761220137Strasz	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
762220137Strasz	/*
763220137Strasz	 * XXX: Move this somewhere.
764220137Strasz	 */
765221362Strasz	prison0.pr_prison_racct = prison_racct_find("0");
766220137Strasz}
767220137StraszSYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
768220137Strasz
769220137Strasz#else /* !RACCT */
770220137Strasz
/*
 * Stub used when RACCT is not compiled in: nothing is accounted and the
 * request always succeeds.
 */
int
racct_add(struct proc *p, int resource, uint64_t amount)
{

	return (0);
}
777220137Strasz
/* Stub used when RACCT is not compiled in: does nothing. */
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{

}
782220137Strasz
/* Stub used when RACCT is not compiled in: does nothing. */
void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{

}
789220137Strasz
/*
 * Stub used when RACCT is not compiled in: nothing is recorded and the
 * request always succeeds.
 */
int
racct_set(struct proc *p, int resource, uint64_t amount)
{

	return (0);
}
796220137Strasz
/* Stub used when RACCT is not compiled in: does nothing. */
void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{

}
801220372Strasz
/* Stub used when RACCT is not compiled in: does nothing. */
void
racct_sub(struct proc *p, int resource, uint64_t amount)
{

}
806220137Strasz
/* Stub used when RACCT is not compiled in: does nothing. */
void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{

}
811220137Strasz
/*
 * Stub used when RACCT is not compiled in: there are no limits, so the
 * largest representable amount is reported.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{

	return (UINT64_MAX);
}
818220137Strasz
/*
 * Stub used when RACCT is not compiled in: there are no limits, so the
 * largest representable amount is reported as available.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{

	return (UINT64_MAX);
}
825220372Strasz
/*
 * Stub used when RACCT is not compiled in: no racct is allocated and
 * '*racctp' is left untouched.
 */
void
racct_create(struct racct **racctp)
{

}
830220137Strasz
/*
 * Stub used when RACCT is not compiled in: nothing is freed and
 * '*racctp' is left untouched.
 */
void
racct_destroy(struct racct **racctp)
{

}
835220137Strasz
/*
 * Stub used when RACCT is not compiled in: nothing is inherited and the
 * fork is always allowed.
 */
int
racct_proc_fork(struct proc *parent, struct proc *child)
{

	return (0);
}
842220137Strasz
/* Stub used when RACCT is not compiled in: does nothing. */
void
racct_proc_fork_done(struct proc *child)
{

}
847225940Strasz
/* Stub used when RACCT is not compiled in: does nothing. */
void
racct_proc_exit(struct proc *p)
{

}
852220137Strasz
853220137Strasz#endif /* !RACCT */
854