kern_racct.c revision 232598
1220137Strasz/*-
2220137Strasz * Copyright (c) 2010 The FreeBSD Foundation
3220137Strasz * All rights reserved.
4220137Strasz *
5220137Strasz * This software was developed by Edward Tomasz Napierala under sponsorship
6220137Strasz * from the FreeBSD Foundation.
7220137Strasz *
8220137Strasz * Redistribution and use in source and binary forms, with or without
9220137Strasz * modification, are permitted provided that the following conditions
10220137Strasz * are met:
11220137Strasz * 1. Redistributions of source code must retain the above copyright
12220137Strasz *    notice, this list of conditions and the following disclaimer.
13220137Strasz * 2. Redistributions in binary form must reproduce the above copyright
14220137Strasz *    notice, this list of conditions and the following disclaimer in the
15220137Strasz *    documentation and/or other materials provided with the distribution.
16220137Strasz *
17220137Strasz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18220137Strasz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19220137Strasz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20220137Strasz * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21220137Strasz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22220137Strasz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23220137Strasz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24220137Strasz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25220137Strasz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26220137Strasz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27220137Strasz * SUCH DAMAGE.
28220137Strasz *
29220137Strasz * $FreeBSD: head/sys/kern/kern_racct.c 232598 2012-03-06 11:05:50Z trasz $
30220137Strasz */
31220137Strasz
32220137Strasz#include <sys/cdefs.h>
33220137Strasz__FBSDID("$FreeBSD: head/sys/kern/kern_racct.c 232598 2012-03-06 11:05:50Z trasz $");
34220137Strasz
35220137Strasz#include "opt_kdtrace.h"
36220137Strasz
37220137Strasz#include <sys/param.h>
38228430Savg#include <sys/systm.h>
39220137Strasz#include <sys/eventhandler.h>
40220137Strasz#include <sys/jail.h>
41220137Strasz#include <sys/kernel.h>
42220137Strasz#include <sys/kthread.h>
43220137Strasz#include <sys/lock.h>
44220137Strasz#include <sys/loginclass.h>
45220137Strasz#include <sys/malloc.h>
46220137Strasz#include <sys/mutex.h>
47220137Strasz#include <sys/proc.h>
48220137Strasz#include <sys/racct.h>
49220137Strasz#include <sys/resourcevar.h>
50220137Strasz#include <sys/sbuf.h>
51220137Strasz#include <sys/sched.h>
52220137Strasz#include <sys/sdt.h>
53220137Strasz#include <sys/sx.h>
54220137Strasz#include <sys/sysent.h>
55220137Strasz#include <sys/sysproto.h>
56220137Strasz#include <sys/umtx.h>
57220137Strasz
58220137Strasz#ifdef RCTL
59220137Strasz#include <sys/rctl.h>
60220137Strasz#endif
61220137Strasz
62220137Strasz#ifdef RACCT
63220137Strasz
64220137StraszFEATURE(racct, "Resource Accounting");
65220137Strasz
66220137Straszstatic struct mtx racct_lock;
67220137StraszMTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
68220137Strasz
69220137Straszstatic uma_zone_t racct_zone;
70220137Strasz
71220137Straszstatic void racct_sub_racct(struct racct *dest, const struct racct *src);
72220137Straszstatic void racct_sub_cred_locked(struct ucred *cred, int resource,
73220137Strasz		uint64_t amount);
74220137Straszstatic void racct_add_cred_locked(struct ucred *cred, int resource,
75220137Strasz		uint64_t amount);
76220137Strasz
77220137StraszSDT_PROVIDER_DEFINE(racct);
78220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
79220137Strasz    "uint64_t");
80220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
81220137Strasz    "struct proc *", "int", "uint64_t");
82220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
83220137Strasz    "int", "uint64_t");
84220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
85220137Strasz    "int", "uint64_t");
86220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
87220137Strasz    "uint64_t");
88220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
89220137Strasz    "struct proc *", "int", "uint64_t");
90220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
91220137Strasz    "uint64_t");
92220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
93220137Strasz    "int", "uint64_t");
94220137StraszSDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
95220137StraszSDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
96220137StraszSDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
97220137Strasz    "struct racct *");
98220137StraszSDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
99220137Strasz    "struct racct *", "struct racct *");
100220137StraszSDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
101220137Strasz    "struct racct *");
102220137Strasz
103220137Straszint racct_types[] = {
104220137Strasz	[RACCT_CPU] =
105224036Strasz		RACCT_IN_MILLIONS,
106220137Strasz	[RACCT_DATA] =
107220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
108220137Strasz	[RACCT_STACK] =
109220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
110220137Strasz	[RACCT_CORE] =
111220137Strasz		RACCT_DENIABLE,
112220137Strasz	[RACCT_RSS] =
113220137Strasz		RACCT_RECLAIMABLE,
114220137Strasz	[RACCT_MEMLOCK] =
115220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE,
116220137Strasz	[RACCT_NPROC] =
117220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE,
118220137Strasz	[RACCT_NOFILE] =
119220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
120220137Strasz	[RACCT_VMEM] =
121220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
122220137Strasz	[RACCT_NPTS] =
123220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
124220137Strasz	[RACCT_SWAP] =
125220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
126220137Strasz	[RACCT_NTHR] =
127220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE,
128220137Strasz	[RACCT_MSGQQUEUED] =
129220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
130220137Strasz	[RACCT_MSGQSIZE] =
131220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
132220137Strasz	[RACCT_NMSGQ] =
133220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
134220137Strasz	[RACCT_NSEM] =
135220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
136220137Strasz	[RACCT_NSEMOP] =
137220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
138220137Strasz	[RACCT_NSHM] =
139220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
140220137Strasz	[RACCT_SHMSIZE] =
141220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
142220137Strasz	[RACCT_WALLCLOCK] =
143224036Strasz		RACCT_IN_MILLIONS };
144220137Strasz
145220137Straszstatic void
146220137Straszracct_add_racct(struct racct *dest, const struct racct *src)
147220137Strasz{
148220137Strasz	int i;
149220137Strasz
150220137Strasz	mtx_assert(&racct_lock, MA_OWNED);
151220137Strasz
152220137Strasz	/*
153220137Strasz	 * Update resource usage in dest.
154220137Strasz	 */
155220137Strasz	for (i = 0; i <= RACCT_MAX; i++) {
156220137Strasz		KASSERT(dest->r_resources[i] >= 0,
157220137Strasz		    ("racct propagation meltdown: dest < 0"));
158220137Strasz		KASSERT(src->r_resources[i] >= 0,
159220137Strasz		    ("racct propagation meltdown: src < 0"));
160220137Strasz		dest->r_resources[i] += src->r_resources[i];
161220137Strasz	}
162220137Strasz}
163220137Strasz
164220137Straszstatic void
165220137Straszracct_sub_racct(struct racct *dest, const struct racct *src)
166220137Strasz{
167220137Strasz	int i;
168220137Strasz
169220137Strasz	mtx_assert(&racct_lock, MA_OWNED);
170220137Strasz
171220137Strasz	/*
172220137Strasz	 * Update resource usage in dest.
173220137Strasz	 */
174220137Strasz	for (i = 0; i <= RACCT_MAX; i++) {
175223844Strasz		if (!RACCT_IS_SLOPPY(i)) {
176220137Strasz			KASSERT(dest->r_resources[i] >= 0,
177220137Strasz			    ("racct propagation meltdown: dest < 0"));
178220137Strasz			KASSERT(src->r_resources[i] >= 0,
179220137Strasz			    ("racct propagation meltdown: src < 0"));
180220137Strasz			KASSERT(src->r_resources[i] <= dest->r_resources[i],
181220137Strasz			    ("racct propagation meltdown: src > dest"));
182220137Strasz		}
183223844Strasz		if (RACCT_IS_RECLAIMABLE(i)) {
184220137Strasz			dest->r_resources[i] -= src->r_resources[i];
185220137Strasz			if (dest->r_resources[i] < 0) {
186223844Strasz				KASSERT(RACCT_IS_SLOPPY(i),
187220137Strasz				    ("racct_sub_racct: usage < 0"));
188220137Strasz				dest->r_resources[i] = 0;
189220137Strasz			}
190220137Strasz		}
191220137Strasz	}
192220137Strasz}
193220137Strasz
194220137Straszvoid
195220137Straszracct_create(struct racct **racctp)
196220137Strasz{
197220137Strasz
198220137Strasz	SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
199220137Strasz
200220137Strasz	KASSERT(*racctp == NULL, ("racct already allocated"));
201220137Strasz
202220137Strasz	*racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
203220137Strasz}
204220137Strasz
205220137Straszstatic void
206220137Straszracct_destroy_locked(struct racct **racctp)
207220137Strasz{
208220137Strasz	int i;
209220137Strasz	struct racct *racct;
210220137Strasz
211220137Strasz	SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
212220137Strasz
213220137Strasz	mtx_assert(&racct_lock, MA_OWNED);
214220137Strasz	KASSERT(racctp != NULL, ("NULL racctp"));
215220137Strasz	KASSERT(*racctp != NULL, ("NULL racct"));
216220137Strasz
217220137Strasz	racct = *racctp;
218220137Strasz
219220137Strasz	for (i = 0; i <= RACCT_MAX; i++) {
220223844Strasz		if (RACCT_IS_SLOPPY(i))
221220137Strasz			continue;
222223844Strasz		if (!RACCT_IS_RECLAIMABLE(i))
223220137Strasz			continue;
224220137Strasz		KASSERT(racct->r_resources[i] == 0,
225220137Strasz		    ("destroying non-empty racct: "
226220137Strasz		    "%ju allocated for resource %d\n",
227220137Strasz		    racct->r_resources[i], i));
228220137Strasz	}
229220137Strasz	uma_zfree(racct_zone, racct);
230220137Strasz	*racctp = NULL;
231220137Strasz}
232220137Strasz
233220137Straszvoid
234220137Straszracct_destroy(struct racct **racct)
235220137Strasz{
236220137Strasz
237220137Strasz	mtx_lock(&racct_lock);
238220137Strasz	racct_destroy_locked(racct);
239220137Strasz	mtx_unlock(&racct_lock);
240220137Strasz}
241220137Strasz
242220137Strasz/*
243220137Strasz * Increase consumption of 'resource' by 'amount' for 'racct'
244220137Strasz * and all its parents.  Differently from other cases, 'amount' here
245220137Strasz * may be less than zero.
246220137Strasz */
247220137Straszstatic void
248220137Straszracct_alloc_resource(struct racct *racct, int resource,
249220137Strasz    uint64_t amount)
250220137Strasz{
251220137Strasz
252220137Strasz	mtx_assert(&racct_lock, MA_OWNED);
253220137Strasz	KASSERT(racct != NULL, ("NULL racct"));
254220137Strasz
255220137Strasz	racct->r_resources[resource] += amount;
256220137Strasz	if (racct->r_resources[resource] < 0) {
257223844Strasz		KASSERT(RACCT_IS_SLOPPY(resource),
258220137Strasz		    ("racct_alloc_resource: usage < 0"));
259220137Strasz		racct->r_resources[resource] = 0;
260220137Strasz	}
261220137Strasz}
262220137Strasz
263225944Straszstatic int
264225944Straszracct_add_locked(struct proc *p, int resource, uint64_t amount)
265220137Strasz{
266220137Strasz#ifdef RCTL
267220137Strasz	int error;
268220137Strasz#endif
269220137Strasz
270220137Strasz	if (p->p_flag & P_SYSTEM)
271220137Strasz		return (0);
272220137Strasz
273220137Strasz	SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
274220137Strasz
275220137Strasz	/*
276220137Strasz	 * We need proc lock to dereference p->p_ucred.
277220137Strasz	 */
278220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
279220137Strasz
280220137Strasz#ifdef RCTL
281220137Strasz	error = rctl_enforce(p, resource, amount);
282223844Strasz	if (error && RACCT_IS_DENIABLE(resource)) {
283220137Strasz		SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
284220137Strasz		    amount, 0, 0);
285220137Strasz		return (error);
286220137Strasz	}
287220137Strasz#endif
288220137Strasz	racct_alloc_resource(p->p_racct, resource, amount);
289220137Strasz	racct_add_cred_locked(p->p_ucred, resource, amount);
290220137Strasz
291220137Strasz	return (0);
292220137Strasz}
293220137Strasz
294225944Strasz/*
295225944Strasz * Increase allocation of 'resource' by 'amount' for process 'p'.
296225944Strasz * Return 0 if it's below limits, or errno, if it's not.
297225944Strasz */
298225944Straszint
299225944Straszracct_add(struct proc *p, int resource, uint64_t amount)
300225944Strasz{
301225944Strasz	int error;
302225944Strasz
303225944Strasz	mtx_lock(&racct_lock);
304225944Strasz	error = racct_add_locked(p, resource, amount);
305225944Strasz	mtx_unlock(&racct_lock);
306225944Strasz	return (error);
307225944Strasz}
308225944Strasz
309220137Straszstatic void
310220137Straszracct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
311220137Strasz{
312220137Strasz	struct prison *pr;
313220137Strasz
314220137Strasz	SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
315220137Strasz	    0, 0);
316220137Strasz
317220137Strasz	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
318220137Strasz	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
319221362Strasz		racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
320221362Strasz		    amount);
321220137Strasz	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
322220137Strasz}
323220137Strasz
324220137Strasz/*
325220137Strasz * Increase allocation of 'resource' by 'amount' for credential 'cred'.
326220137Strasz * Doesn't check for limits and never fails.
327220137Strasz *
328220137Strasz * XXX: Shouldn't this ever return an error?
329220137Strasz */
330220137Straszvoid
331220137Straszracct_add_cred(struct ucred *cred, int resource, uint64_t amount)
332220137Strasz{
333220137Strasz
334220137Strasz	mtx_lock(&racct_lock);
335220137Strasz	racct_add_cred_locked(cred, resource, amount);
336220137Strasz	mtx_unlock(&racct_lock);
337220137Strasz}
338220137Strasz
339220137Strasz/*
340220137Strasz * Increase allocation of 'resource' by 'amount' for process 'p'.
341220137Strasz * Doesn't check for limits and never fails.
342220137Strasz */
343220137Straszvoid
344220137Straszracct_add_force(struct proc *p, int resource, uint64_t amount)
345220137Strasz{
346220137Strasz
347220137Strasz	if (p->p_flag & P_SYSTEM)
348220137Strasz		return;
349220137Strasz
350220137Strasz	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
351220137Strasz
352220137Strasz	/*
353220137Strasz	 * We need proc lock to dereference p->p_ucred.
354220137Strasz	 */
355220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
356220137Strasz
357220137Strasz	mtx_lock(&racct_lock);
358220137Strasz	racct_alloc_resource(p->p_racct, resource, amount);
359220137Strasz	mtx_unlock(&racct_lock);
360220137Strasz	racct_add_cred(p->p_ucred, resource, amount);
361220137Strasz}
362220137Strasz
363220137Straszstatic int
364220137Straszracct_set_locked(struct proc *p, int resource, uint64_t amount)
365220137Strasz{
366220137Strasz	int64_t diff;
367220137Strasz#ifdef RCTL
368220137Strasz	int error;
369220137Strasz#endif
370220137Strasz
371220137Strasz	if (p->p_flag & P_SYSTEM)
372220137Strasz		return (0);
373220137Strasz
374220137Strasz	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
375220137Strasz
376220137Strasz	/*
377220137Strasz	 * We need proc lock to dereference p->p_ucred.
378220137Strasz	 */
379220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
380220137Strasz
381220137Strasz	diff = amount - p->p_racct->r_resources[resource];
382220137Strasz#ifdef notyet
383223844Strasz	KASSERT(diff >= 0 || RACCT_IS_RECLAIMABLE(resource),
384220137Strasz	    ("racct_set: usage of non-reclaimable resource %d dropping",
385220137Strasz	     resource));
386220137Strasz#endif
387220137Strasz#ifdef RCTL
388220137Strasz	if (diff > 0) {
389220137Strasz		error = rctl_enforce(p, resource, diff);
390223844Strasz		if (error && RACCT_IS_DENIABLE(resource)) {
391220137Strasz			SDT_PROBE(racct, kernel, rusage, set_failure, p,
392220137Strasz			    resource, amount, 0, 0);
393220137Strasz			return (error);
394220137Strasz		}
395220137Strasz	}
396220137Strasz#endif
397220137Strasz	racct_alloc_resource(p->p_racct, resource, diff);
398220137Strasz	if (diff > 0)
399220137Strasz		racct_add_cred_locked(p->p_ucred, resource, diff);
400220137Strasz	else if (diff < 0)
401220137Strasz		racct_sub_cred_locked(p->p_ucred, resource, -diff);
402220137Strasz
403220137Strasz	return (0);
404220137Strasz}
405220137Strasz
406220137Strasz/*
407220137Strasz * Set allocation of 'resource' to 'amount' for process 'p'.
408220137Strasz * Return 0 if it's below limits, or errno, if it's not.
409220137Strasz *
410220137Strasz * Note that decreasing the allocation always returns 0,
411220137Strasz * even if it's above the limit.
412220137Strasz */
413220137Straszint
414220137Straszracct_set(struct proc *p, int resource, uint64_t amount)
415220137Strasz{
416220137Strasz	int error;
417220137Strasz
418220137Strasz	mtx_lock(&racct_lock);
419220137Strasz	error = racct_set_locked(p, resource, amount);
420220137Strasz	mtx_unlock(&racct_lock);
421220137Strasz	return (error);
422220137Strasz}
423220137Strasz
424220137Straszvoid
425220137Straszracct_set_force(struct proc *p, int resource, uint64_t amount)
426220137Strasz{
427220137Strasz	int64_t diff;
428220137Strasz
429220137Strasz	if (p->p_flag & P_SYSTEM)
430220137Strasz		return;
431220137Strasz
432220137Strasz	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
433220137Strasz
434220137Strasz	/*
435220137Strasz	 * We need proc lock to dereference p->p_ucred.
436220137Strasz	 */
437220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
438220137Strasz
439220137Strasz	mtx_lock(&racct_lock);
440220137Strasz	diff = amount - p->p_racct->r_resources[resource];
441220137Strasz	racct_alloc_resource(p->p_racct, resource, diff);
442220137Strasz	if (diff > 0)
443220137Strasz		racct_add_cred_locked(p->p_ucred, resource, diff);
444220137Strasz	else if (diff < 0)
445220137Strasz		racct_sub_cred_locked(p->p_ucred, resource, -diff);
446220137Strasz	mtx_unlock(&racct_lock);
447220137Strasz}
448220137Strasz
/*
 * Report how much of 'resource' process 'p' may keep allocated.
 * Allocations beyond that would be denied unless the resource is marked
 * undeniable.  Current usage is not taken into account.
 *
 * Without RCTL there are no limits and UINT64_MAX is returned.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{
	uint64_t limit;

#ifdef RCTL
	limit = rctl_get_limit(p, resource);
#else
	limit = UINT64_MAX;
#endif
	return (limit);
}
465220137Strasz
/*
 * Report how much of 'resource' process 'p' may keep allocated.
 * Allocations beyond that would be denied unless the resource is marked
 * undeniable.  In contrast to racct_get_limit(), current usage is taken
 * into account.
 *
 * Without RCTL there are no limits and UINT64_MAX is returned.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{
	uint64_t avail;

#ifdef RCTL
	avail = rctl_get_available(p, resource);
#else
	avail = UINT64_MAX;
#endif
	return (avail);
}
482220137Strasz
483220137Strasz/*
484220137Strasz * Decrease allocation of 'resource' by 'amount' for process 'p'.
485220137Strasz */
486220137Straszvoid
487220137Straszracct_sub(struct proc *p, int resource, uint64_t amount)
488220137Strasz{
489220137Strasz
490220137Strasz	if (p->p_flag & P_SYSTEM)
491220137Strasz		return;
492220137Strasz
493220137Strasz	SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
494220137Strasz
495220137Strasz	/*
496220137Strasz	 * We need proc lock to dereference p->p_ucred.
497220137Strasz	 */
498220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
499223844Strasz	KASSERT(RACCT_IS_RECLAIMABLE(resource),
500220137Strasz	    ("racct_sub: called for non-reclaimable resource %d", resource));
501220137Strasz
502220137Strasz	mtx_lock(&racct_lock);
503220137Strasz	KASSERT(amount <= p->p_racct->r_resources[resource],
504220137Strasz	    ("racct_sub: freeing %ju of resource %d, which is more "
505220137Strasz	     "than allocated %jd for %s (pid %d)", amount, resource,
506220137Strasz	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
507220137Strasz
508220137Strasz	racct_alloc_resource(p->p_racct, resource, -amount);
509220137Strasz	racct_sub_cred_locked(p->p_ucred, resource, amount);
510220137Strasz	mtx_unlock(&racct_lock);
511220137Strasz}
512220137Strasz
513220137Straszstatic void
514220137Straszracct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
515220137Strasz{
516220137Strasz	struct prison *pr;
517220137Strasz
518220137Strasz	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
519220137Strasz	    0, 0);
520220137Strasz
521220137Strasz#ifdef notyet
522223844Strasz	KASSERT(RACCT_IS_RECLAIMABLE(resource),
523220137Strasz	    ("racct_sub_cred: called for non-reclaimable resource %d",
524220137Strasz	     resource));
525220137Strasz#endif
526220137Strasz
527220137Strasz	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
528220137Strasz	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
529221362Strasz		racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
530221362Strasz		    -amount);
531220137Strasz	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
532220137Strasz}
533220137Strasz
534220137Strasz/*
535220137Strasz * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
536220137Strasz */
537220137Straszvoid
538220137Straszracct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
539220137Strasz{
540220137Strasz
541220137Strasz	mtx_lock(&racct_lock);
542220137Strasz	racct_sub_cred_locked(cred, resource, amount);
543220137Strasz	mtx_unlock(&racct_lock);
544220137Strasz}
545220137Strasz
546220137Strasz/*
547220137Strasz * Inherit resource usage information from the parent process.
548220137Strasz */
549220137Straszint
550220137Straszracct_proc_fork(struct proc *parent, struct proc *child)
551220137Strasz{
552220137Strasz	int i, error = 0;
553220137Strasz
554220137Strasz	/*
555220137Strasz	 * Create racct for the child process.
556220137Strasz	 */
557220137Strasz	racct_create(&child->p_racct);
558220137Strasz
559220137Strasz	/*
560220137Strasz	 * No resource accounting for kernel processes.
561220137Strasz	 */
562220137Strasz	if (child->p_flag & P_SYSTEM)
563220137Strasz		return (0);
564220137Strasz
565220137Strasz	PROC_LOCK(parent);
566220137Strasz	PROC_LOCK(child);
567220137Strasz	mtx_lock(&racct_lock);
568220137Strasz
569225981Strasz#ifdef RCTL
570225981Strasz	error = rctl_proc_fork(parent, child);
571225981Strasz	if (error != 0)
572225981Strasz		goto out;
573225981Strasz#endif
574225981Strasz
575220137Strasz	/*
576220137Strasz	 * Inherit resource usage.
577220137Strasz	 */
578220137Strasz	for (i = 0; i <= RACCT_MAX; i++) {
579220137Strasz		if (parent->p_racct->r_resources[i] == 0 ||
580223844Strasz		    !RACCT_IS_INHERITABLE(i))
581220137Strasz			continue;
582220137Strasz
583220137Strasz		error = racct_set_locked(child, i,
584220137Strasz		    parent->p_racct->r_resources[i]);
585225938Strasz		if (error != 0)
586220137Strasz			goto out;
587220137Strasz	}
588220137Strasz
589225944Strasz	error = racct_add_locked(child, RACCT_NPROC, 1);
590225944Strasz	error += racct_add_locked(child, RACCT_NTHR, 1);
591225944Strasz
592220137Straszout:
593220137Strasz	mtx_unlock(&racct_lock);
594220137Strasz	PROC_UNLOCK(child);
595220137Strasz	PROC_UNLOCK(parent);
596220137Strasz
597220137Strasz	return (error);
598220137Strasz}
599220137Strasz
/*
 * Called at the end of fork1(), to handle rules that require the process
 * to be fully initialized.  With RCTL compiled in, rctl_enforce() is run
 * for RACCT_NPROC and RACCT_NTHR with an amount of zero and its result is
 * ignored; without RCTL this is a no-op.
 */
void
racct_proc_fork_done(struct proc *child)
{

#ifdef RCTL
	PROC_LOCK(child);
	mtx_lock(&racct_lock);

	/* Zero amount: nothing is charged here, only the rules are run. */
	rctl_enforce(child, RACCT_NPROC, 0);
	rctl_enforce(child, RACCT_NTHR, 0);

	mtx_unlock(&racct_lock);
	PROC_UNLOCK(child);
#endif
}
617225940Strasz
618225940Straszvoid
619220137Straszracct_proc_exit(struct proc *p)
620220137Strasz{
621225364Strasz	int i;
622220137Strasz	uint64_t runtime;
623220137Strasz
624220137Strasz	PROC_LOCK(p);
625220137Strasz	/*
626220137Strasz	 * We don't need to calculate rux, proc_reap() has already done this.
627220137Strasz	 */
628220137Strasz	runtime = cputick2usec(p->p_rux.rux_runtime);
629220137Strasz#ifdef notyet
630220137Strasz	KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
631220137Strasz#else
632220137Strasz	if (runtime < p->p_prev_runtime)
633220137Strasz		runtime = p->p_prev_runtime;
634220137Strasz#endif
635225364Strasz	mtx_lock(&racct_lock);
636225364Strasz	racct_set_locked(p, RACCT_CPU, runtime);
637220137Strasz
638225364Strasz	for (i = 0; i <= RACCT_MAX; i++) {
639225364Strasz		if (p->p_racct->r_resources[i] == 0)
640225364Strasz			continue;
641225364Strasz	    	if (!RACCT_IS_RECLAIMABLE(i))
642225364Strasz			continue;
643225364Strasz		racct_set_locked(p, i, 0);
644225364Strasz	}
645225364Strasz
646225364Strasz	mtx_unlock(&racct_lock);
647220137Strasz	PROC_UNLOCK(p);
648220137Strasz
649220137Strasz#ifdef RCTL
650220137Strasz	rctl_racct_release(p->p_racct);
651220137Strasz#endif
652220137Strasz	racct_destroy(&p->p_racct);
653220137Strasz}
654220137Strasz
655220137Strasz/*
656220137Strasz * Called after credentials change, to move resource utilisation
657220137Strasz * between raccts.
658220137Strasz */
659220137Straszvoid
660220137Straszracct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
661220137Strasz    struct ucred *newcred)
662220137Strasz{
663220137Strasz	struct uidinfo *olduip, *newuip;
664220137Strasz	struct loginclass *oldlc, *newlc;
665220137Strasz	struct prison *oldpr, *newpr, *pr;
666220137Strasz
667220137Strasz	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
668220137Strasz
669220137Strasz	newuip = newcred->cr_ruidinfo;
670220137Strasz	olduip = oldcred->cr_ruidinfo;
671220137Strasz	newlc = newcred->cr_loginclass;
672220137Strasz	oldlc = oldcred->cr_loginclass;
673220137Strasz	newpr = newcred->cr_prison;
674220137Strasz	oldpr = oldcred->cr_prison;
675220137Strasz
676220137Strasz	mtx_lock(&racct_lock);
677220137Strasz	if (newuip != olduip) {
678220137Strasz		racct_sub_racct(olduip->ui_racct, p->p_racct);
679220137Strasz		racct_add_racct(newuip->ui_racct, p->p_racct);
680220137Strasz	}
681220137Strasz	if (newlc != oldlc) {
682220137Strasz		racct_sub_racct(oldlc->lc_racct, p->p_racct);
683220137Strasz		racct_add_racct(newlc->lc_racct, p->p_racct);
684220137Strasz	}
685220137Strasz	if (newpr != oldpr) {
686220137Strasz		for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
687221362Strasz			racct_sub_racct(pr->pr_prison_racct->prr_racct,
688221362Strasz			    p->p_racct);
689220137Strasz		for (pr = newpr; pr != NULL; pr = pr->pr_parent)
690221362Strasz			racct_add_racct(pr->pr_prison_racct->prr_racct,
691221362Strasz			    p->p_racct);
692220137Strasz	}
693220137Strasz	mtx_unlock(&racct_lock);
694220137Strasz
695220137Strasz#ifdef RCTL
696220137Strasz	rctl_proc_ucred_changed(p, newcred);
697220137Strasz#endif
698220137Strasz}
699220137Strasz
700232598Straszvoid
701232598Straszracct_move(struct racct *dest, struct racct *src)
702232598Strasz{
703232598Strasz
704232598Strasz	mtx_lock(&racct_lock);
705232598Strasz
706232598Strasz	racct_add_racct(dest, src);
707232598Strasz	racct_sub_racct(src, src);
708232598Strasz
709232598Strasz	mtx_unlock(&racct_lock);
710232598Strasz}
711232598Strasz
712220137Straszstatic void
713220137Straszracctd(void)
714220137Strasz{
715220137Strasz	struct thread *td;
716220137Strasz	struct proc *p;
717220137Strasz	struct timeval wallclock;
718220137Strasz	uint64_t runtime;
719220137Strasz
720220137Strasz	for (;;) {
721220137Strasz		sx_slock(&allproc_lock);
722220137Strasz
723220137Strasz		FOREACH_PROC_IN_SYSTEM(p) {
724220137Strasz			if (p->p_state != PRS_NORMAL)
725220137Strasz				continue;
726220137Strasz			if (p->p_flag & P_SYSTEM)
727220137Strasz				continue;
728220137Strasz
729220137Strasz			microuptime(&wallclock);
730220137Strasz			timevalsub(&wallclock, &p->p_stats->p_start);
731220137Strasz			PROC_LOCK(p);
732220137Strasz			PROC_SLOCK(p);
733220137Strasz			FOREACH_THREAD_IN_PROC(p, td) {
734220137Strasz				ruxagg(p, td);
735220137Strasz				thread_lock(td);
736220137Strasz				thread_unlock(td);
737220137Strasz			}
738220137Strasz			runtime = cputick2usec(p->p_rux.rux_runtime);
739220137Strasz			PROC_SUNLOCK(p);
740220137Strasz#ifdef notyet
741220137Strasz			KASSERT(runtime >= p->p_prev_runtime,
742220137Strasz			    ("runtime < p_prev_runtime"));
743220137Strasz#else
744220137Strasz			if (runtime < p->p_prev_runtime)
745220137Strasz				runtime = p->p_prev_runtime;
746220137Strasz#endif
747220137Strasz			p->p_prev_runtime = runtime;
748220137Strasz			mtx_lock(&racct_lock);
749220137Strasz			racct_set_locked(p, RACCT_CPU, runtime);
750220137Strasz			racct_set_locked(p, RACCT_WALLCLOCK,
751220137Strasz			    wallclock.tv_sec * 1000000 + wallclock.tv_usec);
752220137Strasz			mtx_unlock(&racct_lock);
753220137Strasz			PROC_UNLOCK(p);
754220137Strasz		}
755220137Strasz		sx_sunlock(&allproc_lock);
756220137Strasz		pause("-", hz);
757220137Strasz	}
758220137Strasz}
759220137Strasz
760220137Straszstatic struct kproc_desc racctd_kp = {
761220137Strasz	"racctd",
762220137Strasz	racctd,
763220137Strasz	NULL
764220137Strasz};
765220137StraszSYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
766220137Strasz
767220137Straszstatic void
768220137Straszracct_init(void)
769220137Strasz{
770220137Strasz
771220137Strasz	racct_zone = uma_zcreate("racct", sizeof(struct racct),
772220137Strasz	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
773220137Strasz	/*
774220137Strasz	 * XXX: Move this somewhere.
775220137Strasz	 */
776221362Strasz	prison0.pr_prison_racct = prison_racct_find("0");
777220137Strasz}
778220137StraszSYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
779220137Strasz
780220137Strasz#else /* !RACCT */
781220137Strasz
/*
 * Stubs used when the kernel is built without RACCT.  Nothing is
 * accounted: requests always succeed and limits are reported as
 * unlimited.
 */

/* Accounting disabled: every charge is accepted. */
int
racct_add(struct proc *p, int resource, uint64_t amount)
{

	return (0);
}

/* Accounting disabled: nothing to charge. */
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{
}

/* Accounting disabled: nothing to charge. */
void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{
}

/* Accounting disabled: every new value is accepted. */
int
racct_set(struct proc *p, int resource, uint64_t amount)
{

	return (0);
}

/* Accounting disabled: nothing to record. */
void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{
}

/* Accounting disabled: nothing to release. */
void
racct_sub(struct proc *p, int resource, uint64_t amount)
{
}

/* Accounting disabled: nothing to release. */
void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{
}

/* No limits exist without accounting. */
uint64_t
racct_get_limit(struct proc *p, int resource)
{

	return (UINT64_MAX);
}

/* No limits exist without accounting. */
uint64_t
racct_get_available(struct proc *p, int resource)
{

	return (UINT64_MAX);
}

/* Accounting disabled: '*racctp' is left untouched. */
void
racct_create(struct racct **racctp)
{
}

/* Accounting disabled: '*racctp' is left untouched. */
void
racct_destroy(struct racct **racctp)
{
}

/* Accounting disabled: forking is never denied. */
int
racct_proc_fork(struct proc *parent, struct proc *child)
{

	return (0);
}

/* Accounting disabled: nothing to do after fork. */
void
racct_proc_fork_done(struct proc *child)
{
}

/* Accounting disabled: nothing to release at exit. */
void
racct_proc_exit(struct proc *p)
{
}
863220137Strasz
864220137Strasz#endif /* !RACCT */
865