kern_racct.c revision 220372
1220137Strasz/*-
2220137Strasz * Copyright (c) 2010 The FreeBSD Foundation
3220137Strasz * All rights reserved.
4220137Strasz *
5220137Strasz * This software was developed by Edward Tomasz Napierala under sponsorship
6220137Strasz * from the FreeBSD Foundation.
7220137Strasz *
8220137Strasz * Redistribution and use in source and binary forms, with or without
9220137Strasz * modification, are permitted provided that the following conditions
10220137Strasz * are met:
11220137Strasz * 1. Redistributions of source code must retain the above copyright
12220137Strasz *    notice, this list of conditions and the following disclaimer.
13220137Strasz * 2. Redistributions in binary form must reproduce the above copyright
14220137Strasz *    notice, this list of conditions and the following disclaimer in the
15220137Strasz *    documentation and/or other materials provided with the distribution.
16220137Strasz *
17220137Strasz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18220137Strasz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19220137Strasz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20220137Strasz * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21220137Strasz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22220137Strasz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23220137Strasz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24220137Strasz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25220137Strasz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26220137Strasz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27220137Strasz * SUCH DAMAGE.
28220137Strasz *
29220137Strasz * $FreeBSD: head/sys/kern/kern_racct.c 220372 2011-04-05 19:50:34Z trasz $
30220137Strasz */
31220137Strasz
32220137Strasz#include <sys/cdefs.h>
33220137Strasz__FBSDID("$FreeBSD: head/sys/kern/kern_racct.c 220372 2011-04-05 19:50:34Z trasz $");
34220137Strasz
35220137Strasz#include "opt_kdtrace.h"
36220137Strasz
37220137Strasz#include <sys/param.h>
38220137Strasz#include <sys/eventhandler.h>
39220137Strasz#include <sys/param.h>
40220137Strasz#include <sys/jail.h>
41220137Strasz#include <sys/kernel.h>
42220137Strasz#include <sys/kthread.h>
43220137Strasz#include <sys/lock.h>
44220137Strasz#include <sys/loginclass.h>
45220137Strasz#include <sys/malloc.h>
46220137Strasz#include <sys/mutex.h>
47220137Strasz#include <sys/proc.h>
48220137Strasz#include <sys/racct.h>
49220137Strasz#include <sys/resourcevar.h>
50220137Strasz#include <sys/sbuf.h>
51220137Strasz#include <sys/sched.h>
52220137Strasz#include <sys/sdt.h>
53220137Strasz#include <sys/sx.h>
54220137Strasz#include <sys/sysent.h>
55220137Strasz#include <sys/sysproto.h>
56220137Strasz#include <sys/systm.h>
57220137Strasz#include <sys/umtx.h>
58220137Strasz
59220137Strasz#ifdef RCTL
60220137Strasz#include <sys/rctl.h>
61220137Strasz#endif
62220137Strasz
63220137Strasz#ifdef RACCT
64220137Strasz
65220137StraszFEATURE(racct, "Resource Accounting");
66220137Strasz
67220137Straszstatic struct mtx racct_lock;
68220137StraszMTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
69220137Strasz
70220137Straszstatic uma_zone_t racct_zone;
71220137Strasz
72220137Straszstatic void racct_sub_racct(struct racct *dest, const struct racct *src);
73220137Straszstatic void racct_sub_cred_locked(struct ucred *cred, int resource,
74220137Strasz		uint64_t amount);
75220137Straszstatic void racct_add_cred_locked(struct ucred *cred, int resource,
76220137Strasz		uint64_t amount);
77220137Strasz
78220137StraszSDT_PROVIDER_DEFINE(racct);
79220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
80220137Strasz    "uint64_t");
81220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
82220137Strasz    "struct proc *", "int", "uint64_t");
83220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
84220137Strasz    "int", "uint64_t");
85220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
86220137Strasz    "int", "uint64_t");
87220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
88220137Strasz    "uint64_t");
89220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
90220137Strasz    "struct proc *", "int", "uint64_t");
91220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
92220137Strasz    "uint64_t");
93220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
94220137Strasz    "int", "uint64_t");
95220137StraszSDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
96220137StraszSDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
97220137StraszSDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
98220137Strasz    "struct racct *");
99220137StraszSDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
100220137Strasz    "struct racct *", "struct racct *");
101220137StraszSDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
102220137Strasz    "struct racct *");
103220137Strasz
104220137Straszint racct_types[] = {
105220137Strasz	[RACCT_CPU] =
106220137Strasz		RACCT_IN_THOUSANDS,
107220137Strasz	[RACCT_FSIZE] =
108220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
109220137Strasz	[RACCT_DATA] =
110220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
111220137Strasz	[RACCT_STACK] =
112220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
113220137Strasz	[RACCT_CORE] =
114220137Strasz		RACCT_DENIABLE,
115220137Strasz	[RACCT_RSS] =
116220137Strasz		RACCT_RECLAIMABLE,
117220137Strasz	[RACCT_MEMLOCK] =
118220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE,
119220137Strasz	[RACCT_NPROC] =
120220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE,
121220137Strasz	[RACCT_NOFILE] =
122220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
123220137Strasz	[RACCT_SBSIZE] =
124220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
125220137Strasz	[RACCT_VMEM] =
126220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
127220137Strasz	[RACCT_NPTS] =
128220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
129220137Strasz	[RACCT_SWAP] =
130220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
131220137Strasz	[RACCT_NTHR] =
132220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE,
133220137Strasz	[RACCT_MSGQQUEUED] =
134220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
135220137Strasz	[RACCT_MSGQSIZE] =
136220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
137220137Strasz	[RACCT_NMSGQ] =
138220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
139220137Strasz	[RACCT_NSEM] =
140220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
141220137Strasz	[RACCT_NSEMOP] =
142220137Strasz		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
143220137Strasz	[RACCT_NSHM] =
144220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
145220137Strasz	[RACCT_SHMSIZE] =
146220137Strasz		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
147220137Strasz	[RACCT_WALLCLOCK] =
148220137Strasz		RACCT_IN_THOUSANDS };
149220137Strasz
150220137Straszstatic void
151220137Straszracct_add_racct(struct racct *dest, const struct racct *src)
152220137Strasz{
153220137Strasz	int i;
154220137Strasz
155220137Strasz	mtx_assert(&racct_lock, MA_OWNED);
156220137Strasz
157220137Strasz	/*
158220137Strasz	 * Update resource usage in dest.
159220137Strasz	 */
160220137Strasz	for (i = 0; i <= RACCT_MAX; i++) {
161220137Strasz		KASSERT(dest->r_resources[i] >= 0,
162220137Strasz		    ("racct propagation meltdown: dest < 0"));
163220137Strasz		KASSERT(src->r_resources[i] >= 0,
164220137Strasz		    ("racct propagation meltdown: src < 0"));
165220137Strasz		dest->r_resources[i] += src->r_resources[i];
166220137Strasz	}
167220137Strasz}
168220137Strasz
169220137Straszstatic void
170220137Straszracct_sub_racct(struct racct *dest, const struct racct *src)
171220137Strasz{
172220137Strasz	int i;
173220137Strasz
174220137Strasz	mtx_assert(&racct_lock, MA_OWNED);
175220137Strasz
176220137Strasz	/*
177220137Strasz	 * Update resource usage in dest.
178220137Strasz	 */
179220137Strasz	for (i = 0; i <= RACCT_MAX; i++) {
180220137Strasz		if (!racct_is_sloppy(i)) {
181220137Strasz			KASSERT(dest->r_resources[i] >= 0,
182220137Strasz			    ("racct propagation meltdown: dest < 0"));
183220137Strasz			KASSERT(src->r_resources[i] >= 0,
184220137Strasz			    ("racct propagation meltdown: src < 0"));
185220137Strasz			KASSERT(src->r_resources[i] <= dest->r_resources[i],
186220137Strasz			    ("racct propagation meltdown: src > dest"));
187220137Strasz		}
188220137Strasz		if (racct_is_reclaimable(i)) {
189220137Strasz			dest->r_resources[i] -= src->r_resources[i];
190220137Strasz			if (dest->r_resources[i] < 0) {
191220137Strasz				KASSERT(racct_is_sloppy(i),
192220137Strasz				    ("racct_sub_racct: usage < 0"));
193220137Strasz				dest->r_resources[i] = 0;
194220137Strasz			}
195220137Strasz		}
196220137Strasz	}
197220137Strasz}
198220137Strasz
199220137Straszvoid
200220137Straszracct_create(struct racct **racctp)
201220137Strasz{
202220137Strasz
203220137Strasz	SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
204220137Strasz
205220137Strasz	KASSERT(*racctp == NULL, ("racct already allocated"));
206220137Strasz
207220137Strasz	*racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
208220137Strasz}
209220137Strasz
210220137Straszstatic void
211220137Straszracct_destroy_locked(struct racct **racctp)
212220137Strasz{
213220137Strasz	int i;
214220137Strasz	struct racct *racct;
215220137Strasz
216220137Strasz	SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
217220137Strasz
218220137Strasz	mtx_assert(&racct_lock, MA_OWNED);
219220137Strasz	KASSERT(racctp != NULL, ("NULL racctp"));
220220137Strasz	KASSERT(*racctp != NULL, ("NULL racct"));
221220137Strasz
222220137Strasz	racct = *racctp;
223220137Strasz
224220137Strasz	for (i = 0; i <= RACCT_MAX; i++) {
225220137Strasz		if (racct_is_sloppy(i))
226220137Strasz			continue;
227220137Strasz		if (!racct_is_reclaimable(i))
228220137Strasz			continue;
229220137Strasz		KASSERT(racct->r_resources[i] == 0,
230220137Strasz		    ("destroying non-empty racct: "
231220137Strasz		    "%ju allocated for resource %d\n",
232220137Strasz		    racct->r_resources[i], i));
233220137Strasz	}
234220137Strasz	uma_zfree(racct_zone, racct);
235220137Strasz	*racctp = NULL;
236220137Strasz}
237220137Strasz
238220137Straszvoid
239220137Straszracct_destroy(struct racct **racct)
240220137Strasz{
241220137Strasz
242220137Strasz	mtx_lock(&racct_lock);
243220137Strasz	racct_destroy_locked(racct);
244220137Strasz	mtx_unlock(&racct_lock);
245220137Strasz}
246220137Strasz
247220137Strasz/*
248220137Strasz * Increase consumption of 'resource' by 'amount' for 'racct'
249220137Strasz * and all its parents.  Differently from other cases, 'amount' here
250220137Strasz * may be less than zero.
251220137Strasz */
252220137Straszstatic void
253220137Straszracct_alloc_resource(struct racct *racct, int resource,
254220137Strasz    uint64_t amount)
255220137Strasz{
256220137Strasz
257220137Strasz	mtx_assert(&racct_lock, MA_OWNED);
258220137Strasz	KASSERT(racct != NULL, ("NULL racct"));
259220137Strasz
260220137Strasz	racct->r_resources[resource] += amount;
261220137Strasz	if (racct->r_resources[resource] < 0) {
262220137Strasz		KASSERT(racct_is_sloppy(resource),
263220137Strasz		    ("racct_alloc_resource: usage < 0"));
264220137Strasz		racct->r_resources[resource] = 0;
265220137Strasz	}
266220137Strasz}
267220137Strasz
268220137Strasz/*
269220137Strasz * Increase allocation of 'resource' by 'amount' for process 'p'.
270220137Strasz * Return 0 if it's below limits, or errno, if it's not.
271220137Strasz */
272220137Straszint
273220137Straszracct_add(struct proc *p, int resource, uint64_t amount)
274220137Strasz{
275220137Strasz#ifdef RCTL
276220137Strasz	int error;
277220137Strasz#endif
278220137Strasz
279220137Strasz	if (p->p_flag & P_SYSTEM)
280220137Strasz		return (0);
281220137Strasz
282220137Strasz	SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
283220137Strasz
284220137Strasz	/*
285220137Strasz	 * We need proc lock to dereference p->p_ucred.
286220137Strasz	 */
287220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
288220137Strasz
289220137Strasz	mtx_lock(&racct_lock);
290220137Strasz#ifdef RCTL
291220137Strasz	error = rctl_enforce(p, resource, amount);
292220137Strasz	if (error && racct_is_deniable(resource)) {
293220137Strasz		SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
294220137Strasz		    amount, 0, 0);
295220137Strasz		mtx_unlock(&racct_lock);
296220137Strasz		return (error);
297220137Strasz	}
298220137Strasz#endif
299220137Strasz	racct_alloc_resource(p->p_racct, resource, amount);
300220137Strasz	racct_add_cred_locked(p->p_ucred, resource, amount);
301220137Strasz	mtx_unlock(&racct_lock);
302220137Strasz
303220137Strasz	return (0);
304220137Strasz}
305220137Strasz
306220137Straszstatic void
307220137Straszracct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
308220137Strasz{
309220137Strasz	struct prison *pr;
310220137Strasz
311220137Strasz	SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
312220137Strasz	    0, 0);
313220137Strasz
314220137Strasz	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
315220137Strasz	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
316220137Strasz		racct_alloc_resource(pr->pr_racct, resource, amount);
317220137Strasz	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
318220137Strasz}
319220137Strasz
320220137Strasz/*
321220137Strasz * Increase allocation of 'resource' by 'amount' for credential 'cred'.
322220137Strasz * Doesn't check for limits and never fails.
323220137Strasz *
324220137Strasz * XXX: Shouldn't this ever return an error?
325220137Strasz */
326220137Straszvoid
327220137Straszracct_add_cred(struct ucred *cred, int resource, uint64_t amount)
328220137Strasz{
329220137Strasz
330220137Strasz	mtx_lock(&racct_lock);
331220137Strasz	racct_add_cred_locked(cred, resource, amount);
332220137Strasz	mtx_unlock(&racct_lock);
333220137Strasz}
334220137Strasz
335220137Strasz/*
336220137Strasz * Increase allocation of 'resource' by 'amount' for process 'p'.
337220137Strasz * Doesn't check for limits and never fails.
338220137Strasz */
339220137Straszvoid
340220137Straszracct_add_force(struct proc *p, int resource, uint64_t amount)
341220137Strasz{
342220137Strasz
343220137Strasz	if (p->p_flag & P_SYSTEM)
344220137Strasz		return;
345220137Strasz
346220137Strasz	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
347220137Strasz
348220137Strasz	/*
349220137Strasz	 * We need proc lock to dereference p->p_ucred.
350220137Strasz	 */
351220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
352220137Strasz
353220137Strasz	mtx_lock(&racct_lock);
354220137Strasz	racct_alloc_resource(p->p_racct, resource, amount);
355220137Strasz	mtx_unlock(&racct_lock);
356220137Strasz	racct_add_cred(p->p_ucred, resource, amount);
357220137Strasz}
358220137Strasz
359220137Straszstatic int
360220137Straszracct_set_locked(struct proc *p, int resource, uint64_t amount)
361220137Strasz{
362220137Strasz	int64_t diff;
363220137Strasz#ifdef RCTL
364220137Strasz	int error;
365220137Strasz#endif
366220137Strasz
367220137Strasz	if (p->p_flag & P_SYSTEM)
368220137Strasz		return (0);
369220137Strasz
370220137Strasz	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
371220137Strasz
372220137Strasz	/*
373220137Strasz	 * We need proc lock to dereference p->p_ucred.
374220137Strasz	 */
375220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
376220137Strasz
377220137Strasz	diff = amount - p->p_racct->r_resources[resource];
378220137Strasz#ifdef notyet
379220137Strasz	KASSERT(diff >= 0 || racct_is_reclaimable(resource),
380220137Strasz	    ("racct_set: usage of non-reclaimable resource %d dropping",
381220137Strasz	     resource));
382220137Strasz#endif
383220137Strasz#ifdef RCTL
384220137Strasz	if (diff > 0) {
385220137Strasz		error = rctl_enforce(p, resource, diff);
386220137Strasz		if (error && racct_is_deniable(resource)) {
387220137Strasz			SDT_PROBE(racct, kernel, rusage, set_failure, p,
388220137Strasz			    resource, amount, 0, 0);
389220137Strasz			return (error);
390220137Strasz		}
391220137Strasz	}
392220137Strasz#endif
393220137Strasz	racct_alloc_resource(p->p_racct, resource, diff);
394220137Strasz	if (diff > 0)
395220137Strasz		racct_add_cred_locked(p->p_ucred, resource, diff);
396220137Strasz	else if (diff < 0)
397220137Strasz		racct_sub_cred_locked(p->p_ucred, resource, -diff);
398220137Strasz
399220137Strasz	return (0);
400220137Strasz}
401220137Strasz
402220137Strasz/*
403220137Strasz * Set allocation of 'resource' to 'amount' for process 'p'.
404220137Strasz * Return 0 if it's below limits, or errno, if it's not.
405220137Strasz *
406220137Strasz * Note that decreasing the allocation always returns 0,
407220137Strasz * even if it's above the limit.
408220137Strasz */
409220137Straszint
410220137Straszracct_set(struct proc *p, int resource, uint64_t amount)
411220137Strasz{
412220137Strasz	int error;
413220137Strasz
414220137Strasz	mtx_lock(&racct_lock);
415220137Strasz	error = racct_set_locked(p, resource, amount);
416220137Strasz	mtx_unlock(&racct_lock);
417220137Strasz	return (error);
418220137Strasz}
419220137Strasz
420220137Straszvoid
421220137Straszracct_set_force(struct proc *p, int resource, uint64_t amount)
422220137Strasz{
423220137Strasz	int64_t diff;
424220137Strasz
425220137Strasz	if (p->p_flag & P_SYSTEM)
426220137Strasz		return;
427220137Strasz
428220137Strasz	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
429220137Strasz
430220137Strasz	/*
431220137Strasz	 * We need proc lock to dereference p->p_ucred.
432220137Strasz	 */
433220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
434220137Strasz
435220137Strasz	mtx_lock(&racct_lock);
436220137Strasz	diff = amount - p->p_racct->r_resources[resource];
437220137Strasz	racct_alloc_resource(p->p_racct, resource, diff);
438220137Strasz	if (diff > 0)
439220137Strasz		racct_add_cred_locked(p->p_ucred, resource, diff);
440220137Strasz	else if (diff < 0)
441220137Strasz		racct_sub_cred_locked(p->p_ucred, resource, -diff);
442220137Strasz	mtx_unlock(&racct_lock);
443220137Strasz}
444220137Strasz
/*
 * Return how much of 'resource' process 'p' may keep allocated; an
 * allocation beyond that would be denied unless the resource is
 * undeniable.  What is already allocated is not taken into account.
 *
 * With RCTL the value comes from rctl_get_limit(); without it the
 * result is UINT64_MAX.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{
	uint64_t limit;

#ifdef RCTL
	limit = rctl_get_limit(p, resource);
#else
	limit = UINT64_MAX;
#endif
	return (limit);
}
461220137Strasz
/*
 * Return how much of 'resource' process 'p' may keep allocated; an
 * allocation beyond that would be denied unless the resource is
 * undeniable.  In contrast to racct_get_limit(), what is already
 * allocated does matter here.
 *
 * With RCTL the value comes from rctl_get_available(); without it the
 * result is UINT64_MAX.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{
	uint64_t avail;

#ifdef RCTL
	avail = rctl_get_available(p, resource);
#else
	avail = UINT64_MAX;
#endif
	return (avail);
}
478220137Strasz
479220137Strasz/*
480220137Strasz * Decrease allocation of 'resource' by 'amount' for process 'p'.
481220137Strasz */
482220137Straszvoid
483220137Straszracct_sub(struct proc *p, int resource, uint64_t amount)
484220137Strasz{
485220137Strasz
486220137Strasz	if (p->p_flag & P_SYSTEM)
487220137Strasz		return;
488220137Strasz
489220137Strasz	SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
490220137Strasz
491220137Strasz	/*
492220137Strasz	 * We need proc lock to dereference p->p_ucred.
493220137Strasz	 */
494220137Strasz	PROC_LOCK_ASSERT(p, MA_OWNED);
495220137Strasz	KASSERT(racct_is_reclaimable(resource),
496220137Strasz	    ("racct_sub: called for non-reclaimable resource %d", resource));
497220137Strasz
498220137Strasz	mtx_lock(&racct_lock);
499220137Strasz	KASSERT(amount <= p->p_racct->r_resources[resource],
500220137Strasz	    ("racct_sub: freeing %ju of resource %d, which is more "
501220137Strasz	     "than allocated %jd for %s (pid %d)", amount, resource,
502220137Strasz	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
503220137Strasz
504220137Strasz	racct_alloc_resource(p->p_racct, resource, -amount);
505220137Strasz	racct_sub_cred_locked(p->p_ucred, resource, amount);
506220137Strasz	mtx_unlock(&racct_lock);
507220137Strasz}
508220137Strasz
509220137Straszstatic void
510220137Straszracct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
511220137Strasz{
512220137Strasz	struct prison *pr;
513220137Strasz
514220137Strasz	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
515220137Strasz	    0, 0);
516220137Strasz
517220137Strasz#ifdef notyet
518220137Strasz	KASSERT(racct_is_reclaimable(resource),
519220137Strasz	    ("racct_sub_cred: called for non-reclaimable resource %d",
520220137Strasz	     resource));
521220137Strasz#endif
522220137Strasz
523220137Strasz	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
524220137Strasz	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
525220137Strasz		racct_alloc_resource(pr->pr_racct, resource, -amount);
526220137Strasz	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
527220137Strasz}
528220137Strasz
529220137Strasz/*
530220137Strasz * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
531220137Strasz */
532220137Straszvoid
533220137Straszracct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
534220137Strasz{
535220137Strasz
536220137Strasz	mtx_lock(&racct_lock);
537220137Strasz	racct_sub_cred_locked(cred, resource, amount);
538220137Strasz	mtx_unlock(&racct_lock);
539220137Strasz}
540220137Strasz
541220137Strasz/*
542220137Strasz * Inherit resource usage information from the parent process.
543220137Strasz */
544220137Straszint
545220137Straszracct_proc_fork(struct proc *parent, struct proc *child)
546220137Strasz{
547220137Strasz	int i, error = 0;
548220137Strasz
549220137Strasz	/*
550220137Strasz	 * Create racct for the child process.
551220137Strasz	 */
552220137Strasz	racct_create(&child->p_racct);
553220137Strasz
554220137Strasz	/*
555220137Strasz	 * No resource accounting for kernel processes.
556220137Strasz	 */
557220137Strasz	if (child->p_flag & P_SYSTEM)
558220137Strasz		return (0);
559220137Strasz
560220137Strasz	PROC_LOCK(parent);
561220137Strasz	PROC_LOCK(child);
562220137Strasz	mtx_lock(&racct_lock);
563220137Strasz
564220137Strasz	/*
565220137Strasz	 * Inherit resource usage.
566220137Strasz	 */
567220137Strasz	for (i = 0; i <= RACCT_MAX; i++) {
568220137Strasz		if (parent->p_racct->r_resources[i] == 0 ||
569220137Strasz		    !racct_is_inheritable(i))
570220137Strasz			continue;
571220137Strasz
572220137Strasz		error = racct_set_locked(child, i,
573220137Strasz		    parent->p_racct->r_resources[i]);
574220137Strasz		if (error != 0) {
575220137Strasz			/*
576220137Strasz			 * XXX: The only purpose of these two lines is
577220137Strasz			 * to prevent from tripping checks in racct_destroy().
578220137Strasz			 */
579220137Strasz			for (i = 0; i <= RACCT_MAX; i++)
580220137Strasz				racct_set_locked(child, i, 0);
581220137Strasz			goto out;
582220137Strasz		}
583220137Strasz	}
584220137Strasz
585220137Strasz#ifdef RCTL
586220137Strasz	error = rctl_proc_fork(parent, child);
587220137Strasz	if (error != 0) {
588220137Strasz		/*
589220137Strasz		 * XXX: The only purpose of these two lines is to prevent from
590220137Strasz		 * tripping checks in racct_destroy().
591220137Strasz		 */
592220137Strasz		for (i = 0; i <= RACCT_MAX; i++)
593220137Strasz			racct_set_locked(child, i, 0);
594220137Strasz	}
595220137Strasz#endif
596220137Strasz
597220137Straszout:
598220137Strasz	if (error != 0)
599220137Strasz		racct_destroy_locked(&child->p_racct);
600220137Strasz	mtx_unlock(&racct_lock);
601220137Strasz	PROC_UNLOCK(child);
602220137Strasz	PROC_UNLOCK(parent);
603220137Strasz
604220137Strasz	return (error);
605220137Strasz}
606220137Strasz
607220137Straszvoid
608220137Straszracct_proc_exit(struct proc *p)
609220137Strasz{
610220137Strasz	uint64_t runtime;
611220137Strasz
612220137Strasz	PROC_LOCK(p);
613220137Strasz	/*
614220137Strasz	 * We don't need to calculate rux, proc_reap() has already done this.
615220137Strasz	 */
616220137Strasz	runtime = cputick2usec(p->p_rux.rux_runtime);
617220137Strasz#ifdef notyet
618220137Strasz	KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
619220137Strasz#else
620220137Strasz	if (runtime < p->p_prev_runtime)
621220137Strasz		runtime = p->p_prev_runtime;
622220137Strasz#endif
623220137Strasz	racct_set(p, RACCT_CPU, runtime);
624220137Strasz
625220137Strasz	/*
626220137Strasz	 * XXX: Free this some other way.
627220137Strasz	 */
628220137Strasz	racct_set(p, RACCT_FSIZE, 0);
629220137Strasz	racct_set(p, RACCT_NPTS, 0);
630220137Strasz	racct_set(p, RACCT_NTHR, 0);
631220137Strasz	racct_set(p, RACCT_RSS, 0);
632220137Strasz	PROC_UNLOCK(p);
633220137Strasz
634220137Strasz#ifdef RCTL
635220137Strasz	rctl_racct_release(p->p_racct);
636220137Strasz#endif
637220137Strasz	racct_destroy(&p->p_racct);
638220137Strasz}
639220137Strasz
640220137Strasz/*
641220137Strasz * Called after credentials change, to move resource utilisation
642220137Strasz * between raccts.
643220137Strasz */
644220137Straszvoid
645220137Straszracct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
646220137Strasz    struct ucred *newcred)
647220137Strasz{
648220137Strasz	struct uidinfo *olduip, *newuip;
649220137Strasz	struct loginclass *oldlc, *newlc;
650220137Strasz	struct prison *oldpr, *newpr, *pr;
651220137Strasz
652220137Strasz	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
653220137Strasz
654220137Strasz	newuip = newcred->cr_ruidinfo;
655220137Strasz	olduip = oldcred->cr_ruidinfo;
656220137Strasz	newlc = newcred->cr_loginclass;
657220137Strasz	oldlc = oldcred->cr_loginclass;
658220137Strasz	newpr = newcred->cr_prison;
659220137Strasz	oldpr = oldcred->cr_prison;
660220137Strasz
661220137Strasz	mtx_lock(&racct_lock);
662220137Strasz	if (newuip != olduip) {
663220137Strasz		racct_sub_racct(olduip->ui_racct, p->p_racct);
664220137Strasz		racct_add_racct(newuip->ui_racct, p->p_racct);
665220137Strasz	}
666220137Strasz	if (newlc != oldlc) {
667220137Strasz		racct_sub_racct(oldlc->lc_racct, p->p_racct);
668220137Strasz		racct_add_racct(newlc->lc_racct, p->p_racct);
669220137Strasz	}
670220137Strasz	if (newpr != oldpr) {
671220137Strasz		for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
672220137Strasz			racct_sub_racct(pr->pr_racct, p->p_racct);
673220137Strasz		for (pr = newpr; pr != NULL; pr = pr->pr_parent)
674220137Strasz			racct_add_racct(pr->pr_racct, p->p_racct);
675220137Strasz	}
676220137Strasz	mtx_unlock(&racct_lock);
677220137Strasz
678220137Strasz#ifdef RCTL
679220137Strasz	rctl_proc_ucred_changed(p, newcred);
680220137Strasz#endif
681220137Strasz}
682220137Strasz
683220137Straszstatic void
684220137Straszracctd(void)
685220137Strasz{
686220137Strasz	struct thread *td;
687220137Strasz	struct proc *p;
688220137Strasz	struct timeval wallclock;
689220137Strasz	uint64_t runtime;
690220137Strasz
691220137Strasz	for (;;) {
692220137Strasz		sx_slock(&allproc_lock);
693220137Strasz
694220137Strasz		FOREACH_PROC_IN_SYSTEM(p) {
695220137Strasz			if (p->p_state != PRS_NORMAL)
696220137Strasz				continue;
697220137Strasz			if (p->p_flag & P_SYSTEM)
698220137Strasz				continue;
699220137Strasz
700220137Strasz			microuptime(&wallclock);
701220137Strasz			timevalsub(&wallclock, &p->p_stats->p_start);
702220137Strasz			PROC_LOCK(p);
703220137Strasz			PROC_SLOCK(p);
704220137Strasz			FOREACH_THREAD_IN_PROC(p, td) {
705220137Strasz				ruxagg(p, td);
706220137Strasz				thread_lock(td);
707220137Strasz				thread_unlock(td);
708220137Strasz			}
709220137Strasz			runtime = cputick2usec(p->p_rux.rux_runtime);
710220137Strasz			PROC_SUNLOCK(p);
711220137Strasz#ifdef notyet
712220137Strasz			KASSERT(runtime >= p->p_prev_runtime,
713220137Strasz			    ("runtime < p_prev_runtime"));
714220137Strasz#else
715220137Strasz			if (runtime < p->p_prev_runtime)
716220137Strasz				runtime = p->p_prev_runtime;
717220137Strasz#endif
718220137Strasz			p->p_prev_runtime = runtime;
719220137Strasz			mtx_lock(&racct_lock);
720220137Strasz			racct_set_locked(p, RACCT_CPU, runtime);
721220137Strasz			racct_set_locked(p, RACCT_WALLCLOCK,
722220137Strasz			    wallclock.tv_sec * 1000000 + wallclock.tv_usec);
723220137Strasz			mtx_unlock(&racct_lock);
724220137Strasz			PROC_UNLOCK(p);
725220137Strasz		}
726220137Strasz		sx_sunlock(&allproc_lock);
727220137Strasz		pause("-", hz);
728220137Strasz	}
729220137Strasz}
730220137Strasz
731220137Straszstatic struct kproc_desc racctd_kp = {
732220137Strasz	"racctd",
733220137Strasz	racctd,
734220137Strasz	NULL
735220137Strasz};
736220137StraszSYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
737220137Strasz
738220137Straszstatic void
739220137Straszracct_init(void)
740220137Strasz{
741220137Strasz
742220137Strasz	racct_zone = uma_zcreate("racct", sizeof(struct racct),
743220137Strasz	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
744220137Strasz	/*
745220137Strasz	 * XXX: Move this somewhere.
746220137Strasz	 */
747220137Strasz	racct_create(&prison0.pr_racct);
748220137Strasz}
749220137StraszSYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
750220137Strasz
751220137Strasz#else /* !RACCT */
752220137Strasz
/*
 * Stub used when RACCT is not compiled in: nothing is accounted and the
 * request always succeeds.
 */
int
racct_add(struct proc *p, int resource, uint64_t amount)
{
	(void)p;
	(void)resource;
	(void)amount;

	return (0);
}
759220137Strasz
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{
	(void)cred;
	(void)resource;
	(void)amount;
}
764220137Strasz
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{
	(void)p;
	(void)resource;
	(void)amount;
}
771220137Strasz
/*
 * Stub used when RACCT is not compiled in: nothing is recorded and the
 * request always succeeds.
 */
int
racct_set(struct proc *p, int resource, uint64_t amount)
{
	(void)p;
	(void)resource;
	(void)amount;

	return (0);
}
778220137Strasz
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{
	(void)p;
	(void)resource;
	(void)amount;
}
783220372Strasz
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_sub(struct proc *p, int resource, uint64_t amount)
{
	(void)p;
	(void)resource;
	(void)amount;
}
788220137Strasz
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{
	(void)cred;
	(void)resource;
	(void)amount;
}
793220137Strasz
/*
 * Stub used when RACCT is not compiled in: there is no limit, so the
 * largest representable value is reported.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{
	(void)p;
	(void)resource;

	return (UINT64_MAX);
}
800220137Strasz
/*
 * Stub used when RACCT is not compiled in: nothing is limited, so the
 * largest representable value is reported as available.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{
	(void)p;
	(void)resource;

	return (UINT64_MAX);
}
807220372Strasz
/*
 * Stub used when RACCT is not compiled in: no racct is allocated and
 * '*racctp' is left untouched.
 */
void
racct_create(struct racct **racctp)
{
	(void)racctp;
}
812220137Strasz
/*
 * Stub used when RACCT is not compiled in: there is nothing to free and
 * '*racctp' is left untouched.
 */
void
racct_destroy(struct racct **racctp)
{
	(void)racctp;
}
817220137Strasz
/*
 * Stub used when RACCT is not compiled in: nothing is inherited and the
 * call always succeeds.
 */
int
racct_proc_fork(struct proc *parent, struct proc *child)
{
	(void)parent;
	(void)child;

	return (0);
}
824220137Strasz
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_proc_exit(struct proc *p)
{
	(void)p;
}
829220137Strasz
830220137Strasz#endif /* !RACCT */
831