Deleted Added
full compact
kern_racct.c (235787) kern_racct.c (242139)
1/*-
2 * Copyright (c) 2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without

--- 12 unchanged lines hidden (view full) ---

21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
1/*-
2 * Copyright (c) 2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without

--- 12 unchanged lines hidden (view full) ---

21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/kern/kern_racct.c 235787 2012-05-22 15:58:27Z trasz $
29 * $FreeBSD: head/sys/kern/kern_racct.c 242139 2012-10-26 16:01:08Z trasz $
30 */
31
32#include <sys/cdefs.h>
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/kern/kern_racct.c 235787 2012-05-22 15:58:27Z trasz $");
33__FBSDID("$FreeBSD: head/sys/kern/kern_racct.c 242139 2012-10-26 16:01:08Z trasz $");
34
35#include "opt_kdtrace.h"
34
35#include "opt_kdtrace.h"
36#include "opt_sched.h"
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/eventhandler.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/kthread.h>
43#include <sys/lock.h>
44#include <sys/loginclass.h>
45#include <sys/malloc.h>
46#include <sys/mutex.h>
47#include <sys/proc.h>
48#include <sys/racct.h>
49#include <sys/resourcevar.h>
50#include <sys/sbuf.h>
51#include <sys/sched.h>
52#include <sys/sdt.h>
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/eventhandler.h>
41#include <sys/jail.h>
42#include <sys/kernel.h>
43#include <sys/kthread.h>
44#include <sys/lock.h>
45#include <sys/loginclass.h>
46#include <sys/malloc.h>
47#include <sys/mutex.h>
48#include <sys/proc.h>
49#include <sys/racct.h>
50#include <sys/resourcevar.h>
51#include <sys/sbuf.h>
52#include <sys/sched.h>
53#include <sys/sdt.h>
54#include <sys/smp.h>
53#include <sys/sx.h>
55#include <sys/sx.h>
56#include <sys/sysctl.h>
54#include <sys/sysent.h>
55#include <sys/sysproto.h>
56#include <sys/umtx.h>
57#include <sys/sysent.h>
58#include <sys/sysproto.h>
59#include <sys/umtx.h>
60#include <machine/smp.h>
57
58#ifdef RCTL
59#include <sys/rctl.h>
60#endif
61
62#ifdef RACCT
63
64FEATURE(racct, "Resource Accounting");
65
61
62#ifdef RCTL
63#include <sys/rctl.h>
64#endif
65
66#ifdef RACCT
67
68FEATURE(racct, "Resource Accounting");
69
70/*
71 * Do not block processes that have their %cpu usage <= pcpu_threshold.
72 */
73static int pcpu_threshold = 1;
74
75SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting");
76SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold,
77 0, "Processes with higher %cpu usage than this value can be throttled.");
78
79/*
80 * How many seconds it takes to use the scheduler %cpu calculations. When a
81 * process starts, we compute its %cpu usage by dividing its runtime by the
82 * process wall clock time. After RACCT_PCPU_SECS pass, we use the value
83 * provided by the scheduler.
84 */
85#define RACCT_PCPU_SECS 3
86
66static struct mtx racct_lock;
67MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
68
69static uma_zone_t racct_zone;
70
71static void racct_sub_racct(struct racct *dest, const struct racct *src);
72static void racct_sub_cred_locked(struct ucred *cred, int resource,
73 uint64_t amount);

--- 61 unchanged lines hidden (view full) ---

135 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
136 [RACCT_NSEMOP] =
137 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
138 [RACCT_NSHM] =
139 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
140 [RACCT_SHMSIZE] =
141 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
142 [RACCT_WALLCLOCK] =
87static struct mtx racct_lock;
88MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
89
90static uma_zone_t racct_zone;
91
92static void racct_sub_racct(struct racct *dest, const struct racct *src);
93static void racct_sub_cred_locked(struct ucred *cred, int resource,
94 uint64_t amount);

--- 61 unchanged lines hidden (view full) ---

156 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
157 [RACCT_NSEMOP] =
158 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
159 [RACCT_NSHM] =
160 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
161 [RACCT_SHMSIZE] =
162 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
163 [RACCT_WALLCLOCK] =
143 RACCT_IN_MILLIONS };
164 RACCT_IN_MILLIONS,
165 [RACCT_PCTCPU] =
166 RACCT_DECAYING | RACCT_DENIABLE | RACCT_IN_MILLIONS };
144
167
168static const fixpt_t RACCT_DECAY_FACTOR = 0.3 * FSCALE;
169
170#ifdef SCHED_4BSD
171/*
172 * Contains intermediate values for %cpu calculations to avoid using floating
173 * point in the kernel.
174 * ccpu_exp[k] = FSCALE * (ccpu/FSCALE)^k = FSCALE * exp(-k/20)
175 * It is needed only for the 4BSD scheduler; in ULE, ccpu is zero,
176 * so the calculations are more straightforward.
177 */
178fixpt_t ccpu_exp[] = {
179 [0] = FSCALE * 1,
180 [1] = FSCALE * 0.95122942450071400909,
181 [2] = FSCALE * 0.90483741803595957316,
182 [3] = FSCALE * 0.86070797642505780722,
183 [4] = FSCALE * 0.81873075307798185866,
184 [5] = FSCALE * 0.77880078307140486824,
185 [6] = FSCALE * 0.74081822068171786606,
186 [7] = FSCALE * 0.70468808971871343435,
187 [8] = FSCALE * 0.67032004603563930074,
188 [9] = FSCALE * 0.63762815162177329314,
189 [10] = FSCALE * 0.60653065971263342360,
190 [11] = FSCALE * 0.57694981038048669531,
191 [12] = FSCALE * 0.54881163609402643262,
192 [13] = FSCALE * 0.52204577676101604789,
193 [14] = FSCALE * 0.49658530379140951470,
194 [15] = FSCALE * 0.47236655274101470713,
195 [16] = FSCALE * 0.44932896411722159143,
196 [17] = FSCALE * 0.42741493194872666992,
197 [18] = FSCALE * 0.40656965974059911188,
198 [19] = FSCALE * 0.38674102345450120691,
199 [20] = FSCALE * 0.36787944117144232159,
200 [21] = FSCALE * 0.34993774911115535467,
201 [22] = FSCALE * 0.33287108369807955328,
202 [23] = FSCALE * 0.31663676937905321821,
203 [24] = FSCALE * 0.30119421191220209664,
204 [25] = FSCALE * 0.28650479686019010032,
205 [26] = FSCALE * 0.27253179303401260312,
206 [27] = FSCALE * 0.25924026064589150757,
207 [28] = FSCALE * 0.24659696394160647693,
208 [29] = FSCALE * 0.23457028809379765313,
209 [30] = FSCALE * 0.22313016014842982893,
210 [31] = FSCALE * 0.21224797382674305771,
211 [32] = FSCALE * 0.20189651799465540848,
212 [33] = FSCALE * 0.19204990862075411423,
213 [34] = FSCALE * 0.18268352405273465022,
214 [35] = FSCALE * 0.17377394345044512668,
215 [36] = FSCALE * 0.16529888822158653829,
216 [37] = FSCALE * 0.15723716631362761621,
217 [38] = FSCALE * 0.14956861922263505264,
218 [39] = FSCALE * 0.14227407158651357185,
219 [40] = FSCALE * 0.13533528323661269189,
220 [41] = FSCALE * 0.12873490358780421886,
221 [42] = FSCALE * 0.12245642825298191021,
222 [43] = FSCALE * 0.11648415777349695786,
223 [44] = FSCALE * 0.11080315836233388333,
224 [45] = FSCALE * 0.10539922456186433678,
225 [46] = FSCALE * 0.10025884372280373372,
226 [47] = FSCALE * 0.09536916221554961888,
227 [48] = FSCALE * 0.09071795328941250337,
228 [49] = FSCALE * 0.08629358649937051097,
229 [50] = FSCALE * 0.08208499862389879516,
230 [51] = FSCALE * 0.07808166600115315231,
231 [52] = FSCALE * 0.07427357821433388042,
232 [53] = FSCALE * 0.07065121306042958674,
233 [54] = FSCALE * 0.06720551273974976512,
234 [55] = FSCALE * 0.06392786120670757270,
235 [56] = FSCALE * 0.06081006262521796499,
236 [57] = FSCALE * 0.05784432087483846296,
237 [58] = FSCALE * 0.05502322005640722902,
238 [59] = FSCALE * 0.05233970594843239308,
239 [60] = FSCALE * 0.04978706836786394297,
240 [61] = FSCALE * 0.04735892439114092119,
241 [62] = FSCALE * 0.04504920239355780606,
242 [63] = FSCALE * 0.04285212686704017991,
243 [64] = FSCALE * 0.04076220397836621516,
244 [65] = FSCALE * 0.03877420783172200988,
245 [66] = FSCALE * 0.03688316740124000544,
246 [67] = FSCALE * 0.03508435410084502588,
247 [68] = FSCALE * 0.03337326996032607948,
248 [69] = FSCALE * 0.03174563637806794323,
249 [70] = FSCALE * 0.03019738342231850073,
250 [71] = FSCALE * 0.02872463965423942912,
251 [72] = FSCALE * 0.02732372244729256080,
252 [73] = FSCALE * 0.02599112877875534358,
253 [74] = FSCALE * 0.02472352647033939120,
254 [75] = FSCALE * 0.02351774585600910823,
255 [76] = FSCALE * 0.02237077185616559577,
256 [77] = FSCALE * 0.02127973643837716938,
257 [78] = FSCALE * 0.02024191144580438847,
258 [79] = FSCALE * 0.01925470177538692429,
259 [80] = FSCALE * 0.01831563888873418029,
260 [81] = FSCALE * 0.01742237463949351138,
261 [82] = FSCALE * 0.01657267540176124754,
262 [83] = FSCALE * 0.01576441648485449082,
263 [84] = FSCALE * 0.01499557682047770621,
264 [85] = FSCALE * 0.01426423390899925527,
265 [86] = FSCALE * 0.01356855901220093175,
266 [87] = FSCALE * 0.01290681258047986886,
267 [88] = FSCALE * 0.01227733990306844117,
268 [89] = FSCALE * 0.01167856697039544521,
269 [90] = FSCALE * 0.01110899653824230649,
270 [91] = FSCALE * 0.01056720438385265337,
271 [92] = FSCALE * 0.01005183574463358164,
272 [93] = FSCALE * 0.00956160193054350793,
273 [94] = FSCALE * 0.00909527710169581709,
274 [95] = FSCALE * 0.00865169520312063417,
275 [96] = FSCALE * 0.00822974704902002884,
276 [97] = FSCALE * 0.00782837754922577143,
277 [98] = FSCALE * 0.00744658307092434051,
278 [99] = FSCALE * 0.00708340892905212004,
279 [100] = FSCALE * 0.00673794699908546709,
280 [101] = FSCALE * 0.00640933344625638184,
281 [102] = FSCALE * 0.00609674656551563610,
282 [103] = FSCALE * 0.00579940472684214321,
283 [104] = FSCALE * 0.00551656442076077241,
284 [105] = FSCALE * 0.00524751839918138427,
285 [106] = FSCALE * 0.00499159390691021621,
286 [107] = FSCALE * 0.00474815099941147558,
287 [108] = FSCALE * 0.00451658094261266798,
288 [109] = FSCALE * 0.00429630469075234057,
289 [110] = FSCALE * 0.00408677143846406699,
290};
291#endif
292
293#define CCPU_EXP_MAX 110
294
295/*
296 * This function is analogical to the getpcpu() function in the ps(1) command.
297 * They should both calculate in the same way so that the racct %cpu
298 * calculations are consistent with the values showed by the ps(1) tool.
299 * The calculations are more complex in the 4BSD scheduler because of the value
300 * of the ccpu variable. In ULE it is defined to be zero which saves us some
301 * work.
302 */
303static uint64_t
304racct_getpcpu(struct proc *p, u_int pcpu)
305{
306 u_int swtime;
307#ifdef SCHED_4BSD
308 fixpt_t pctcpu, pctcpu_next;
309#endif
310#ifdef SMP
311 struct pcpu *pc;
312 int found;
313#endif
314 fixpt_t p_pctcpu;
315 struct thread *td;
316
317 /*
318 * If the process is swapped out, we count its %cpu usage as zero.
319 * This behaviour is consistent with the userland ps(1) tool.
320 */
321 if ((p->p_flag & P_INMEM) == 0)
322 return (0);
323 swtime = (ticks - p->p_swtick) / hz;
324
325 /*
326 * For short-lived processes, the sched_pctcpu() returns small
327 * values even for cpu intensive processes. Therefore we use
328 * our own estimate in this case.
329 */
330 if (swtime < RACCT_PCPU_SECS)
331 return (pcpu);
332
333 p_pctcpu = 0;
334 FOREACH_THREAD_IN_PROC(p, td) {
335 if (td == PCPU_GET(idlethread))
336 continue;
337#ifdef SMP
338 found = 0;
339 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
340 if (td == pc->pc_idlethread) {
341 found = 1;
342 break;
343 }
344 }
345 if (found)
346 continue;
347#endif
348 thread_lock(td);
349#ifdef SCHED_4BSD
350 pctcpu = sched_pctcpu(td);
351 /* Count also the yet unfinished second. */
352 pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT;
353 pctcpu_next += sched_pctcpu_delta(td);
354 p_pctcpu += max(pctcpu, pctcpu_next);
355#else
356 /*
357 * In ULE the %cpu statistics are updated on every
358 * sched_pctcpu() call. So special calculations to
359 * account for the latest (unfinished) second are
360 * not needed.
361 */
362 p_pctcpu += sched_pctcpu(td);
363#endif
364 thread_unlock(td);
365 }
366
367#ifdef SCHED_4BSD
368 if (swtime <= CCPU_EXP_MAX)
369 return ((100 * (uint64_t)p_pctcpu * 1000000) /
370 (FSCALE - ccpu_exp[swtime]));
371#endif
372
373 return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE);
374}
375
145static void
146racct_add_racct(struct racct *dest, const struct racct *src)
147{
148 int i;
149
150 mtx_assert(&racct_lock, MA_OWNED);
151
152 /*

--- 22 unchanged lines hidden (view full) ---

175 if (!RACCT_IS_SLOPPY(i)) {
176 KASSERT(dest->r_resources[i] >= 0,
177 ("racct propagation meltdown: dest < 0"));
178 KASSERT(src->r_resources[i] >= 0,
179 ("racct propagation meltdown: src < 0"));
180 KASSERT(src->r_resources[i] <= dest->r_resources[i],
181 ("racct propagation meltdown: src > dest"));
182 }
376static void
377racct_add_racct(struct racct *dest, const struct racct *src)
378{
379 int i;
380
381 mtx_assert(&racct_lock, MA_OWNED);
382
383 /*

--- 22 unchanged lines hidden (view full) ---

406 if (!RACCT_IS_SLOPPY(i)) {
407 KASSERT(dest->r_resources[i] >= 0,
408 ("racct propagation meltdown: dest < 0"));
409 KASSERT(src->r_resources[i] >= 0,
410 ("racct propagation meltdown: src < 0"));
411 KASSERT(src->r_resources[i] <= dest->r_resources[i],
412 ("racct propagation meltdown: src > dest"));
413 }
183 if (RACCT_IS_RECLAIMABLE(i)) {
414 if (RACCT_CAN_DROP(i)) {
184 dest->r_resources[i] -= src->r_resources[i];
185 if (dest->r_resources[i] < 0) {
186 KASSERT(RACCT_IS_SLOPPY(i),
187 ("racct_sub_racct: usage < 0"));
188 dest->r_resources[i] = 0;
189 }
190 }
191 }

--- 57 unchanged lines hidden (view full) ---

249 uint64_t amount)
250{
251
252 mtx_assert(&racct_lock, MA_OWNED);
253 KASSERT(racct != NULL, ("NULL racct"));
254
255 racct->r_resources[resource] += amount;
256 if (racct->r_resources[resource] < 0) {
415 dest->r_resources[i] -= src->r_resources[i];
416 if (dest->r_resources[i] < 0) {
417 KASSERT(RACCT_IS_SLOPPY(i),
418 ("racct_sub_racct: usage < 0"));
419 dest->r_resources[i] = 0;
420 }
421 }
422 }

--- 57 unchanged lines hidden (view full) ---

480 uint64_t amount)
481{
482
483 mtx_assert(&racct_lock, MA_OWNED);
484 KASSERT(racct != NULL, ("NULL racct"));
485
486 racct->r_resources[resource] += amount;
487 if (racct->r_resources[resource] < 0) {
257 KASSERT(RACCT_IS_SLOPPY(resource),
488 KASSERT(RACCT_IS_SLOPPY(resource) || RACCT_IS_DECAYING(resource),
258 ("racct_alloc_resource: usage < 0"));
259 racct->r_resources[resource] = 0;
260 }
489 ("racct_alloc_resource: usage < 0"));
490 racct->r_resources[resource] = 0;
491 }
492
493 /*
494 * There are some cases where the racct %cpu resource would grow
495 * beyond 100%.
496 * For example in racct_proc_exit() we add the process %cpu usage
497 * to the ucred racct containers. If too many processes terminated
498 * in a short time span, the ucred %cpu resource could grow too much.
499 * Also, the 4BSD scheduler sometimes returns for a thread more than
500 * 100% cpu usage. So we set a boundary here to 100%.
501 */
502 if ((resource == RACCT_PCTCPU) &&
503 (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000))
504 racct->r_resources[RACCT_PCTCPU] = 100 * 1000000;
261}
262
263static int
264racct_add_locked(struct proc *p, int resource, uint64_t amount)
265{
266#ifdef RCTL
267 int error;
268#endif

--- 83 unchanged lines hidden (view full) ---

352 racct_alloc_resource(p->p_racct, resource, amount);
353 mtx_unlock(&racct_lock);
354 racct_add_cred(p->p_ucred, resource, amount);
355}
356
357static int
358racct_set_locked(struct proc *p, int resource, uint64_t amount)
359{
505}
506
507static int
508racct_add_locked(struct proc *p, int resource, uint64_t amount)
509{
510#ifdef RCTL
511 int error;
512#endif

--- 83 unchanged lines hidden (view full) ---

596 racct_alloc_resource(p->p_racct, resource, amount);
597 mtx_unlock(&racct_lock);
598 racct_add_cred(p->p_ucred, resource, amount);
599}
600
601static int
602racct_set_locked(struct proc *p, int resource, uint64_t amount)
603{
360 int64_t diff;
604 int64_t old_amount, decayed_amount;
605 int64_t diff_proc, diff_cred;
361#ifdef RCTL
362 int error;
363#endif
364
365 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
366
367 /*
368 * We need proc lock to dereference p->p_ucred.
369 */
370 PROC_LOCK_ASSERT(p, MA_OWNED);
371
606#ifdef RCTL
607 int error;
608#endif
609
610 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
611
612 /*
613 * We need proc lock to dereference p->p_ucred.
614 */
615 PROC_LOCK_ASSERT(p, MA_OWNED);
616
372 diff = amount - p->p_racct->r_resources[resource];
617 old_amount = p->p_racct->r_resources[resource];
618 /*
619 * The diffs may be negative.
620 */
621 diff_proc = amount - old_amount;
622 if (RACCT_IS_DECAYING(resource)) {
623 /*
624 * Resources in per-credential racct containers may decay.
625 * If this is the case, we need to calculate the difference
626 * between the new amount and the proportional value of the
627 * old amount that has decayed in the ucred racct containers.
628 */
629 decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE;
630 diff_cred = amount - decayed_amount;
631 } else
632 diff_cred = diff_proc;
373#ifdef notyet
633#ifdef notyet
374 KASSERT(diff >= 0 || RACCT_IS_RECLAIMABLE(resource),
375 ("racct_set: usage of non-reclaimable resource %d dropping",
634 KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource),
635 ("racct_set: usage of non-droppable resource %d dropping",
376 resource));
377#endif
378#ifdef RCTL
636 resource));
637#endif
638#ifdef RCTL
379 if (diff > 0) {
380 error = rctl_enforce(p, resource, diff);
639 if (diff_proc > 0) {
640 error = rctl_enforce(p, resource, diff_proc);
381 if (error && RACCT_IS_DENIABLE(resource)) {
382 SDT_PROBE(racct, kernel, rusage, set_failure, p,
383 resource, amount, 0, 0);
384 return (error);
385 }
386 }
387#endif
641 if (error && RACCT_IS_DENIABLE(resource)) {
642 SDT_PROBE(racct, kernel, rusage, set_failure, p,
643 resource, amount, 0, 0);
644 return (error);
645 }
646 }
647#endif
388 racct_alloc_resource(p->p_racct, resource, diff);
389 if (diff > 0)
390 racct_add_cred_locked(p->p_ucred, resource, diff);
391 else if (diff < 0)
392 racct_sub_cred_locked(p->p_ucred, resource, -diff);
648 racct_alloc_resource(p->p_racct, resource, diff_proc);
649 if (diff_cred > 0)
650 racct_add_cred_locked(p->p_ucred, resource, diff_cred);
651 else if (diff_cred < 0)
652 racct_sub_cred_locked(p->p_ucred, resource, -diff_cred);
393
394 return (0);
395}
396
397/*
398 * Set allocation of 'resource' to 'amount' for process 'p'.
399 * Return 0 if it's below limits, or errno, if it's not.
400 *

--- 6 unchanged lines hidden (view full) ---

407 int error;
408
409 mtx_lock(&racct_lock);
410 error = racct_set_locked(p, resource, amount);
411 mtx_unlock(&racct_lock);
412 return (error);
413}
414
653
654 return (0);
655}
656
657/*
658 * Set allocation of 'resource' to 'amount' for process 'p'.
659 * Return 0 if it's below limits, or errno, if it's not.
660 *

--- 6 unchanged lines hidden (view full) ---

667 int error;
668
669 mtx_lock(&racct_lock);
670 error = racct_set_locked(p, resource, amount);
671 mtx_unlock(&racct_lock);
672 return (error);
673}
674
415void
416racct_set_force(struct proc *p, int resource, uint64_t amount)
675static void
676racct_set_force_locked(struct proc *p, int resource, uint64_t amount)
417{
677{
418 int64_t diff;
678 int64_t old_amount, decayed_amount;
679 int64_t diff_proc, diff_cred;
419
420 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
421
422 /*
423 * We need proc lock to dereference p->p_ucred.
424 */
425 PROC_LOCK_ASSERT(p, MA_OWNED);
426
680
681 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
682
683 /*
684 * We need proc lock to dereference p->p_ucred.
685 */
686 PROC_LOCK_ASSERT(p, MA_OWNED);
687
688 old_amount = p->p_racct->r_resources[resource];
689 /*
690 * The diffs may be negative.
691 */
692 diff_proc = amount - old_amount;
693 if (RACCT_IS_DECAYING(resource)) {
694 /*
695 * Resources in per-credential racct containers may decay.
696 * If this is the case, we need to calculate the difference
697 * between the new amount and the proportional value of the
698 * old amount that has decayed in the ucred racct containers.
699 */
700 decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE;
701 diff_cred = amount - decayed_amount;
702 } else
703 diff_cred = diff_proc;
704
705 racct_alloc_resource(p->p_racct, resource, diff_proc);
706 if (diff_cred > 0)
707 racct_add_cred_locked(p->p_ucred, resource, diff_cred);
708 else if (diff_cred < 0)
709 racct_sub_cred_locked(p->p_ucred, resource, -diff_cred);
710}
711
712void
713racct_set_force(struct proc *p, int resource, uint64_t amount)
714{
427 mtx_lock(&racct_lock);
715 mtx_lock(&racct_lock);
428 diff = amount - p->p_racct->r_resources[resource];
429 racct_alloc_resource(p->p_racct, resource, diff);
430 if (diff > 0)
431 racct_add_cred_locked(p->p_ucred, resource, diff);
432 else if (diff < 0)
433 racct_sub_cred_locked(p->p_ucred, resource, -diff);
716 racct_set_force_locked(p, resource, amount);
434 mtx_unlock(&racct_lock);
435}
436
437/*
438 * Returns amount of 'resource' the process 'p' can keep allocated.
439 * Allocating more than that would be denied, unless the resource
440 * is marked undeniable. Amount of already allocated resource does
441 * not matter.

--- 22 unchanged lines hidden (view full) ---

464#ifdef RCTL
465 return (rctl_get_available(p, resource));
466#else
467 return (UINT64_MAX);
468#endif
469}
470
471/*
717 mtx_unlock(&racct_lock);
718}
719
720/*
721 * Returns amount of 'resource' the process 'p' can keep allocated.
722 * Allocating more than that would be denied, unless the resource
723 * is marked undeniable. Amount of already allocated resource does
724 * not matter.

--- 22 unchanged lines hidden (view full) ---

747#ifdef RCTL
748 return (rctl_get_available(p, resource));
749#else
750 return (UINT64_MAX);
751#endif
752}
753
/*
 * Returns the amount of the %cpu resource that process 'p' may still add to
 * its %cpu utilization; going beyond it would get the process throttled.
 * Kernels built without RCTL report INT64_MAX.
 */
static int64_t
racct_pcpu_available(struct proc *p)
{
	int64_t avail;

#ifdef RCTL
	avail = rctl_pcpu_available(p);
#else
	avail = INT64_MAX;
#endif
	return (avail);
}
769
770/*
472 * Decrease allocation of 'resource' by 'amount' for process 'p'.
473 */
474void
475racct_sub(struct proc *p, int resource, uint64_t amount)
476{
477
478 SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
479
480 /*
481 * We need proc lock to dereference p->p_ucred.
482 */
483 PROC_LOCK_ASSERT(p, MA_OWNED);
771 * Decrease allocation of 'resource' by 'amount' for process 'p'.
772 */
773void
774racct_sub(struct proc *p, int resource, uint64_t amount)
775{
776
777 SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
778
779 /*
780 * We need proc lock to dereference p->p_ucred.
781 */
782 PROC_LOCK_ASSERT(p, MA_OWNED);
484 KASSERT(RACCT_IS_RECLAIMABLE(resource),
485 ("racct_sub: called for non-reclaimable resource %d", resource));
783 KASSERT(RACCT_CAN_DROP(resource),
784 ("racct_sub: called for non-droppable resource %d", resource));
486
487 mtx_lock(&racct_lock);
488 KASSERT(amount <= p->p_racct->r_resources[resource],
489 ("racct_sub: freeing %ju of resource %d, which is more "
490 "than allocated %jd for %s (pid %d)", amount, resource,
491 (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
492
493 racct_alloc_resource(p->p_racct, resource, -amount);

--- 5 unchanged lines hidden (view full) ---

499racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
500{
501 struct prison *pr;
502
503 SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
504 0, 0);
505
506#ifdef notyet
785
786 mtx_lock(&racct_lock);
787 KASSERT(amount <= p->p_racct->r_resources[resource],
788 ("racct_sub: freeing %ju of resource %d, which is more "
789 "than allocated %jd for %s (pid %d)", amount, resource,
790 (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
791
792 racct_alloc_resource(p->p_racct, resource, -amount);

--- 5 unchanged lines hidden (view full) ---

798racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
799{
800 struct prison *pr;
801
802 SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
803 0, 0);
804
805#ifdef notyet
507 KASSERT(RACCT_IS_RECLAIMABLE(resource),
508 ("racct_sub_cred: called for non-reclaimable resource %d",
806 KASSERT(RACCT_CAN_DROP(resource),
807 ("racct_sub_cred: called for resource %d which can not drop",
509 resource));
510#endif
511
512 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
513 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
514 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
515 -amount);
516 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);

--- 29 unchanged lines hidden (view full) ---

546 mtx_lock(&racct_lock);
547
548#ifdef RCTL
549 error = rctl_proc_fork(parent, child);
550 if (error != 0)
551 goto out;
552#endif
553
808 resource));
809#endif
810
811 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
812 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
813 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
814 -amount);
815 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);

--- 29 unchanged lines hidden (view full) ---

845 mtx_lock(&racct_lock);
846
847#ifdef RCTL
848 error = rctl_proc_fork(parent, child);
849 if (error != 0)
850 goto out;
851#endif
852
853 /* Init process cpu time. */
854 child->p_prev_runtime = 0;
855 child->p_throttled = 0;
856
554 /*
555 * Inherit resource usage.
556 */
557 for (i = 0; i <= RACCT_MAX; i++) {
558 if (parent->p_racct->r_resources[i] == 0 ||
559 !RACCT_IS_INHERITABLE(i))
560 continue;
561

--- 35 unchanged lines hidden (view full) ---

597#endif
598}
599
600void
601racct_proc_exit(struct proc *p)
602{
603 int i;
604 uint64_t runtime;
857 /*
858 * Inherit resource usage.
859 */
860 for (i = 0; i <= RACCT_MAX; i++) {
861 if (parent->p_racct->r_resources[i] == 0 ||
862 !RACCT_IS_INHERITABLE(i))
863 continue;
864

--- 35 unchanged lines hidden (view full) ---

900#endif
901}
902
903void
904racct_proc_exit(struct proc *p)
905{
906 int i;
907 uint64_t runtime;
908 struct timeval wallclock;
909 uint64_t pct_estimate, pct;
605
606 PROC_LOCK(p);
607 /*
608 * We don't need to calculate rux, proc_reap() has already done this.
609 */
610 runtime = cputick2usec(p->p_rux.rux_runtime);
611#ifdef notyet
612 KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
613#else
614 if (runtime < p->p_prev_runtime)
615 runtime = p->p_prev_runtime;
616#endif
910
911 PROC_LOCK(p);
912 /*
913 * We don't need to calculate rux, proc_reap() has already done this.
914 */
915 runtime = cputick2usec(p->p_rux.rux_runtime);
916#ifdef notyet
917 KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
918#else
919 if (runtime < p->p_prev_runtime)
920 runtime = p->p_prev_runtime;
921#endif
922 microuptime(&wallclock);
923 timevalsub(&wallclock, &p->p_stats->p_start);
924 pct_estimate = (1000000 * runtime * 100) /
925 ((uint64_t)wallclock.tv_sec * 1000000 +
926 wallclock.tv_usec);
927 pct = racct_getpcpu(p, pct_estimate);
928
617 mtx_lock(&racct_lock);
618 racct_set_locked(p, RACCT_CPU, runtime);
929 mtx_lock(&racct_lock);
930 racct_set_locked(p, RACCT_CPU, runtime);
931 racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct);
619
620 for (i = 0; i <= RACCT_MAX; i++) {
621 if (p->p_racct->r_resources[i] == 0)
622 continue;
623 if (!RACCT_IS_RECLAIMABLE(i))
624 continue;
625 racct_set_locked(p, i, 0);
626 }

--- 60 unchanged lines hidden (view full) ---

687
688 racct_add_racct(dest, src);
689 racct_sub_racct(src, src);
690
691 mtx_unlock(&racct_lock);
692}
693
694static void
932
933 for (i = 0; i <= RACCT_MAX; i++) {
934 if (p->p_racct->r_resources[i] == 0)
935 continue;
936 if (!RACCT_IS_RECLAIMABLE(i))
937 continue;
938 racct_set_locked(p, i, 0);
939 }

--- 60 unchanged lines hidden (view full) ---

1000
1001 racct_add_racct(dest, src);
1002 racct_sub_racct(src, src);
1003
1004 mtx_unlock(&racct_lock);
1005}
1006
1007static void
1008racct_proc_throttle(struct proc *p)
1009{
1010 struct thread *td;
1011#ifdef SMP
1012 int cpuid;
1013#endif
1014
1015 PROC_LOCK_ASSERT(p, MA_OWNED);
1016
1017 /*
1018 * Do not block kernel processes. Also do not block processes with
1019 * low %cpu utilization to improve interactivity.
1020 */
1021 if (((p->p_flag & (P_SYSTEM | P_KTHREAD)) != 0) ||
1022 (p->p_racct->r_resources[RACCT_PCTCPU] <= pcpu_threshold))
1023 return;
1024 p->p_throttled = 1;
1025
1026 FOREACH_THREAD_IN_PROC(p, td) {
1027 switch (td->td_state) {
1028 case TDS_RUNQ:
1029 /*
1030 * If the thread is on the scheduler run-queue, we can
1031 * not just remove it from there. So we set the flag
1032 * TDF_NEEDRESCHED for the thread, so that once it is
1033 * running, it is taken off the cpu as soon as possible.
1034 */
1035 thread_lock(td);
1036 td->td_flags |= TDF_NEEDRESCHED;
1037 thread_unlock(td);
1038 break;
1039 case TDS_RUNNING:
1040 /*
1041 * If the thread is running, we request a context
1042 * switch for it by setting the TDF_NEEDRESCHED flag.
1043 */
1044 thread_lock(td);
1045 td->td_flags |= TDF_NEEDRESCHED;
1046#ifdef SMP
1047 cpuid = td->td_oncpu;
1048 if ((cpuid != NOCPU) && (td != curthread))
1049 ipi_cpu(cpuid, IPI_AST);
1050#endif
1051 thread_unlock(td);
1052 break;
1053 default:
1054 break;
1055 }
1056 }
1057}
1058
1059static void
1060racct_proc_wakeup(struct proc *p)
1061{
1062 PROC_LOCK_ASSERT(p, MA_OWNED);
1063
1064 if (p->p_throttled) {
1065 p->p_throttled = 0;
1066 wakeup(p->p_racct);
1067 }
1068}
1069
1070static void
1071racct_decay_resource(struct racct *racct, void * res, void* dummy)
1072{
1073 int resource;
1074 int64_t r_old, r_new;
1075
1076 resource = *(int *)res;
1077 r_old = racct->r_resources[resource];
1078
1079 /* If there is nothing to decay, just exit. */
1080 if (r_old <= 0)
1081 return;
1082
1083 mtx_lock(&racct_lock);
1084 r_new = r_old * RACCT_DECAY_FACTOR / FSCALE;
1085 racct->r_resources[resource] = r_new;
1086 mtx_unlock(&racct_lock);
1087}
1088
1089static void
1090racct_decay(int resource)
1091{
1092 ui_racct_foreach(racct_decay_resource, &resource, NULL);
1093 loginclass_racct_foreach(racct_decay_resource, &resource, NULL);
1094 prison_racct_foreach(racct_decay_resource, &resource, NULL);
1095}
1096
1097static void
695racctd(void)
696{
697 struct thread *td;
698 struct proc *p;
699 struct timeval wallclock;
700 uint64_t runtime;
1098racctd(void)
1099{
1100 struct thread *td;
1101 struct proc *p;
1102 struct timeval wallclock;
1103 uint64_t runtime;
1104 uint64_t pct, pct_estimate;
701
702 for (;;) {
1105
1106 for (;;) {
1107 racct_decay(RACCT_PCTCPU);
1108
703 sx_slock(&allproc_lock);
704
1109 sx_slock(&allproc_lock);
1110
1111 LIST_FOREACH(p, &zombproc, p_list) {
1112 PROC_LOCK(p);
1113 racct_set(p, RACCT_PCTCPU, 0);
1114 PROC_UNLOCK(p);
1115 }
1116
705 FOREACH_PROC_IN_SYSTEM(p) {
1117 FOREACH_PROC_IN_SYSTEM(p) {
706 if (p->p_state != PRS_NORMAL)
1118 PROC_LOCK(p);
1119 if (p->p_state != PRS_NORMAL) {
1120 PROC_UNLOCK(p);
707 continue;
1121 continue;
1122 }
708
709 microuptime(&wallclock);
710 timevalsub(&wallclock, &p->p_stats->p_start);
1123
1124 microuptime(&wallclock);
1125 timevalsub(&wallclock, &p->p_stats->p_start);
711 PROC_LOCK(p);
712 PROC_SLOCK(p);
713 FOREACH_THREAD_IN_PROC(p, td)
714 ruxagg(p, td);
715 runtime = cputick2usec(p->p_rux.rux_runtime);
716 PROC_SUNLOCK(p);
717#ifdef notyet
718 KASSERT(runtime >= p->p_prev_runtime,
719 ("runtime < p_prev_runtime"));
720#else
721 if (runtime < p->p_prev_runtime)
722 runtime = p->p_prev_runtime;
723#endif
724 p->p_prev_runtime = runtime;
1126 PROC_SLOCK(p);
1127 FOREACH_THREAD_IN_PROC(p, td)
1128 ruxagg(p, td);
1129 runtime = cputick2usec(p->p_rux.rux_runtime);
1130 PROC_SUNLOCK(p);
1131#ifdef notyet
1132 KASSERT(runtime >= p->p_prev_runtime,
1133 ("runtime < p_prev_runtime"));
1134#else
1135 if (runtime < p->p_prev_runtime)
1136 runtime = p->p_prev_runtime;
1137#endif
1138 p->p_prev_runtime = runtime;
1139 pct_estimate = (1000000 * runtime * 100) /
1140 ((uint64_t)wallclock.tv_sec * 1000000 +
1141 wallclock.tv_usec);
1142 pct = racct_getpcpu(p, pct_estimate);
725 mtx_lock(&racct_lock);
1143 mtx_lock(&racct_lock);
1144 racct_set_force_locked(p, RACCT_PCTCPU, pct);
726 racct_set_locked(p, RACCT_CPU, runtime);
727 racct_set_locked(p, RACCT_WALLCLOCK,
728 (uint64_t)wallclock.tv_sec * 1000000 +
729 wallclock.tv_usec);
730 mtx_unlock(&racct_lock);
731 PROC_UNLOCK(p);
732 }
1145 racct_set_locked(p, RACCT_CPU, runtime);
1146 racct_set_locked(p, RACCT_WALLCLOCK,
1147 (uint64_t)wallclock.tv_sec * 1000000 +
1148 wallclock.tv_usec);
1149 mtx_unlock(&racct_lock);
1150 PROC_UNLOCK(p);
1151 }
1152
1153 /*
1154 * To ensure that processes are throttled in a fair way, we need
1155 * to iterate over all processes again and check the limits
1156 * for %cpu resource only after ucred racct containers have been
1157 * properly filled.
1158 */
1159 FOREACH_PROC_IN_SYSTEM(p) {
1160 PROC_LOCK(p);
1161 if (p->p_state != PRS_NORMAL) {
1162 PROC_UNLOCK(p);
1163 continue;
1164 }
1165
1166 if (racct_pcpu_available(p) <= 0)
1167 racct_proc_throttle(p);
1168 else if (p->p_throttled)
1169 racct_proc_wakeup(p);
1170 PROC_UNLOCK(p);
1171 }
733 sx_sunlock(&allproc_lock);
734 pause("-", hz);
735 }
736}
737
738static struct kproc_desc racctd_kp = {
739 "racctd",
740 racctd,

--- 102 unchanged lines hidden ---
1172 sx_sunlock(&allproc_lock);
1173 pause("-", hz);
1174 }
1175}
1176
1177static struct kproc_desc racctd_kp = {
1178 "racctd",
1179 racctd,

--- 102 unchanged lines hidden ---