kern_racct.c revision 242957
1221163Sadrian/*- 2221163Sadrian * Copyright (c) 2010 The FreeBSD Foundation 3221163Sadrian * All rights reserved. 4221163Sadrian * 5221163Sadrian * This software was developed by Edward Tomasz Napierala under sponsorship 6221163Sadrian * from the FreeBSD Foundation. 7221163Sadrian * 8221163Sadrian * Redistribution and use in source and binary forms, with or without 9221163Sadrian * modification, are permitted provided that the following conditions 10221163Sadrian * are met: 11221163Sadrian * 1. Redistributions of source code must retain the above copyright 12221163Sadrian * notice, this list of conditions and the following disclaimer. 13221163Sadrian * 2. Redistributions in binary form must reproduce the above copyright 14221163Sadrian * notice, this list of conditions and the following disclaimer in the 15221163Sadrian * documentation and/or other materials provided with the distribution. 16221163Sadrian * 17221163Sadrian * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18221163Sadrian * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19221163Sadrian * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20221163Sadrian * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21221163Sadrian * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22221163Sadrian * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23221163Sadrian * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24221163Sadrian * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25221163Sadrian * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26221163Sadrian * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27221163Sadrian * SUCH DAMAGE. 
28221163Sadrian * 29221163Sadrian * $FreeBSD: head/sys/kern/kern_racct.c 242957 2012-11-13 11:29:08Z trasz $ 30221163Sadrian */ 31221163Sadrian 32221163Sadrian#include <sys/cdefs.h> 33221163Sadrian__FBSDID("$FreeBSD: head/sys/kern/kern_racct.c 242957 2012-11-13 11:29:08Z trasz $"); 34221163Sadrian 35221163Sadrian#include "opt_kdtrace.h" 36221163Sadrian#include "opt_sched.h" 37221163Sadrian 38221163Sadrian#include <sys/param.h> 39221163Sadrian#include <sys/systm.h> 40221163Sadrian#include <sys/eventhandler.h> 41221163Sadrian#include <sys/jail.h> 42221163Sadrian#include <sys/kernel.h> 43221163Sadrian#include <sys/kthread.h> 44221163Sadrian#include <sys/lock.h> 45221163Sadrian#include <sys/loginclass.h> 46221163Sadrian#include <sys/malloc.h> 47221163Sadrian#include <sys/mutex.h> 48221163Sadrian#include <sys/proc.h> 49221163Sadrian#include <sys/racct.h> 50221163Sadrian#include <sys/resourcevar.h> 51221163Sadrian#include <sys/sbuf.h> 52221163Sadrian#include <sys/sched.h> 53221163Sadrian#include <sys/sdt.h> 54221163Sadrian#include <sys/smp.h> 55221163Sadrian#include <sys/sx.h> 56221163Sadrian#include <sys/sysctl.h> 57221163Sadrian#include <sys/sysent.h> 58221163Sadrian#include <sys/sysproto.h> 59221163Sadrian#include <sys/umtx.h> 60221163Sadrian#include <machine/smp.h> 61221163Sadrian 62221163Sadrian#ifdef RCTL 63221163Sadrian#include <sys/rctl.h> 64221163Sadrian#endif 65221163Sadrian 66221163Sadrian#ifdef RACCT 67221163Sadrian 68221163SadrianFEATURE(racct, "Resource Accounting"); 69221163Sadrian 70221163Sadrian/* 71221163Sadrian * Do not block processes that have their %cpu usage <= pcpu_threshold. 
 */
/*
 * NOTE(review): pcpu_threshold is declared 'int' but is exported through
 * SYSCTL_UINT below; confirm which signedness is intended.
 */
static int pcpu_threshold = 1;

SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting");
SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold,
    0, "Processes with higher %cpu usage than this value can be throttled.");

/*
 * How many seconds it takes to use the scheduler %cpu calculations.  When a
 * process starts, we compute its %cpu usage by dividing its runtime by the
 * process wall clock time.  After RACCT_PCPU_SECS pass, we use the value
 * provided by the scheduler.
 */
#define RACCT_PCPU_SECS		3

/*
 * Mutex taken around every update of the r_resources[] counters in this
 * file; the *_locked helpers expect the caller to hold it.
 */
static struct mtx racct_lock;
MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);

/* UMA zone that racct_create() and racct_destroy_locked() allocate from. */
static uma_zone_t racct_zone;

static void racct_sub_racct(struct racct *dest, const struct racct *src);
static void racct_sub_cred_locked(struct ucred *cred, int resource,
		uint64_t amount);
static void racct_add_cred_locked(struct ucred *cred, int resource,
		uint64_t amount);

/*
 * DTrace SDT probes.  The rusage probes take (subject, resource, amount);
 * the racct probes take one or two 'struct racct *' arguments.
 */
SDT_PROVIDER_DEFINE(racct);
SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
    "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
    "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
    "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
    "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
    "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
    "int", "uint64_t");
SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
    "struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
    "struct racct *", "struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
    "struct racct *");

/*
 * Per-resource property flags, indexed by RACCT_* resource number.
 * Presumably these are what the RACCT_IS_*() / RACCT_CAN_DROP() macros used
 * throughout this file test; the macros are defined elsewhere -- confirm
 * against sys/racct.h.
 */
int racct_types[] = {
	[RACCT_CPU] =
		RACCT_IN_MILLIONS,
	[RACCT_DATA] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_STACK] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_CORE] =
		RACCT_DENIABLE,
	[RACCT_RSS] =
		RACCT_RECLAIMABLE,
	[RACCT_MEMLOCK] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NPROC] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NOFILE] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_VMEM] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NPTS] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SWAP] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NTHR] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_MSGQQUEUED] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_MSGQSIZE] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NMSGQ] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEM] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEMOP] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NSHM] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SHMSIZE] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_WALLCLOCK] =
		RACCT_IN_MILLIONS,
	[RACCT_PCTCPU] =
		RACCT_DECAYING | RACCT_DENIABLE | RACCT_IN_MILLIONS };

/*
 * Fixed-point (FSCALE-scaled) factor 0.3; racct_set_locked() and
 * racct_set_force_locked() multiply the old per-process amount by it to
 * estimate what is left of that amount in the per-credential containers.
 */
static const fixpt_t RACCT_DECAY_FACTOR = 0.3 * FSCALE;

#ifdef SCHED_4BSD
/*
 * Contains intermediate values for %cpu calculations to avoid using floating
 * point in the kernel.
 * ccpu_exp[k] = FSCALE * (ccpu/FSCALE)^k = FSCALE * exp(-k/20)
 * It is needed only for the 4BSD scheduler, because in ULE, the ccpu equals to
 * zero so the calculations are more straightforward.
177221163Sadrian */ 178221163Sadrianfixpt_t ccpu_exp[] = { 179221163Sadrian [0] = FSCALE * 1, 180221163Sadrian [1] = FSCALE * 0.95122942450071400909, 181221163Sadrian [2] = FSCALE * 0.90483741803595957316, 182221163Sadrian [3] = FSCALE * 0.86070797642505780722, 183221163Sadrian [4] = FSCALE * 0.81873075307798185866, 184221163Sadrian [5] = FSCALE * 0.77880078307140486824, 185221163Sadrian [6] = FSCALE * 0.74081822068171786606, 186221163Sadrian [7] = FSCALE * 0.70468808971871343435, 187221163Sadrian [8] = FSCALE * 0.67032004603563930074, 188221163Sadrian [9] = FSCALE * 0.63762815162177329314, 189221163Sadrian [10] = FSCALE * 0.60653065971263342360, 190221163Sadrian [11] = FSCALE * 0.57694981038048669531, 191221163Sadrian [12] = FSCALE * 0.54881163609402643262, 192221163Sadrian [13] = FSCALE * 0.52204577676101604789, 193221163Sadrian [14] = FSCALE * 0.49658530379140951470, 194221163Sadrian [15] = FSCALE * 0.47236655274101470713, 195221163Sadrian [16] = FSCALE * 0.44932896411722159143, 196221163Sadrian [17] = FSCALE * 0.42741493194872666992, 197221163Sadrian [18] = FSCALE * 0.40656965974059911188, 198221163Sadrian [19] = FSCALE * 0.38674102345450120691, 199221163Sadrian [20] = FSCALE * 0.36787944117144232159, 200221163Sadrian [21] = FSCALE * 0.34993774911115535467, 201221163Sadrian [22] = FSCALE * 0.33287108369807955328, 202221163Sadrian [23] = FSCALE * 0.31663676937905321821, 203221163Sadrian [24] = FSCALE * 0.30119421191220209664, 204221163Sadrian [25] = FSCALE * 0.28650479686019010032, 205221163Sadrian [26] = FSCALE * 0.27253179303401260312, 206221163Sadrian [27] = FSCALE * 0.25924026064589150757, 207221163Sadrian [28] = FSCALE * 0.24659696394160647693, 208221163Sadrian [29] = FSCALE * 0.23457028809379765313, 209221163Sadrian [30] = FSCALE * 0.22313016014842982893, 210221163Sadrian [31] = FSCALE * 0.21224797382674305771, 211221163Sadrian [32] = FSCALE * 0.20189651799465540848, 212221163Sadrian [33] = FSCALE * 0.19204990862075411423, 213221163Sadrian [34] = FSCALE * 
0.18268352405273465022, 214221163Sadrian [35] = FSCALE * 0.17377394345044512668, 215221163Sadrian [36] = FSCALE * 0.16529888822158653829, 216221163Sadrian [37] = FSCALE * 0.15723716631362761621, 217221163Sadrian [38] = FSCALE * 0.14956861922263505264, 218221163Sadrian [39] = FSCALE * 0.14227407158651357185, 219221163Sadrian [40] = FSCALE * 0.13533528323661269189, 220221163Sadrian [41] = FSCALE * 0.12873490358780421886, 221221163Sadrian [42] = FSCALE * 0.12245642825298191021, 222221163Sadrian [43] = FSCALE * 0.11648415777349695786, 223221163Sadrian [44] = FSCALE * 0.11080315836233388333, 224221163Sadrian [45] = FSCALE * 0.10539922456186433678, 225221163Sadrian [46] = FSCALE * 0.10025884372280373372, 226221163Sadrian [47] = FSCALE * 0.09536916221554961888, 227221163Sadrian [48] = FSCALE * 0.09071795328941250337, 228221163Sadrian [49] = FSCALE * 0.08629358649937051097, 229221163Sadrian [50] = FSCALE * 0.08208499862389879516, 230221163Sadrian [51] = FSCALE * 0.07808166600115315231, 231221163Sadrian [52] = FSCALE * 0.07427357821433388042, 232221163Sadrian [53] = FSCALE * 0.07065121306042958674, 233221163Sadrian [54] = FSCALE * 0.06720551273974976512, 234221163Sadrian [55] = FSCALE * 0.06392786120670757270, 235221163Sadrian [56] = FSCALE * 0.06081006262521796499, 236221163Sadrian [57] = FSCALE * 0.05784432087483846296, 237221163Sadrian [58] = FSCALE * 0.05502322005640722902, 238221163Sadrian [59] = FSCALE * 0.05233970594843239308, 239221596Sadrian [60] = FSCALE * 0.04978706836786394297, 240221596Sadrian [61] = FSCALE * 0.04735892439114092119, 241221163Sadrian [62] = FSCALE * 0.04504920239355780606, 242221596Sadrian [63] = FSCALE * 0.04285212686704017991, 243221163Sadrian [64] = FSCALE * 0.04076220397836621516, 244221163Sadrian [65] = FSCALE * 0.03877420783172200988, 245221163Sadrian [66] = FSCALE * 0.03688316740124000544, 246221163Sadrian [67] = FSCALE * 0.03508435410084502588, 247221163Sadrian [68] = FSCALE * 0.03337326996032607948, 248221163Sadrian [69] = FSCALE * 
0.03174563637806794323, 249221163Sadrian [70] = FSCALE * 0.03019738342231850073, 250221163Sadrian [71] = FSCALE * 0.02872463965423942912, 251221163Sadrian [72] = FSCALE * 0.02732372244729256080, 252221163Sadrian [73] = FSCALE * 0.02599112877875534358, 253221163Sadrian [74] = FSCALE * 0.02472352647033939120, 254221163Sadrian [75] = FSCALE * 0.02351774585600910823, 255221163Sadrian [76] = FSCALE * 0.02237077185616559577, 256221163Sadrian [77] = FSCALE * 0.02127973643837716938, 257221163Sadrian [78] = FSCALE * 0.02024191144580438847, 258221163Sadrian [79] = FSCALE * 0.01925470177538692429, 259221163Sadrian [80] = FSCALE * 0.01831563888873418029, 260221163Sadrian [81] = FSCALE * 0.01742237463949351138, 261221163Sadrian [82] = FSCALE * 0.01657267540176124754, 262221163Sadrian [83] = FSCALE * 0.01576441648485449082, 263221163Sadrian [84] = FSCALE * 0.01499557682047770621, 264221163Sadrian [85] = FSCALE * 0.01426423390899925527, 265221163Sadrian [86] = FSCALE * 0.01356855901220093175, 266221163Sadrian [87] = FSCALE * 0.01290681258047986886, 267221163Sadrian [88] = FSCALE * 0.01227733990306844117, 268221163Sadrian [89] = FSCALE * 0.01167856697039544521, 269221163Sadrian [90] = FSCALE * 0.01110899653824230649, 270221163Sadrian [91] = FSCALE * 0.01056720438385265337, 271221163Sadrian [92] = FSCALE * 0.01005183574463358164, 272221163Sadrian [93] = FSCALE * 0.00956160193054350793, 273221163Sadrian [94] = FSCALE * 0.00909527710169581709, 274221163Sadrian [95] = FSCALE * 0.00865169520312063417, 275221163Sadrian [96] = FSCALE * 0.00822974704902002884, 276221163Sadrian [97] = FSCALE * 0.00782837754922577143, 277221163Sadrian [98] = FSCALE * 0.00744658307092434051, 278221163Sadrian [99] = FSCALE * 0.00708340892905212004, 279221163Sadrian [100] = FSCALE * 0.00673794699908546709, 280221163Sadrian [101] = FSCALE * 0.00640933344625638184, 281221163Sadrian [102] = FSCALE * 0.00609674656551563610, 282221163Sadrian [103] = FSCALE * 0.00579940472684214321, 283221163Sadrian [104] = FSCALE * 
0.00551656442076077241, 284221163Sadrian [105] = FSCALE * 0.00524751839918138427, 285221163Sadrian [106] = FSCALE * 0.00499159390691021621, 286221163Sadrian [107] = FSCALE * 0.00474815099941147558, 287221163Sadrian [108] = FSCALE * 0.00451658094261266798, 288221163Sadrian [109] = FSCALE * 0.00429630469075234057, 289221163Sadrian [110] = FSCALE * 0.00408677143846406699, 290221163Sadrian}; 291221163Sadrian#endif 292221163Sadrian 293221603Sadrian#define CCPU_EXP_MAX 110 294221603Sadrian 295221603Sadrian/* 296221603Sadrian * This function is analogical to the getpcpu() function in the ps(1) command. 297221603Sadrian * They should both calculate in the same way so that the racct %cpu 298221603Sadrian * calculations are consistent with the values showed by the ps(1) tool. 299221603Sadrian * The calculations are more complex in the 4BSD scheduler because of the value 300221603Sadrian * of the ccpu variable. In ULE it is defined to be zero which saves us some 301221163Sadrian * work. 302221163Sadrian */ 303221163Sadrianstatic uint64_t 304221163Sadrianracct_getpcpu(struct proc *p, u_int pcpu) 305221163Sadrian{ 306221163Sadrian u_int swtime; 307221163Sadrian#ifdef SCHED_4BSD 308221163Sadrian fixpt_t pctcpu, pctcpu_next; 309221163Sadrian#endif 310221163Sadrian#ifdef SMP 311221163Sadrian struct pcpu *pc; 312 int found; 313#endif 314 fixpt_t p_pctcpu; 315 struct thread *td; 316 317 /* 318 * If the process is swapped out, we count its %cpu usage as zero. 319 * This behaviour is consistent with the userland ps(1) tool. 320 */ 321 if ((p->p_flag & P_INMEM) == 0) 322 return (0); 323 swtime = (ticks - p->p_swtick) / hz; 324 325 /* 326 * For short-lived processes, the sched_pctcpu() returns small 327 * values even for cpu intensive processes. Therefore we use 328 * our own estimate in this case. 
329 */ 330 if (swtime < RACCT_PCPU_SECS) 331 return (pcpu); 332 333 p_pctcpu = 0; 334 FOREACH_THREAD_IN_PROC(p, td) { 335 if (td == PCPU_GET(idlethread)) 336 continue; 337#ifdef SMP 338 found = 0; 339 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 340 if (td == pc->pc_idlethread) { 341 found = 1; 342 break; 343 } 344 } 345 if (found) 346 continue; 347#endif 348 thread_lock(td); 349#ifdef SCHED_4BSD 350 pctcpu = sched_pctcpu(td); 351 /* Count also the yet unfinished second. */ 352 pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT; 353 pctcpu_next += sched_pctcpu_delta(td); 354 p_pctcpu += max(pctcpu, pctcpu_next); 355#else 356 /* 357 * In ULE the %cpu statistics are updated on every 358 * sched_pctcpu() call. So special calculations to 359 * account for the latest (unfinished) second are 360 * not needed. 361 */ 362 p_pctcpu += sched_pctcpu(td); 363#endif 364 thread_unlock(td); 365 } 366 367#ifdef SCHED_4BSD 368 if (swtime <= CCPU_EXP_MAX) 369 return ((100 * (uint64_t)p_pctcpu * 1000000) / 370 (FSCALE - ccpu_exp[swtime])); 371#endif 372 373 return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE); 374} 375 376static void 377racct_add_racct(struct racct *dest, const struct racct *src) 378{ 379 int i; 380 381 mtx_assert(&racct_lock, MA_OWNED); 382 383 /* 384 * Update resource usage in dest. 385 */ 386 for (i = 0; i <= RACCT_MAX; i++) { 387 KASSERT(dest->r_resources[i] >= 0, 388 ("racct propagation meltdown: dest < 0")); 389 KASSERT(src->r_resources[i] >= 0, 390 ("racct propagation meltdown: src < 0")); 391 dest->r_resources[i] += src->r_resources[i]; 392 } 393} 394 395static void 396racct_sub_racct(struct racct *dest, const struct racct *src) 397{ 398 int i; 399 400 mtx_assert(&racct_lock, MA_OWNED); 401 402 /* 403 * Update resource usage in dest. 
404 */ 405 for (i = 0; i <= RACCT_MAX; i++) { 406 if (!RACCT_IS_SLOPPY(i)) { 407 KASSERT(dest->r_resources[i] >= 0, 408 ("racct propagation meltdown: dest < 0")); 409 KASSERT(src->r_resources[i] >= 0, 410 ("racct propagation meltdown: src < 0")); 411 KASSERT(src->r_resources[i] <= dest->r_resources[i], 412 ("racct propagation meltdown: src > dest")); 413 } 414 if (RACCT_CAN_DROP(i)) { 415 dest->r_resources[i] -= src->r_resources[i]; 416 if (dest->r_resources[i] < 0) { 417 KASSERT(RACCT_IS_SLOPPY(i), 418 ("racct_sub_racct: usage < 0")); 419 dest->r_resources[i] = 0; 420 } 421 } 422 } 423} 424 425void 426racct_create(struct racct **racctp) 427{ 428 429 SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0); 430 431 KASSERT(*racctp == NULL, ("racct already allocated")); 432 433 *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO); 434} 435 436static void 437racct_destroy_locked(struct racct **racctp) 438{ 439 int i; 440 struct racct *racct; 441 442 SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0); 443 444 mtx_assert(&racct_lock, MA_OWNED); 445 KASSERT(racctp != NULL, ("NULL racctp")); 446 KASSERT(*racctp != NULL, ("NULL racct")); 447 448 racct = *racctp; 449 450 for (i = 0; i <= RACCT_MAX; i++) { 451 if (RACCT_IS_SLOPPY(i)) 452 continue; 453 if (!RACCT_IS_RECLAIMABLE(i)) 454 continue; 455 KASSERT(racct->r_resources[i] == 0, 456 ("destroying non-empty racct: " 457 "%ju allocated for resource %d\n", 458 racct->r_resources[i], i)); 459 } 460 uma_zfree(racct_zone, racct); 461 *racctp = NULL; 462} 463 464void 465racct_destroy(struct racct **racct) 466{ 467 468 mtx_lock(&racct_lock); 469 racct_destroy_locked(racct); 470 mtx_unlock(&racct_lock); 471} 472 473/* 474 * Increase consumption of 'resource' by 'amount' for 'racct' 475 * and all its parents. Differently from other cases, 'amount' here 476 * may be less than zero. 
477 */ 478static void 479racct_alloc_resource(struct racct *racct, int resource, 480 uint64_t amount) 481{ 482 483 mtx_assert(&racct_lock, MA_OWNED); 484 KASSERT(racct != NULL, ("NULL racct")); 485 486 racct->r_resources[resource] += amount; 487 if (racct->r_resources[resource] < 0) { 488 KASSERT(RACCT_IS_SLOPPY(resource) || RACCT_IS_DECAYING(resource), 489 ("racct_alloc_resource: usage < 0")); 490 racct->r_resources[resource] = 0; 491 } 492 493 /* 494 * There are some cases where the racct %cpu resource would grow 495 * beyond 100%. 496 * For example in racct_proc_exit() we add the process %cpu usage 497 * to the ucred racct containers. If too many processes terminated 498 * in a short time span, the ucred %cpu resource could grow too much. 499 * Also, the 4BSD scheduler sometimes returns for a thread more than 500 * 100% cpu usage. So we set a boundary here to 100%. 501 */ 502 if ((resource == RACCT_PCTCPU) && 503 (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000)) 504 racct->r_resources[RACCT_PCTCPU] = 100 * 1000000; 505} 506 507static int 508racct_add_locked(struct proc *p, int resource, uint64_t amount) 509{ 510#ifdef RCTL 511 int error; 512#endif 513 514 SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0); 515 516 /* 517 * We need proc lock to dereference p->p_ucred. 518 */ 519 PROC_LOCK_ASSERT(p, MA_OWNED); 520 521#ifdef RCTL 522 error = rctl_enforce(p, resource, amount); 523 if (error && RACCT_IS_DENIABLE(resource)) { 524 SDT_PROBE(racct, kernel, rusage, add_failure, p, resource, 525 amount, 0, 0); 526 return (error); 527 } 528#endif 529 racct_alloc_resource(p->p_racct, resource, amount); 530 racct_add_cred_locked(p->p_ucred, resource, amount); 531 532 return (0); 533} 534 535/* 536 * Increase allocation of 'resource' by 'amount' for process 'p'. 537 * Return 0 if it's below limits, or errno, if it's not. 
538 */ 539int 540racct_add(struct proc *p, int resource, uint64_t amount) 541{ 542 int error; 543 544 mtx_lock(&racct_lock); 545 error = racct_add_locked(p, resource, amount); 546 mtx_unlock(&racct_lock); 547 return (error); 548} 549 550static void 551racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount) 552{ 553 struct prison *pr; 554 555 SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount, 556 0, 0); 557 558 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount); 559 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 560 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource, 561 amount); 562 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount); 563} 564 565/* 566 * Increase allocation of 'resource' by 'amount' for credential 'cred'. 567 * Doesn't check for limits and never fails. 568 * 569 * XXX: Shouldn't this ever return an error? 570 */ 571void 572racct_add_cred(struct ucred *cred, int resource, uint64_t amount) 573{ 574 575 mtx_lock(&racct_lock); 576 racct_add_cred_locked(cred, resource, amount); 577 mtx_unlock(&racct_lock); 578} 579 580/* 581 * Increase allocation of 'resource' by 'amount' for process 'p'. 582 * Doesn't check for limits and never fails. 583 */ 584void 585racct_add_force(struct proc *p, int resource, uint64_t amount) 586{ 587 588 SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0); 589 590 /* 591 * We need proc lock to dereference p->p_ucred. 
592 */ 593 PROC_LOCK_ASSERT(p, MA_OWNED); 594 595 mtx_lock(&racct_lock); 596 racct_alloc_resource(p->p_racct, resource, amount); 597 mtx_unlock(&racct_lock); 598 racct_add_cred(p->p_ucred, resource, amount); 599} 600 601static int 602racct_set_locked(struct proc *p, int resource, uint64_t amount) 603{ 604 int64_t old_amount, decayed_amount; 605 int64_t diff_proc, diff_cred; 606#ifdef RCTL 607 int error; 608#endif 609 610 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); 611 612 /* 613 * We need proc lock to dereference p->p_ucred. 614 */ 615 PROC_LOCK_ASSERT(p, MA_OWNED); 616 617 old_amount = p->p_racct->r_resources[resource]; 618 /* 619 * The diffs may be negative. 620 */ 621 diff_proc = amount - old_amount; 622 if (RACCT_IS_DECAYING(resource)) { 623 /* 624 * Resources in per-credential racct containers may decay. 625 * If this is the case, we need to calculate the difference 626 * between the new amount and the proportional value of the 627 * old amount that has decayed in the ucred racct containers. 628 */ 629 decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; 630 diff_cred = amount - decayed_amount; 631 } else 632 diff_cred = diff_proc; 633#ifdef notyet 634 KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource), 635 ("racct_set: usage of non-droppable resource %d dropping", 636 resource)); 637#endif 638#ifdef RCTL 639 if (diff_proc > 0) { 640 error = rctl_enforce(p, resource, diff_proc); 641 if (error && RACCT_IS_DENIABLE(resource)) { 642 SDT_PROBE(racct, kernel, rusage, set_failure, p, 643 resource, amount, 0, 0); 644 return (error); 645 } 646 } 647#endif 648 racct_alloc_resource(p->p_racct, resource, diff_proc); 649 if (diff_cred > 0) 650 racct_add_cred_locked(p->p_ucred, resource, diff_cred); 651 else if (diff_cred < 0) 652 racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); 653 654 return (0); 655} 656 657/* 658 * Set allocation of 'resource' to 'amount' for process 'p'. 659 * Return 0 if it's below limits, or errno, if it's not. 
660 * 661 * Note that decreasing the allocation always returns 0, 662 * even if it's above the limit. 663 */ 664int 665racct_set(struct proc *p, int resource, uint64_t amount) 666{ 667 int error; 668 669 mtx_lock(&racct_lock); 670 error = racct_set_locked(p, resource, amount); 671 mtx_unlock(&racct_lock); 672 return (error); 673} 674 675static void 676racct_set_force_locked(struct proc *p, int resource, uint64_t amount) 677{ 678 int64_t old_amount, decayed_amount; 679 int64_t diff_proc, diff_cred; 680 681 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); 682 683 /* 684 * We need proc lock to dereference p->p_ucred. 685 */ 686 PROC_LOCK_ASSERT(p, MA_OWNED); 687 688 old_amount = p->p_racct->r_resources[resource]; 689 /* 690 * The diffs may be negative. 691 */ 692 diff_proc = amount - old_amount; 693 if (RACCT_IS_DECAYING(resource)) { 694 /* 695 * Resources in per-credential racct containers may decay. 696 * If this is the case, we need to calculate the difference 697 * between the new amount and the proportional value of the 698 * old amount that has decayed in the ucred racct containers. 699 */ 700 decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; 701 diff_cred = amount - decayed_amount; 702 } else 703 diff_cred = diff_proc; 704 705 racct_alloc_resource(p->p_racct, resource, diff_proc); 706 if (diff_cred > 0) 707 racct_add_cred_locked(p->p_ucred, resource, diff_cred); 708 else if (diff_cred < 0) 709 racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); 710} 711 712void 713racct_set_force(struct proc *p, int resource, uint64_t amount) 714{ 715 mtx_lock(&racct_lock); 716 racct_set_force_locked(p, resource, amount); 717 mtx_unlock(&racct_lock); 718} 719 720/* 721 * Returns amount of 'resource' the process 'p' can keep allocated. 722 * Allocating more than that would be denied, unless the resource 723 * is marked undeniable. Amount of already allocated resource does 724 * not matter. 
725 */ 726uint64_t 727racct_get_limit(struct proc *p, int resource) 728{ 729 730#ifdef RCTL 731 return (rctl_get_limit(p, resource)); 732#else 733 return (UINT64_MAX); 734#endif 735} 736 737/* 738 * Returns amount of 'resource' the process 'p' can keep allocated. 739 * Allocating more than that would be denied, unless the resource 740 * is marked undeniable. Amount of already allocated resource does 741 * matter. 742 */ 743uint64_t 744racct_get_available(struct proc *p, int resource) 745{ 746 747#ifdef RCTL 748 return (rctl_get_available(p, resource)); 749#else 750 return (UINT64_MAX); 751#endif 752} 753 754/* 755 * Returns amount of the %cpu resource that process 'p' can add to its %cpu 756 * utilization. Adding more than that would lead to the process being 757 * throttled. 758 */ 759static int64_t 760racct_pcpu_available(struct proc *p) 761{ 762 763#ifdef RCTL 764 return (rctl_pcpu_available(p)); 765#else 766 return (INT64_MAX); 767#endif 768} 769 770/* 771 * Decrease allocation of 'resource' by 'amount' for process 'p'. 772 */ 773void 774racct_sub(struct proc *p, int resource, uint64_t amount) 775{ 776 777 SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0); 778 779 /* 780 * We need proc lock to dereference p->p_ucred. 
781 */ 782 PROC_LOCK_ASSERT(p, MA_OWNED); 783 KASSERT(RACCT_CAN_DROP(resource), 784 ("racct_sub: called for non-droppable resource %d", resource)); 785 786 mtx_lock(&racct_lock); 787 KASSERT(amount <= p->p_racct->r_resources[resource], 788 ("racct_sub: freeing %ju of resource %d, which is more " 789 "than allocated %jd for %s (pid %d)", amount, resource, 790 (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid)); 791 792 racct_alloc_resource(p->p_racct, resource, -amount); 793 racct_sub_cred_locked(p->p_ucred, resource, amount); 794 mtx_unlock(&racct_lock); 795} 796 797static void 798racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) 799{ 800 struct prison *pr; 801 802 SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount, 803 0, 0); 804 805#ifdef notyet 806 KASSERT(RACCT_CAN_DROP(resource), 807 ("racct_sub_cred: called for resource %d which can not drop", 808 resource)); 809#endif 810 811 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount); 812 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 813 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource, 814 -amount); 815 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount); 816} 817 818/* 819 * Decrease allocation of 'resource' by 'amount' for credential 'cred'. 820 */ 821void 822racct_sub_cred(struct ucred *cred, int resource, uint64_t amount) 823{ 824 825 mtx_lock(&racct_lock); 826 racct_sub_cred_locked(cred, resource, amount); 827 mtx_unlock(&racct_lock); 828} 829 830/* 831 * Inherit resource usage information from the parent process. 832 */ 833int 834racct_proc_fork(struct proc *parent, struct proc *child) 835{ 836 int i, error = 0; 837 838 /* 839 * Create racct for the child process. 
840 */ 841 racct_create(&child->p_racct); 842 843 PROC_LOCK(parent); 844 PROC_LOCK(child); 845 mtx_lock(&racct_lock); 846 847#ifdef RCTL 848 error = rctl_proc_fork(parent, child); 849 if (error != 0) 850 goto out; 851#endif 852 853 /* Init process cpu time. */ 854 child->p_prev_runtime = 0; 855 child->p_throttled = 0; 856 857 /* 858 * Inherit resource usage. 859 */ 860 for (i = 0; i <= RACCT_MAX; i++) { 861 if (parent->p_racct->r_resources[i] == 0 || 862 !RACCT_IS_INHERITABLE(i)) 863 continue; 864 865 error = racct_set_locked(child, i, 866 parent->p_racct->r_resources[i]); 867 if (error != 0) 868 goto out; 869 } 870 871 error = racct_add_locked(child, RACCT_NPROC, 1); 872 error += racct_add_locked(child, RACCT_NTHR, 1); 873 874out: 875 mtx_unlock(&racct_lock); 876 PROC_UNLOCK(child); 877 PROC_UNLOCK(parent); 878 879 if (error != 0) 880 racct_proc_exit(child); 881 882 return (error); 883} 884 885/* 886 * Called at the end of fork1(), to handle rules that require the process 887 * to be fully initialized. 888 */ 889void 890racct_proc_fork_done(struct proc *child) 891{ 892 893#ifdef RCTL 894 PROC_LOCK(child); 895 mtx_lock(&racct_lock); 896 rctl_enforce(child, RACCT_NPROC, 0); 897 rctl_enforce(child, RACCT_NTHR, 0); 898 mtx_unlock(&racct_lock); 899 PROC_UNLOCK(child); 900#endif 901} 902 903void 904racct_proc_exit(struct proc *p) 905{ 906 int i; 907 uint64_t runtime; 908 struct timeval wallclock; 909 uint64_t pct_estimate, pct; 910 911 PROC_LOCK(p); 912 /* 913 * We don't need to calculate rux, proc_reap() has already done this. 
914 */ 915 runtime = cputick2usec(p->p_rux.rux_runtime); 916#ifdef notyet 917 KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); 918#else 919 if (runtime < p->p_prev_runtime) 920 runtime = p->p_prev_runtime; 921#endif 922 microuptime(&wallclock); 923 timevalsub(&wallclock, &p->p_stats->p_start); 924 if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { 925 pct_estimate = (1000000 * runtime * 100) / 926 ((uint64_t)wallclock.tv_sec * 1000000 + 927 wallclock.tv_usec); 928 } else 929 pct_estimate = 0; 930 pct = racct_getpcpu(p, pct_estimate); 931 932 mtx_lock(&racct_lock); 933 racct_set_locked(p, RACCT_CPU, runtime); 934 racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct); 935 936 for (i = 0; i <= RACCT_MAX; i++) { 937 if (p->p_racct->r_resources[i] == 0) 938 continue; 939 if (!RACCT_IS_RECLAIMABLE(i)) 940 continue; 941 racct_set_locked(p, i, 0); 942 } 943 944 mtx_unlock(&racct_lock); 945 PROC_UNLOCK(p); 946 947#ifdef RCTL 948 rctl_racct_release(p->p_racct); 949#endif 950 racct_destroy(&p->p_racct); 951} 952 953/* 954 * Called after credentials change, to move resource utilisation 955 * between raccts. 
956 */ 957void 958racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, 959 struct ucred *newcred) 960{ 961 struct uidinfo *olduip, *newuip; 962 struct loginclass *oldlc, *newlc; 963 struct prison *oldpr, *newpr, *pr; 964 965 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 966 967 newuip = newcred->cr_ruidinfo; 968 olduip = oldcred->cr_ruidinfo; 969 newlc = newcred->cr_loginclass; 970 oldlc = oldcred->cr_loginclass; 971 newpr = newcred->cr_prison; 972 oldpr = oldcred->cr_prison; 973 974 mtx_lock(&racct_lock); 975 if (newuip != olduip) { 976 racct_sub_racct(olduip->ui_racct, p->p_racct); 977 racct_add_racct(newuip->ui_racct, p->p_racct); 978 } 979 if (newlc != oldlc) { 980 racct_sub_racct(oldlc->lc_racct, p->p_racct); 981 racct_add_racct(newlc->lc_racct, p->p_racct); 982 } 983 if (newpr != oldpr) { 984 for (pr = oldpr; pr != NULL; pr = pr->pr_parent) 985 racct_sub_racct(pr->pr_prison_racct->prr_racct, 986 p->p_racct); 987 for (pr = newpr; pr != NULL; pr = pr->pr_parent) 988 racct_add_racct(pr->pr_prison_racct->prr_racct, 989 p->p_racct); 990 } 991 mtx_unlock(&racct_lock); 992 993#ifdef RCTL 994 rctl_proc_ucred_changed(p, newcred); 995#endif 996} 997 998void 999racct_move(struct racct *dest, struct racct *src) 1000{ 1001 1002 mtx_lock(&racct_lock); 1003 1004 racct_add_racct(dest, src); 1005 racct_sub_racct(src, src); 1006 1007 mtx_unlock(&racct_lock); 1008} 1009 1010static void 1011racct_proc_throttle(struct proc *p) 1012{ 1013 struct thread *td; 1014#ifdef SMP 1015 int cpuid; 1016#endif 1017 1018 PROC_LOCK_ASSERT(p, MA_OWNED); 1019 1020 /* 1021 * Do not block kernel processes. Also do not block processes with 1022 * low %cpu utilization to improve interactivity. 
1023 */ 1024 if (((p->p_flag & (P_SYSTEM | P_KTHREAD)) != 0) || 1025 (p->p_racct->r_resources[RACCT_PCTCPU] <= pcpu_threshold)) 1026 return; 1027 p->p_throttled = 1; 1028 1029 FOREACH_THREAD_IN_PROC(p, td) { 1030 switch (td->td_state) { 1031 case TDS_RUNQ: 1032 /* 1033 * If the thread is on the scheduler run-queue, we can 1034 * not just remove it from there. So we set the flag 1035 * TDF_NEEDRESCHED for the thread, so that once it is 1036 * running, it is taken off the cpu as soon as possible. 1037 */ 1038 thread_lock(td); 1039 td->td_flags |= TDF_NEEDRESCHED; 1040 thread_unlock(td); 1041 break; 1042 case TDS_RUNNING: 1043 /* 1044 * If the thread is running, we request a context 1045 * switch for it by setting the TDF_NEEDRESCHED flag. 1046 */ 1047 thread_lock(td); 1048 td->td_flags |= TDF_NEEDRESCHED; 1049#ifdef SMP 1050 cpuid = td->td_oncpu; 1051 if ((cpuid != NOCPU) && (td != curthread)) 1052 ipi_cpu(cpuid, IPI_AST); 1053#endif 1054 thread_unlock(td); 1055 break; 1056 default: 1057 break; 1058 } 1059 } 1060} 1061 1062static void 1063racct_proc_wakeup(struct proc *p) 1064{ 1065 PROC_LOCK_ASSERT(p, MA_OWNED); 1066 1067 if (p->p_throttled) { 1068 p->p_throttled = 0; 1069 wakeup(p->p_racct); 1070 } 1071} 1072 1073static void 1074racct_decay_resource(struct racct *racct, void * res, void* dummy) 1075{ 1076 int resource; 1077 int64_t r_old, r_new; 1078 1079 resource = *(int *)res; 1080 r_old = racct->r_resources[resource]; 1081 1082 /* If there is nothing to decay, just exit. 
*/ 1083 if (r_old <= 0) 1084 return; 1085 1086 mtx_lock(&racct_lock); 1087 r_new = r_old * RACCT_DECAY_FACTOR / FSCALE; 1088 racct->r_resources[resource] = r_new; 1089 mtx_unlock(&racct_lock); 1090} 1091 1092static void 1093racct_decay(int resource) 1094{ 1095 ui_racct_foreach(racct_decay_resource, &resource, NULL); 1096 loginclass_racct_foreach(racct_decay_resource, &resource, NULL); 1097 prison_racct_foreach(racct_decay_resource, &resource, NULL); 1098} 1099 1100static void 1101racctd(void) 1102{ 1103 struct thread *td; 1104 struct proc *p; 1105 struct timeval wallclock; 1106 uint64_t runtime; 1107 uint64_t pct, pct_estimate; 1108 1109 for (;;) { 1110 racct_decay(RACCT_PCTCPU); 1111 1112 sx_slock(&allproc_lock); 1113 1114 LIST_FOREACH(p, &zombproc, p_list) { 1115 PROC_LOCK(p); 1116 racct_set(p, RACCT_PCTCPU, 0); 1117 PROC_UNLOCK(p); 1118 } 1119 1120 FOREACH_PROC_IN_SYSTEM(p) { 1121 PROC_LOCK(p); 1122 if (p->p_state != PRS_NORMAL) { 1123 PROC_UNLOCK(p); 1124 continue; 1125 } 1126 1127 microuptime(&wallclock); 1128 timevalsub(&wallclock, &p->p_stats->p_start); 1129 PROC_SLOCK(p); 1130 FOREACH_THREAD_IN_PROC(p, td) 1131 ruxagg(p, td); 1132 runtime = cputick2usec(p->p_rux.rux_runtime); 1133 PROC_SUNLOCK(p); 1134#ifdef notyet 1135 KASSERT(runtime >= p->p_prev_runtime, 1136 ("runtime < p_prev_runtime")); 1137#else 1138 if (runtime < p->p_prev_runtime) 1139 runtime = p->p_prev_runtime; 1140#endif 1141 p->p_prev_runtime = runtime; 1142 if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { 1143 pct_estimate = (1000000 * runtime * 100) / 1144 ((uint64_t)wallclock.tv_sec * 1000000 + 1145 wallclock.tv_usec); 1146 } else 1147 pct_estimate = 0; 1148 pct = racct_getpcpu(p, pct_estimate); 1149 mtx_lock(&racct_lock); 1150 racct_set_force_locked(p, RACCT_PCTCPU, pct); 1151 racct_set_locked(p, RACCT_CPU, runtime); 1152 racct_set_locked(p, RACCT_WALLCLOCK, 1153 (uint64_t)wallclock.tv_sec * 1000000 + 1154 wallclock.tv_usec); 1155 mtx_unlock(&racct_lock); 1156 PROC_UNLOCK(p); 1157 } 
1158 1159 /* 1160 * To ensure that processes are throttled in a fair way, we need 1161 * to iterate over all processes again and check the limits 1162 * for %cpu resource only after ucred racct containers have been 1163 * properly filled. 1164 */ 1165 FOREACH_PROC_IN_SYSTEM(p) { 1166 PROC_LOCK(p); 1167 if (p->p_state != PRS_NORMAL) { 1168 PROC_UNLOCK(p); 1169 continue; 1170 } 1171 1172 if (racct_pcpu_available(p) <= 0) 1173 racct_proc_throttle(p); 1174 else if (p->p_throttled) 1175 racct_proc_wakeup(p); 1176 PROC_UNLOCK(p); 1177 } 1178 sx_sunlock(&allproc_lock); 1179 pause("-", hz); 1180 } 1181} 1182 1183static struct kproc_desc racctd_kp = { 1184 "racctd", 1185 racctd, 1186 NULL 1187}; 1188SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp); 1189 1190static void 1191racct_init(void) 1192{ 1193 1194 racct_zone = uma_zcreate("racct", sizeof(struct racct), 1195 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 1196 /* 1197 * XXX: Move this somewhere. 1198 */ 1199 prison0.pr_prison_racct = prison_racct_find("0"); 1200} 1201SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL); 1202 1203#else /* !RACCT */ 1204 1205int 1206racct_add(struct proc *p, int resource, uint64_t amount) 1207{ 1208 1209 return (0); 1210} 1211 1212void 1213racct_add_cred(struct ucred *cred, int resource, uint64_t amount) 1214{ 1215} 1216 1217void 1218racct_add_force(struct proc *p, int resource, uint64_t amount) 1219{ 1220 1221 return; 1222} 1223 1224int 1225racct_set(struct proc *p, int resource, uint64_t amount) 1226{ 1227 1228 return (0); 1229} 1230 1231void 1232racct_set_force(struct proc *p, int resource, uint64_t amount) 1233{ 1234} 1235 1236void 1237racct_sub(struct proc *p, int resource, uint64_t amount) 1238{ 1239} 1240 1241void 1242racct_sub_cred(struct ucred *cred, int resource, uint64_t amount) 1243{ 1244} 1245 1246uint64_t 1247racct_get_limit(struct proc *p, int resource) 1248{ 1249 1250 return (UINT64_MAX); 1251} 1252 1253uint64_t 
1254racct_get_available(struct proc *p, int resource) 1255{ 1256 1257 return (UINT64_MAX); 1258} 1259 1260void 1261racct_create(struct racct **racctp) 1262{ 1263} 1264 1265void 1266racct_destroy(struct racct **racctp) 1267{ 1268} 1269 1270int 1271racct_proc_fork(struct proc *parent, struct proc *child) 1272{ 1273 1274 return (0); 1275} 1276 1277void 1278racct_proc_fork_done(struct proc *child) 1279{ 1280} 1281 1282void 1283racct_proc_exit(struct proc *p) 1284{ 1285} 1286 1287#endif /* !RACCT */ 1288