kern_racct.c revision 260817
/*-
 * Copyright (c) 2010 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
281556Srgrimes * 291556Srgrimes * $FreeBSD: stable/10/sys/kern/kern_racct.c 260817 2014-01-17 10:58:59Z avg $ 301556Srgrimes */ 311556Srgrimes 321556Srgrimes#include <sys/cdefs.h> 331556Srgrimes__FBSDID("$FreeBSD: stable/10/sys/kern/kern_racct.c 260817 2014-01-17 10:58:59Z avg $"); 3436150Scharnier 3536150Scharnier#include "opt_kdtrace.h" 3636150Scharnier#include "opt_sched.h" 371556Srgrimes 3899110Sobrien#include <sys/param.h> 3999110Sobrien#include <sys/systm.h> 401556Srgrimes#include <sys/eventhandler.h> 4117987Speter#include <sys/jail.h> 4296922Stjr#include <sys/kernel.h> 4317987Speter#include <sys/kthread.h> 4417987Speter#include <sys/lock.h> 4517987Speter#include <sys/loginclass.h> 4617987Speter#include <sys/malloc.h> 4717987Speter#include <sys/mutex.h> 4817987Speter#include <sys/proc.h> 4917987Speter#include <sys/racct.h> 501556Srgrimes#include <sys/resourcevar.h> 511556Srgrimes#include <sys/sbuf.h> 521556Srgrimes#include <sys/sched.h> 531556Srgrimes#include <sys/sdt.h> 541556Srgrimes#include <sys/smp.h> 551556Srgrimes#include <sys/sx.h> 561556Srgrimes#include <sys/sysctl.h> 571556Srgrimes#include <sys/sysent.h> 581556Srgrimes#include <sys/sysproto.h> 591556Srgrimes#include <sys/umtx.h> 601556Srgrimes#include <machine/smp.h> 611556Srgrimes 6296922Stjr#ifdef RCTL 631556Srgrimes#include <sys/rctl.h> 641556Srgrimes#endif 651556Srgrimes 66199953Sjilles#ifdef RACCT 671556Srgrimes 681556SrgrimesFEATURE(racct, "Resource Accounting"); 691556Srgrimes 701556Srgrimes/* 71100315Stjr * Do not block processes that have their %cpu usage <= pcpu_threshold. 
72262951Sjmmv */ 731556Srgrimesstatic int pcpu_threshold = 1; 741556Srgrimes 751556SrgrimesSYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting"); 76253658SjillesSYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold, 771556Srgrimes 0, "Processes with higher %cpu usage than this value can be throttled."); 788855Srgrimes 791556Srgrimes/* 801556Srgrimes * How many seconds it takes to use the scheduler %cpu calculations. When a 818855Srgrimes * process starts, we compute its %cpu usage by dividing its runtime by the 821556Srgrimes * process wall clock time. After RACCT_PCPU_SECS pass, we use the value 83213760Sobrien * provided by the scheduler. 841556Srgrimes */ 85213811Sobrien#define RACCT_PCPU_SECS 3 86213811Sobrien 871556Srgrimesstatic struct mtx racct_lock; 881556SrgrimesMTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF); 891556Srgrimes 901556Srgrimesstatic uma_zone_t racct_zone; 911556Srgrimes 921556Srgrimesstatic void racct_sub_racct(struct racct *dest, const struct racct *src); 931556Srgrimesstatic void racct_sub_cred_locked(struct ucred *cred, int resource, 941556Srgrimes uint64_t amount); 95264478Sjillesstatic void racct_add_cred_locked(struct ucred *cred, int resource, 96264478Sjilles uint64_t amount); 97264478Sjilles 98264478SjillesSDT_PROVIDER_DEFINE(racct); 99264478SjillesSDT_PROBE_DEFINE3(racct, kernel, rusage, add, "struct proc *", "int", 100264478Sjilles "uint64_t"); 101264478SjillesSDT_PROBE_DEFINE3(racct, kernel, rusage, add__failure, 1021556Srgrimes "struct proc *", "int", "uint64_t"); 1031556SrgrimesSDT_PROBE_DEFINE3(racct, kernel, rusage, add__cred, "struct ucred *", 1041556Srgrimes "int", "uint64_t"); 10590111SimpSDT_PROBE_DEFINE3(racct, kernel, rusage, add__force, "struct proc *", 10690111Simp "int", "uint64_t"); 1071556SrgrimesSDT_PROBE_DEFINE3(racct, kernel, rusage, set, "struct proc *", "int", 10817987Speter "uint64_t"); 1091556SrgrimesSDT_PROBE_DEFINE3(racct, kernel, rusage, set__failure, 
1101556Srgrimes "struct proc *", "int", "uint64_t"); 11120425SsteveSDT_PROBE_DEFINE3(racct, kernel, rusage, sub, "struct proc *", "int", 1121556Srgrimes "uint64_t"); 113264478SjillesSDT_PROBE_DEFINE3(racct, kernel, rusage, sub__cred, "struct ucred *", 1141556Srgrimes "int", "uint64_t"); 1151556SrgrimesSDT_PROBE_DEFINE1(racct, kernel, racct, create, "struct racct *"); 1161556SrgrimesSDT_PROBE_DEFINE1(racct, kernel, racct, destroy, "struct racct *"); 1171556SrgrimesSDT_PROBE_DEFINE2(racct, kernel, racct, join, "struct racct *", 1181556Srgrimes "struct racct *"); 1191556SrgrimesSDT_PROBE_DEFINE2(racct, kernel, racct, join__failure, 1201556Srgrimes "struct racct *", "struct racct *"); 121262951SjmmvSDT_PROBE_DEFINE2(racct, kernel, racct, leave, "struct racct *", 1221556Srgrimes "struct racct *"); 1231556Srgrimes 1241556Srgrimesint racct_types[] = { 1251556Srgrimes [RACCT_CPU] = 1261556Srgrimes RACCT_IN_MILLIONS, 127262951Sjmmv [RACCT_DATA] = 128262951Sjmmv RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 12911601Sjoerg [RACCT_STACK] = 13011601Sjoerg RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 13120425Ssteve [RACCT_CORE] = 13225231Ssteve RACCT_DENIABLE, 1331556Srgrimes [RACCT_RSS] = 1341556Srgrimes RACCT_RECLAIMABLE, 135250267Sjilles [RACCT_MEMLOCK] = 13625231Ssteve RACCT_RECLAIMABLE | RACCT_DENIABLE, 13725231Ssteve [RACCT_NPROC] = 138199953Sjilles RACCT_RECLAIMABLE | RACCT_DENIABLE, 139199953Sjilles [RACCT_NOFILE] = 14025231Ssteve RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 14125231Ssteve [RACCT_VMEM] = 14225231Ssteve RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 14325231Ssteve [RACCT_NPTS] = 14425231Ssteve RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 145250267Sjilles [RACCT_SWAP] = 146199953Sjilles RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 1471556Srgrimes [RACCT_NTHR] = 1481556Srgrimes RACCT_RECLAIMABLE | RACCT_DENIABLE, 149199953Sjilles [RACCT_MSGQQUEUED] = 150264478Sjilles RACCT_RECLAIMABLE | RACCT_DENIABLE | 
RACCT_SLOPPY, 151264478Sjilles [RACCT_MSGQSIZE] = 1521556Srgrimes RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 1531556Srgrimes [RACCT_NMSGQ] = 1541556Srgrimes RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 1551556Srgrimes [RACCT_NSEM] = 1561556Srgrimes RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 157264478Sjilles [RACCT_NSEMOP] = 1581556Srgrimes RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 1591556Srgrimes [RACCT_NSHM] = 1601556Srgrimes RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 161213811Sobrien [RACCT_SHMSIZE] = 16290111Simp RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 16390111Simp [RACCT_WALLCLOCK] = 16496922Stjr RACCT_IN_MILLIONS, 1651556Srgrimes [RACCT_PCTCPU] = 166264168Sjilles RACCT_DECAYING | RACCT_DENIABLE | RACCT_IN_MILLIONS }; 1671556Srgrimes 168216851Sjillesstatic const fixpt_t RACCT_DECAY_FACTOR = 0.3 * FSCALE; 1691556Srgrimes 1701556Srgrimes#ifdef SCHED_4BSD 1711556Srgrimes/* 1721556Srgrimes * Contains intermediate values for %cpu calculations to avoid using floating 1731556Srgrimes * point in the kernel. 1741556Srgrimes * ccpu_exp[k] = FSCALE * (ccpu/FSCALE)^k = FSCALE * exp(-k/20) 175104132Stjr * It is needed only for the 4BSD scheduler, because in ULE, the ccpu equals to 1761556Srgrimes * zero so the calculations are more straightforward. 
1771556Srgrimes */ 178216851Sjillesfixpt_t ccpu_exp[] = { 179216851Sjilles [0] = FSCALE * 1, 180216851Sjilles [1] = FSCALE * 0.95122942450071400909, 181216851Sjilles [2] = FSCALE * 0.90483741803595957316, 182216851Sjilles [3] = FSCALE * 0.86070797642505780722, 1831556Srgrimes [4] = FSCALE * 0.81873075307798185866, 1841556Srgrimes [5] = FSCALE * 0.77880078307140486824, 1851556Srgrimes [6] = FSCALE * 0.74081822068171786606, 18666612Sbrian [7] = FSCALE * 0.70468808971871343435, 18766612Sbrian [8] = FSCALE * 0.67032004603563930074, 18866612Sbrian [9] = FSCALE * 0.63762815162177329314, 189104132Stjr [10] = FSCALE * 0.60653065971263342360, 19066612Sbrian [11] = FSCALE * 0.57694981038048669531, 1911556Srgrimes [12] = FSCALE * 0.54881163609402643262, 192194560Sjilles [13] = FSCALE * 0.52204577676101604789, 193194560Sjilles [14] = FSCALE * 0.49658530379140951470, 194194560Sjilles [15] = FSCALE * 0.47236655274101470713, 195194560Sjilles [16] = FSCALE * 0.44932896411722159143, 196194560Sjilles [17] = FSCALE * 0.42741493194872666992, 197194560Sjilles [18] = FSCALE * 0.40656965974059911188, 198194560Sjilles [19] = FSCALE * 0.38674102345450120691, 199194560Sjilles [20] = FSCALE * 0.36787944117144232159, 200194560Sjilles [21] = FSCALE * 0.34993774911115535467, 201194560Sjilles [22] = FSCALE * 0.33287108369807955328, 202194560Sjilles [23] = FSCALE * 0.31663676937905321821, 203194560Sjilles [24] = FSCALE * 0.30119421191220209664, 204194560Sjilles [25] = FSCALE * 0.28650479686019010032, 205194560Sjilles [26] = FSCALE * 0.27253179303401260312, 206194560Sjilles [27] = FSCALE * 0.25924026064589150757, 207194560Sjilles [28] = FSCALE * 0.24659696394160647693, 208194560Sjilles [29] = FSCALE * 0.23457028809379765313, 209194560Sjilles [30] = FSCALE * 0.22313016014842982893, 210194560Sjilles [31] = FSCALE * 0.21224797382674305771, 21196922Stjr [32] = FSCALE * 0.20189651799465540848, 21296922Stjr [33] = FSCALE * 0.19204990862075411423, 21396922Stjr [34] = FSCALE * 0.18268352405273465022, 
214104132Stjr [35] = FSCALE * 0.17377394345044512668, 21596922Stjr [36] = FSCALE * 0.16529888822158653829, 2161556Srgrimes [37] = FSCALE * 0.15723716631362761621, 2171556Srgrimes [38] = FSCALE * 0.14956861922263505264, 2181556Srgrimes [39] = FSCALE * 0.14227407158651357185, 219104132Stjr [40] = FSCALE * 0.13533528323661269189, 2201556Srgrimes [41] = FSCALE * 0.12873490358780421886, 2211556Srgrimes [42] = FSCALE * 0.12245642825298191021, 2221556Srgrimes [43] = FSCALE * 0.11648415777349695786, 2231556Srgrimes [44] = FSCALE * 0.11080315836233388333, 2241556Srgrimes [45] = FSCALE * 0.10539922456186433678, 2251556Srgrimes [46] = FSCALE * 0.10025884372280373372, 226214290Sjilles [47] = FSCALE * 0.09536916221554961888, 227214290Sjilles [48] = FSCALE * 0.09071795328941250337, 228214290Sjilles [49] = FSCALE * 0.08629358649937051097, 229214290Sjilles [50] = FSCALE * 0.08208499862389879516, 230214290Sjilles [51] = FSCALE * 0.07808166600115315231, 231124780Sdes [52] = FSCALE * 0.07427357821433388042, 23299645Stjr [53] = FSCALE * 0.07065121306042958674, 233124780Sdes [54] = FSCALE * 0.06720551273974976512, 2341556Srgrimes [55] = FSCALE * 0.06392786120670757270, 2351556Srgrimes [56] = FSCALE * 0.06081006262521796499, 2361556Srgrimes [57] = FSCALE * 0.05784432087483846296, 2371556Srgrimes [58] = FSCALE * 0.05502322005640722902, 2381556Srgrimes [59] = FSCALE * 0.05233970594843239308, 2391556Srgrimes [60] = FSCALE * 0.04978706836786394297, 2401556Srgrimes [61] = FSCALE * 0.04735892439114092119, 2411556Srgrimes [62] = FSCALE * 0.04504920239355780606, 2421556Srgrimes [63] = FSCALE * 0.04285212686704017991, 2431556Srgrimes [64] = FSCALE * 0.04076220397836621516, 2441556Srgrimes [65] = FSCALE * 0.03877420783172200988, 2451556Srgrimes [66] = FSCALE * 0.03688316740124000544, 2461556Srgrimes [67] = FSCALE * 0.03508435410084502588, 2471556Srgrimes [68] = FSCALE * 0.03337326996032607948, 2481556Srgrimes [69] = FSCALE * 0.03174563637806794323, 2491556Srgrimes [70] = FSCALE * 
0.03019738342231850073, 2501556Srgrimes [71] = FSCALE * 0.02872463965423942912, 251213811Sobrien [72] = FSCALE * 0.02732372244729256080, 25290111Simp [73] = FSCALE * 0.02599112877875534358, 25390111Simp [74] = FSCALE * 0.02472352647033939120, 254264168Sjilles [75] = FSCALE * 0.02351774585600910823, 2551556Srgrimes [76] = FSCALE * 0.02237077185616559577, 256249034Sjilles [77] = FSCALE * 0.02127973643837716938, 257249034Sjilles [78] = FSCALE * 0.02024191144580438847, 258249034Sjilles [79] = FSCALE * 0.01925470177538692429, 2591556Srgrimes [80] = FSCALE * 0.01831563888873418029, 2601556Srgrimes [81] = FSCALE * 0.01742237463949351138, 26153891Scracauer [82] = FSCALE * 0.01657267540176124754, 262246288Sjilles [83] = FSCALE * 0.01576441648485449082, 263246288Sjilles [84] = FSCALE * 0.01499557682047770621, 264246288Sjilles [85] = FSCALE * 0.01426423390899925527, 265246288Sjilles [86] = FSCALE * 0.01356855901220093175, 266246288Sjilles [87] = FSCALE * 0.01290681258047986886, 267246288Sjilles [88] = FSCALE * 0.01227733990306844117, 268249034Sjilles [89] = FSCALE * 0.01167856697039544521, 269246288Sjilles [90] = FSCALE * 0.01110899653824230649, 270249034Sjilles [91] = FSCALE * 0.01056720438385265337, 271249034Sjilles [92] = FSCALE * 0.01005183574463358164, 272249034Sjilles [93] = FSCALE * 0.00956160193054350793, 273249034Sjilles [94] = FSCALE * 0.00909527710169581709, 274249034Sjilles [95] = FSCALE * 0.00865169520312063417, 275249034Sjilles [96] = FSCALE * 0.00822974704902002884, 276249034Sjilles [97] = FSCALE * 0.00782837754922577143, 277249034Sjilles [98] = FSCALE * 0.00744658307092434051, 2781556Srgrimes [99] = FSCALE * 0.00708340892905212004, 279246288Sjilles [100] = FSCALE * 0.00673794699908546709, 2801556Srgrimes [101] = FSCALE * 0.00640933344625638184, 2811556Srgrimes [102] = FSCALE * 0.00609674656551563610, 2821556Srgrimes [103] = FSCALE * 0.00579940472684214321, 2831556Srgrimes [104] = FSCALE * 0.00551656442076077241, 2841556Srgrimes [105] = FSCALE * 
0.00524751839918138427, 2851556Srgrimes [106] = FSCALE * 0.00499159390691021621, 2861556Srgrimes [107] = FSCALE * 0.00474815099941147558, 287249034Sjilles [108] = FSCALE * 0.00451658094261266798, 2881556Srgrimes [109] = FSCALE * 0.00429630469075234057, 2891556Srgrimes [110] = FSCALE * 0.00408677143846406699, 2901556Srgrimes}; 2911556Srgrimes#endif 2921556Srgrimes 2931556Srgrimes#define CCPU_EXP_MAX 110 2941556Srgrimes 2951556Srgrimes/* 2961556Srgrimes * This function is analogical to the getpcpu() function in the ps(1) command. 2971556Srgrimes * They should both calculate in the same way so that the racct %cpu 2981556Srgrimes * calculations are consistent with the values showed by the ps(1) tool. 2991556Srgrimes * The calculations are more complex in the 4BSD scheduler because of the value 3001556Srgrimes * of the ccpu variable. In ULE it is defined to be zero which saves us some 3011556Srgrimes * work. 30290111Simp */ 30390111Simpstatic uint64_t 30425231Ssteveracct_getpcpu(struct proc *p, u_int pcpu) 3051556Srgrimes{ 3061556Srgrimes u_int swtime; 3071556Srgrimes#ifdef SCHED_4BSD 3081556Srgrimes fixpt_t pctcpu, pctcpu_next; 3091556Srgrimes#endif 310124780Sdes#ifdef SMP 3111556Srgrimes struct pcpu *pc; 312124780Sdes int found; 313124780Sdes#endif 3141556Srgrimes fixpt_t p_pctcpu; 3151556Srgrimes struct thread *td; 3161556Srgrimes 3171556Srgrimes /* 318262951Sjmmv * If the process is swapped out, we count its %cpu usage as zero. 3191556Srgrimes * This behaviour is consistent with the userland ps(1) tool. 3201556Srgrimes */ 3211556Srgrimes if ((p->p_flag & P_INMEM) == 0) 3221556Srgrimes return (0); 3231556Srgrimes swtime = (ticks - p->p_swtick) / hz; 3241556Srgrimes 3251556Srgrimes /* 32690111Simp * For short-lived processes, the sched_pctcpu() returns small 32790111Simp * values even for cpu intensive processes. Therefore we use 3281556Srgrimes * our own estimate in this case. 
3291556Srgrimes */ 3301556Srgrimes if (swtime < RACCT_PCPU_SECS) 3311556Srgrimes return (pcpu); 3321556Srgrimes 3331556Srgrimes p_pctcpu = 0; 3341556Srgrimes FOREACH_THREAD_IN_PROC(p, td) { 3351556Srgrimes if (td == PCPU_GET(idlethread)) 33690111Simp continue; 33790111Simp#ifdef SMP 33825231Ssteve found = 0; 3391556Srgrimes STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 3401556Srgrimes if (td == pc->pc_idlethread) { 3411556Srgrimes found = 1; 3421556Srgrimes break; 3431556Srgrimes } 3441556Srgrimes } 3451556Srgrimes if (found) 3461556Srgrimes continue; 3471556Srgrimes#endif 3481556Srgrimes thread_lock(td); 3491556Srgrimes#ifdef SCHED_4BSD 350 pctcpu = sched_pctcpu(td); 351 /* Count also the yet unfinished second. */ 352 pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT; 353 pctcpu_next += sched_pctcpu_delta(td); 354 p_pctcpu += max(pctcpu, pctcpu_next); 355#else 356 /* 357 * In ULE the %cpu statistics are updated on every 358 * sched_pctcpu() call. So special calculations to 359 * account for the latest (unfinished) second are 360 * not needed. 361 */ 362 p_pctcpu += sched_pctcpu(td); 363#endif 364 thread_unlock(td); 365 } 366 367#ifdef SCHED_4BSD 368 if (swtime <= CCPU_EXP_MAX) 369 return ((100 * (uint64_t)p_pctcpu * 1000000) / 370 (FSCALE - ccpu_exp[swtime])); 371#endif 372 373 return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE); 374} 375 376static void 377racct_add_racct(struct racct *dest, const struct racct *src) 378{ 379 int i; 380 381 mtx_assert(&racct_lock, MA_OWNED); 382 383 /* 384 * Update resource usage in dest. 
385 */ 386 for (i = 0; i <= RACCT_MAX; i++) { 387 KASSERT(dest->r_resources[i] >= 0, 388 ("%s: resource %d propagation meltdown: dest < 0", 389 __func__, i)); 390 KASSERT(src->r_resources[i] >= 0, 391 ("%s: resource %d propagation meltdown: src < 0", 392 __func__, i)); 393 dest->r_resources[i] += src->r_resources[i]; 394 } 395} 396 397static void 398racct_sub_racct(struct racct *dest, const struct racct *src) 399{ 400 int i; 401 402 mtx_assert(&racct_lock, MA_OWNED); 403 404 /* 405 * Update resource usage in dest. 406 */ 407 for (i = 0; i <= RACCT_MAX; i++) { 408 if (!RACCT_IS_SLOPPY(i) && !RACCT_IS_DECAYING(i)) { 409 KASSERT(dest->r_resources[i] >= 0, 410 ("%s: resource %d propagation meltdown: dest < 0", 411 __func__, i)); 412 KASSERT(src->r_resources[i] >= 0, 413 ("%s: resource %d propagation meltdown: src < 0", 414 __func__, i)); 415 KASSERT(src->r_resources[i] <= dest->r_resources[i], 416 ("%s: resource %d propagation meltdown: src > dest", 417 __func__, i)); 418 } 419 if (RACCT_CAN_DROP(i)) { 420 dest->r_resources[i] -= src->r_resources[i]; 421 if (dest->r_resources[i] < 0) { 422 KASSERT(RACCT_IS_SLOPPY(i) || 423 RACCT_IS_DECAYING(i), 424 ("%s: resource %d usage < 0", __func__, i)); 425 dest->r_resources[i] = 0; 426 } 427 } 428 } 429} 430 431void 432racct_create(struct racct **racctp) 433{ 434 435 SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0); 436 437 KASSERT(*racctp == NULL, ("racct already allocated")); 438 439 *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO); 440} 441 442static void 443racct_destroy_locked(struct racct **racctp) 444{ 445 int i; 446 struct racct *racct; 447 448 SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0); 449 450 mtx_assert(&racct_lock, MA_OWNED); 451 KASSERT(racctp != NULL, ("NULL racctp")); 452 KASSERT(*racctp != NULL, ("NULL racct")); 453 454 racct = *racctp; 455 456 for (i = 0; i <= RACCT_MAX; i++) { 457 if (RACCT_IS_SLOPPY(i)) 458 continue; 459 if (!RACCT_IS_RECLAIMABLE(i)) 460 continue; 461 
KASSERT(racct->r_resources[i] == 0, 462 ("destroying non-empty racct: " 463 "%ju allocated for resource %d\n", 464 racct->r_resources[i], i)); 465 } 466 uma_zfree(racct_zone, racct); 467 *racctp = NULL; 468} 469 470void 471racct_destroy(struct racct **racct) 472{ 473 474 mtx_lock(&racct_lock); 475 racct_destroy_locked(racct); 476 mtx_unlock(&racct_lock); 477} 478 479/* 480 * Increase consumption of 'resource' by 'amount' for 'racct' 481 * and all its parents. Differently from other cases, 'amount' here 482 * may be less than zero. 483 */ 484static void 485racct_alloc_resource(struct racct *racct, int resource, 486 uint64_t amount) 487{ 488 489 mtx_assert(&racct_lock, MA_OWNED); 490 KASSERT(racct != NULL, ("NULL racct")); 491 492 racct->r_resources[resource] += amount; 493 if (racct->r_resources[resource] < 0) { 494 KASSERT(RACCT_IS_SLOPPY(resource) || RACCT_IS_DECAYING(resource), 495 ("%s: resource %d usage < 0", __func__, resource)); 496 racct->r_resources[resource] = 0; 497 } 498 499 /* 500 * There are some cases where the racct %cpu resource would grow 501 * beyond 100%. 502 * For example in racct_proc_exit() we add the process %cpu usage 503 * to the ucred racct containers. If too many processes terminated 504 * in a short time span, the ucred %cpu resource could grow too much. 505 * Also, the 4BSD scheduler sometimes returns for a thread more than 506 * 100% cpu usage. So we set a boundary here to 100%. 507 */ 508 if ((resource == RACCT_PCTCPU) && 509 (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000)) 510 racct->r_resources[RACCT_PCTCPU] = 100 * 1000000; 511} 512 513static int 514racct_add_locked(struct proc *p, int resource, uint64_t amount) 515{ 516#ifdef RCTL 517 int error; 518#endif 519 520 SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0); 521 522 /* 523 * We need proc lock to dereference p->p_ucred. 
524 */ 525 PROC_LOCK_ASSERT(p, MA_OWNED); 526 527#ifdef RCTL 528 error = rctl_enforce(p, resource, amount); 529 if (error && RACCT_IS_DENIABLE(resource)) { 530 SDT_PROBE(racct, kernel, rusage, add__failure, p, resource, 531 amount, 0, 0); 532 return (error); 533 } 534#endif 535 racct_alloc_resource(p->p_racct, resource, amount); 536 racct_add_cred_locked(p->p_ucred, resource, amount); 537 538 return (0); 539} 540 541/* 542 * Increase allocation of 'resource' by 'amount' for process 'p'. 543 * Return 0 if it's below limits, or errno, if it's not. 544 */ 545int 546racct_add(struct proc *p, int resource, uint64_t amount) 547{ 548 int error; 549 550 mtx_lock(&racct_lock); 551 error = racct_add_locked(p, resource, amount); 552 mtx_unlock(&racct_lock); 553 return (error); 554} 555 556static void 557racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount) 558{ 559 struct prison *pr; 560 561 SDT_PROBE(racct, kernel, rusage, add__cred, cred, resource, amount, 562 0, 0); 563 564 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount); 565 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 566 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource, 567 amount); 568 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount); 569} 570 571/* 572 * Increase allocation of 'resource' by 'amount' for credential 'cred'. 573 * Doesn't check for limits and never fails. 574 * 575 * XXX: Shouldn't this ever return an error? 576 */ 577void 578racct_add_cred(struct ucred *cred, int resource, uint64_t amount) 579{ 580 581 mtx_lock(&racct_lock); 582 racct_add_cred_locked(cred, resource, amount); 583 mtx_unlock(&racct_lock); 584} 585 586/* 587 * Increase allocation of 'resource' by 'amount' for process 'p'. 588 * Doesn't check for limits and never fails. 
589 */ 590void 591racct_add_force(struct proc *p, int resource, uint64_t amount) 592{ 593 594 SDT_PROBE(racct, kernel, rusage, add__force, p, resource, amount, 0, 0); 595 596 /* 597 * We need proc lock to dereference p->p_ucred. 598 */ 599 PROC_LOCK_ASSERT(p, MA_OWNED); 600 601 mtx_lock(&racct_lock); 602 racct_alloc_resource(p->p_racct, resource, amount); 603 mtx_unlock(&racct_lock); 604 racct_add_cred(p->p_ucred, resource, amount); 605} 606 607static int 608racct_set_locked(struct proc *p, int resource, uint64_t amount) 609{ 610 int64_t old_amount, decayed_amount; 611 int64_t diff_proc, diff_cred; 612#ifdef RCTL 613 int error; 614#endif 615 616 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); 617 618 /* 619 * We need proc lock to dereference p->p_ucred. 620 */ 621 PROC_LOCK_ASSERT(p, MA_OWNED); 622 623 old_amount = p->p_racct->r_resources[resource]; 624 /* 625 * The diffs may be negative. 626 */ 627 diff_proc = amount - old_amount; 628 if (RACCT_IS_DECAYING(resource)) { 629 /* 630 * Resources in per-credential racct containers may decay. 631 * If this is the case, we need to calculate the difference 632 * between the new amount and the proportional value of the 633 * old amount that has decayed in the ucred racct containers. 
634 */ 635 decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; 636 diff_cred = amount - decayed_amount; 637 } else 638 diff_cred = diff_proc; 639#ifdef notyet 640 KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource), 641 ("%s: usage of non-droppable resource %d dropping", __func__, 642 resource)); 643#endif 644#ifdef RCTL 645 if (diff_proc > 0) { 646 error = rctl_enforce(p, resource, diff_proc); 647 if (error && RACCT_IS_DENIABLE(resource)) { 648 SDT_PROBE(racct, kernel, rusage, set__failure, p, 649 resource, amount, 0, 0); 650 return (error); 651 } 652 } 653#endif 654 racct_alloc_resource(p->p_racct, resource, diff_proc); 655 if (diff_cred > 0) 656 racct_add_cred_locked(p->p_ucred, resource, diff_cred); 657 else if (diff_cred < 0) 658 racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); 659 660 return (0); 661} 662 663/* 664 * Set allocation of 'resource' to 'amount' for process 'p'. 665 * Return 0 if it's below limits, or errno, if it's not. 666 * 667 * Note that decreasing the allocation always returns 0, 668 * even if it's above the limit. 669 */ 670int 671racct_set(struct proc *p, int resource, uint64_t amount) 672{ 673 int error; 674 675 mtx_lock(&racct_lock); 676 error = racct_set_locked(p, resource, amount); 677 mtx_unlock(&racct_lock); 678 return (error); 679} 680 681static void 682racct_set_force_locked(struct proc *p, int resource, uint64_t amount) 683{ 684 int64_t old_amount, decayed_amount; 685 int64_t diff_proc, diff_cred; 686 687 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); 688 689 /* 690 * We need proc lock to dereference p->p_ucred. 691 */ 692 PROC_LOCK_ASSERT(p, MA_OWNED); 693 694 old_amount = p->p_racct->r_resources[resource]; 695 /* 696 * The diffs may be negative. 697 */ 698 diff_proc = amount - old_amount; 699 if (RACCT_IS_DECAYING(resource)) { 700 /* 701 * Resources in per-credential racct containers may decay. 
702 * If this is the case, we need to calculate the difference 703 * between the new amount and the proportional value of the 704 * old amount that has decayed in the ucred racct containers. 705 */ 706 decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; 707 diff_cred = amount - decayed_amount; 708 } else 709 diff_cred = diff_proc; 710 711 racct_alloc_resource(p->p_racct, resource, diff_proc); 712 if (diff_cred > 0) 713 racct_add_cred_locked(p->p_ucred, resource, diff_cred); 714 else if (diff_cred < 0) 715 racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); 716} 717 718void 719racct_set_force(struct proc *p, int resource, uint64_t amount) 720{ 721 mtx_lock(&racct_lock); 722 racct_set_force_locked(p, resource, amount); 723 mtx_unlock(&racct_lock); 724} 725 726/* 727 * Returns amount of 'resource' the process 'p' can keep allocated. 728 * Allocating more than that would be denied, unless the resource 729 * is marked undeniable. Amount of already allocated resource does 730 * not matter. 731 */ 732uint64_t 733racct_get_limit(struct proc *p, int resource) 734{ 735 736#ifdef RCTL 737 return (rctl_get_limit(p, resource)); 738#else 739 return (UINT64_MAX); 740#endif 741} 742 743/* 744 * Returns amount of 'resource' the process 'p' can keep allocated. 745 * Allocating more than that would be denied, unless the resource 746 * is marked undeniable. Amount of already allocated resource does 747 * matter. 748 */ 749uint64_t 750racct_get_available(struct proc *p, int resource) 751{ 752 753#ifdef RCTL 754 return (rctl_get_available(p, resource)); 755#else 756 return (UINT64_MAX); 757#endif 758} 759 760/* 761 * Returns amount of the %cpu resource that process 'p' can add to its %cpu 762 * utilization. Adding more than that would lead to the process being 763 * throttled. 
764 */ 765static int64_t 766racct_pcpu_available(struct proc *p) 767{ 768 769#ifdef RCTL 770 return (rctl_pcpu_available(p)); 771#else 772 return (INT64_MAX); 773#endif 774} 775 776/* 777 * Decrease allocation of 'resource' by 'amount' for process 'p'. 778 */ 779void 780racct_sub(struct proc *p, int resource, uint64_t amount) 781{ 782 783 SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0); 784 785 /* 786 * We need proc lock to dereference p->p_ucred. 787 */ 788 PROC_LOCK_ASSERT(p, MA_OWNED); 789 KASSERT(RACCT_CAN_DROP(resource), 790 ("%s: called for non-droppable resource %d", __func__, resource)); 791 792 mtx_lock(&racct_lock); 793 KASSERT(amount <= p->p_racct->r_resources[resource], 794 ("%s: freeing %ju of resource %d, which is more " 795 "than allocated %jd for %s (pid %d)", __func__, amount, resource, 796 (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid)); 797 798 racct_alloc_resource(p->p_racct, resource, -amount); 799 racct_sub_cred_locked(p->p_ucred, resource, amount); 800 mtx_unlock(&racct_lock); 801} 802 803static void 804racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) 805{ 806 struct prison *pr; 807 808 SDT_PROBE(racct, kernel, rusage, sub__cred, cred, resource, amount, 809 0, 0); 810 811#ifdef notyet 812 KASSERT(RACCT_CAN_DROP(resource), 813 ("%s: called for resource %d which can not drop", __func__, 814 resource)); 815#endif 816 817 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount); 818 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 819 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource, 820 -amount); 821 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount); 822} 823 824/* 825 * Decrease allocation of 'resource' by 'amount' for credential 'cred'. 
826 */ 827void 828racct_sub_cred(struct ucred *cred, int resource, uint64_t amount) 829{ 830 831 mtx_lock(&racct_lock); 832 racct_sub_cred_locked(cred, resource, amount); 833 mtx_unlock(&racct_lock); 834} 835 836/* 837 * Inherit resource usage information from the parent process. 838 */ 839int 840racct_proc_fork(struct proc *parent, struct proc *child) 841{ 842 int i, error = 0; 843 844 /* 845 * Create racct for the child process. 846 */ 847 racct_create(&child->p_racct); 848 849 PROC_LOCK(parent); 850 PROC_LOCK(child); 851 mtx_lock(&racct_lock); 852 853#ifdef RCTL 854 error = rctl_proc_fork(parent, child); 855 if (error != 0) 856 goto out; 857#endif 858 859 /* Init process cpu time. */ 860 child->p_prev_runtime = 0; 861 child->p_throttled = 0; 862 863 /* 864 * Inherit resource usage. 865 */ 866 for (i = 0; i <= RACCT_MAX; i++) { 867 if (parent->p_racct->r_resources[i] == 0 || 868 !RACCT_IS_INHERITABLE(i)) 869 continue; 870 871 error = racct_set_locked(child, i, 872 parent->p_racct->r_resources[i]); 873 if (error != 0) 874 goto out; 875 } 876 877 error = racct_add_locked(child, RACCT_NPROC, 1); 878 error += racct_add_locked(child, RACCT_NTHR, 1); 879 880out: 881 mtx_unlock(&racct_lock); 882 PROC_UNLOCK(child); 883 PROC_UNLOCK(parent); 884 885 if (error != 0) 886 racct_proc_exit(child); 887 888 return (error); 889} 890 891/* 892 * Called at the end of fork1(), to handle rules that require the process 893 * to be fully initialized. 894 */ 895void 896racct_proc_fork_done(struct proc *child) 897{ 898 899#ifdef RCTL 900 PROC_LOCK(child); 901 mtx_lock(&racct_lock); 902 rctl_enforce(child, RACCT_NPROC, 0); 903 rctl_enforce(child, RACCT_NTHR, 0); 904 mtx_unlock(&racct_lock); 905 PROC_UNLOCK(child); 906#endif 907} 908 909void 910racct_proc_exit(struct proc *p) 911{ 912 int i; 913 uint64_t runtime; 914 struct timeval wallclock; 915 uint64_t pct_estimate, pct; 916 917 PROC_LOCK(p); 918 /* 919 * We don't need to calculate rux, proc_reap() has already done this. 
920 */ 921 runtime = cputick2usec(p->p_rux.rux_runtime); 922#ifdef notyet 923 KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); 924#else 925 if (runtime < p->p_prev_runtime) 926 runtime = p->p_prev_runtime; 927#endif 928 microuptime(&wallclock); 929 timevalsub(&wallclock, &p->p_stats->p_start); 930 if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { 931 pct_estimate = (1000000 * runtime * 100) / 932 ((uint64_t)wallclock.tv_sec * 1000000 + 933 wallclock.tv_usec); 934 } else 935 pct_estimate = 0; 936 pct = racct_getpcpu(p, pct_estimate); 937 938 mtx_lock(&racct_lock); 939 racct_set_locked(p, RACCT_CPU, runtime); 940 racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct); 941 942 for (i = 0; i <= RACCT_MAX; i++) { 943 if (p->p_racct->r_resources[i] == 0) 944 continue; 945 if (!RACCT_IS_RECLAIMABLE(i)) 946 continue; 947 racct_set_locked(p, i, 0); 948 } 949 950 mtx_unlock(&racct_lock); 951 PROC_UNLOCK(p); 952 953#ifdef RCTL 954 rctl_racct_release(p->p_racct); 955#endif 956 racct_destroy(&p->p_racct); 957} 958 959/* 960 * Called after credentials change, to move resource utilisation 961 * between raccts. 
962 */ 963void 964racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, 965 struct ucred *newcred) 966{ 967 struct uidinfo *olduip, *newuip; 968 struct loginclass *oldlc, *newlc; 969 struct prison *oldpr, *newpr, *pr; 970 971 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 972 973 newuip = newcred->cr_ruidinfo; 974 olduip = oldcred->cr_ruidinfo; 975 newlc = newcred->cr_loginclass; 976 oldlc = oldcred->cr_loginclass; 977 newpr = newcred->cr_prison; 978 oldpr = oldcred->cr_prison; 979 980 mtx_lock(&racct_lock); 981 if (newuip != olduip) { 982 racct_sub_racct(olduip->ui_racct, p->p_racct); 983 racct_add_racct(newuip->ui_racct, p->p_racct); 984 } 985 if (newlc != oldlc) { 986 racct_sub_racct(oldlc->lc_racct, p->p_racct); 987 racct_add_racct(newlc->lc_racct, p->p_racct); 988 } 989 if (newpr != oldpr) { 990 for (pr = oldpr; pr != NULL; pr = pr->pr_parent) 991 racct_sub_racct(pr->pr_prison_racct->prr_racct, 992 p->p_racct); 993 for (pr = newpr; pr != NULL; pr = pr->pr_parent) 994 racct_add_racct(pr->pr_prison_racct->prr_racct, 995 p->p_racct); 996 } 997 mtx_unlock(&racct_lock); 998 999#ifdef RCTL 1000 rctl_proc_ucred_changed(p, newcred); 1001#endif 1002} 1003 1004void 1005racct_move(struct racct *dest, struct racct *src) 1006{ 1007 1008 mtx_lock(&racct_lock); 1009 1010 racct_add_racct(dest, src); 1011 racct_sub_racct(src, src); 1012 1013 mtx_unlock(&racct_lock); 1014} 1015 1016static void 1017racct_proc_throttle(struct proc *p) 1018{ 1019 struct thread *td; 1020#ifdef SMP 1021 int cpuid; 1022#endif 1023 1024 PROC_LOCK_ASSERT(p, MA_OWNED); 1025 1026 /* 1027 * Do not block kernel processes. Also do not block processes with 1028 * low %cpu utilization to improve interactivity. 
1029 */ 1030 if (((p->p_flag & (P_SYSTEM | P_KTHREAD)) != 0) || 1031 (p->p_racct->r_resources[RACCT_PCTCPU] <= pcpu_threshold)) 1032 return; 1033 p->p_throttled = 1; 1034 1035 FOREACH_THREAD_IN_PROC(p, td) { 1036 thread_lock(td); 1037 switch (td->td_state) { 1038 case TDS_RUNQ: 1039 /* 1040 * If the thread is on the scheduler run-queue, we can 1041 * not just remove it from there. So we set the flag 1042 * TDF_NEEDRESCHED for the thread, so that once it is 1043 * running, it is taken off the cpu as soon as possible. 1044 */ 1045 td->td_flags |= TDF_NEEDRESCHED; 1046 break; 1047 case TDS_RUNNING: 1048 /* 1049 * If the thread is running, we request a context 1050 * switch for it by setting the TDF_NEEDRESCHED flag. 1051 */ 1052 td->td_flags |= TDF_NEEDRESCHED; 1053#ifdef SMP 1054 cpuid = td->td_oncpu; 1055 if ((cpuid != NOCPU) && (td != curthread)) 1056 ipi_cpu(cpuid, IPI_AST); 1057#endif 1058 break; 1059 default: 1060 break; 1061 } 1062 thread_unlock(td); 1063 } 1064} 1065 1066static void 1067racct_proc_wakeup(struct proc *p) 1068{ 1069 PROC_LOCK_ASSERT(p, MA_OWNED); 1070 1071 if (p->p_throttled) { 1072 p->p_throttled = 0; 1073 wakeup(p->p_racct); 1074 } 1075} 1076 1077static void 1078racct_decay_resource(struct racct *racct, void * res, void* dummy) 1079{ 1080 int resource; 1081 int64_t r_old, r_new; 1082 1083 resource = *(int *)res; 1084 r_old = racct->r_resources[resource]; 1085 1086 /* If there is nothing to decay, just exit. 
*/ 1087 if (r_old <= 0) 1088 return; 1089 1090 mtx_lock(&racct_lock); 1091 r_new = r_old * RACCT_DECAY_FACTOR / FSCALE; 1092 racct->r_resources[resource] = r_new; 1093 mtx_unlock(&racct_lock); 1094} 1095 1096static void 1097racct_decay(int resource) 1098{ 1099 ui_racct_foreach(racct_decay_resource, &resource, NULL); 1100 loginclass_racct_foreach(racct_decay_resource, &resource, NULL); 1101 prison_racct_foreach(racct_decay_resource, &resource, NULL); 1102} 1103 1104static void 1105racctd(void) 1106{ 1107 struct thread *td; 1108 struct proc *p; 1109 struct timeval wallclock; 1110 uint64_t runtime; 1111 uint64_t pct, pct_estimate; 1112 1113 for (;;) { 1114 racct_decay(RACCT_PCTCPU); 1115 1116 sx_slock(&allproc_lock); 1117 1118 LIST_FOREACH(p, &zombproc, p_list) { 1119 PROC_LOCK(p); 1120 racct_set(p, RACCT_PCTCPU, 0); 1121 PROC_UNLOCK(p); 1122 } 1123 1124 FOREACH_PROC_IN_SYSTEM(p) { 1125 PROC_LOCK(p); 1126 if (p->p_state != PRS_NORMAL) { 1127 PROC_UNLOCK(p); 1128 continue; 1129 } 1130 1131 microuptime(&wallclock); 1132 timevalsub(&wallclock, &p->p_stats->p_start); 1133 PROC_SLOCK(p); 1134 FOREACH_THREAD_IN_PROC(p, td) 1135 ruxagg(p, td); 1136 runtime = cputick2usec(p->p_rux.rux_runtime); 1137 PROC_SUNLOCK(p); 1138#ifdef notyet 1139 KASSERT(runtime >= p->p_prev_runtime, 1140 ("runtime < p_prev_runtime")); 1141#else 1142 if (runtime < p->p_prev_runtime) 1143 runtime = p->p_prev_runtime; 1144#endif 1145 p->p_prev_runtime = runtime; 1146 if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { 1147 pct_estimate = (1000000 * runtime * 100) / 1148 ((uint64_t)wallclock.tv_sec * 1000000 + 1149 wallclock.tv_usec); 1150 } else 1151 pct_estimate = 0; 1152 pct = racct_getpcpu(p, pct_estimate); 1153 mtx_lock(&racct_lock); 1154 racct_set_force_locked(p, RACCT_PCTCPU, pct); 1155 racct_set_locked(p, RACCT_CPU, runtime); 1156 racct_set_locked(p, RACCT_WALLCLOCK, 1157 (uint64_t)wallclock.tv_sec * 1000000 + 1158 wallclock.tv_usec); 1159 mtx_unlock(&racct_lock); 1160 PROC_UNLOCK(p); 1161 } 
1162 1163 /* 1164 * To ensure that processes are throttled in a fair way, we need 1165 * to iterate over all processes again and check the limits 1166 * for %cpu resource only after ucred racct containers have been 1167 * properly filled. 1168 */ 1169 FOREACH_PROC_IN_SYSTEM(p) { 1170 PROC_LOCK(p); 1171 if (p->p_state != PRS_NORMAL) { 1172 PROC_UNLOCK(p); 1173 continue; 1174 } 1175 1176 if (racct_pcpu_available(p) <= 0) 1177 racct_proc_throttle(p); 1178 else if (p->p_throttled) 1179 racct_proc_wakeup(p); 1180 PROC_UNLOCK(p); 1181 } 1182 sx_sunlock(&allproc_lock); 1183 pause("-", hz); 1184 } 1185} 1186 1187static struct kproc_desc racctd_kp = { 1188 "racctd", 1189 racctd, 1190 NULL 1191}; 1192SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp); 1193 1194static void 1195racct_init(void) 1196{ 1197 1198 racct_zone = uma_zcreate("racct", sizeof(struct racct), 1199 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 1200 /* 1201 * XXX: Move this somewhere. 1202 */ 1203 prison0.pr_prison_racct = prison_racct_find("0"); 1204} 1205SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL); 1206 1207#else /* !RACCT */ 1208 1209int 1210racct_add(struct proc *p, int resource, uint64_t amount) 1211{ 1212 1213 return (0); 1214} 1215 1216void 1217racct_add_cred(struct ucred *cred, int resource, uint64_t amount) 1218{ 1219} 1220 1221void 1222racct_add_force(struct proc *p, int resource, uint64_t amount) 1223{ 1224 1225 return; 1226} 1227 1228int 1229racct_set(struct proc *p, int resource, uint64_t amount) 1230{ 1231 1232 return (0); 1233} 1234 1235void 1236racct_set_force(struct proc *p, int resource, uint64_t amount) 1237{ 1238} 1239 1240void 1241racct_sub(struct proc *p, int resource, uint64_t amount) 1242{ 1243} 1244 1245void 1246racct_sub_cred(struct ucred *cred, int resource, uint64_t amount) 1247{ 1248} 1249 1250uint64_t 1251racct_get_limit(struct proc *p, int resource) 1252{ 1253 1254 return (UINT64_MAX); 1255} 1256 1257uint64_t 
1258racct_get_available(struct proc *p, int resource) 1259{ 1260 1261 return (UINT64_MAX); 1262} 1263 1264void 1265racct_create(struct racct **racctp) 1266{ 1267} 1268 1269void 1270racct_destroy(struct racct **racctp) 1271{ 1272} 1273 1274int 1275racct_proc_fork(struct proc *parent, struct proc *child) 1276{ 1277 1278 return (0); 1279} 1280 1281void 1282racct_proc_fork_done(struct proc *child) 1283{ 1284} 1285 1286void 1287racct_proc_exit(struct proc *p) 1288{ 1289} 1290 1291#endif /* !RACCT */ 1292