1220137Strasz/*- 2220137Strasz * Copyright (c) 2010 The FreeBSD Foundation 3220137Strasz * All rights reserved. 4220137Strasz * 5220137Strasz * This software was developed by Edward Tomasz Napierala under sponsorship 6220137Strasz * from the FreeBSD Foundation. 7220137Strasz * 8220137Strasz * Redistribution and use in source and binary forms, with or without 9220137Strasz * modification, are permitted provided that the following conditions 10220137Strasz * are met: 11220137Strasz * 1. Redistributions of source code must retain the above copyright 12220137Strasz * notice, this list of conditions and the following disclaimer. 13220137Strasz * 2. Redistributions in binary form must reproduce the above copyright 14220137Strasz * notice, this list of conditions and the following disclaimer in the 15220137Strasz * documentation and/or other materials provided with the distribution. 16220137Strasz * 17220137Strasz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18220137Strasz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19220137Strasz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20220137Strasz * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21220137Strasz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22220137Strasz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23220137Strasz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24220137Strasz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25220137Strasz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26220137Strasz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27220137Strasz * SUCH DAMAGE. 
28220137Strasz * 29220137Strasz * $FreeBSD: releng/10.3/sys/kern/kern_racct.c 293473 2016-01-09 14:08:10Z dchagin $ 30220137Strasz */ 31220137Strasz 32220137Strasz#include <sys/cdefs.h> 33220137Strasz__FBSDID("$FreeBSD: releng/10.3/sys/kern/kern_racct.c 293473 2016-01-09 14:08:10Z dchagin $"); 34220137Strasz 35220137Strasz#include "opt_kdtrace.h" 36242139Strasz#include "opt_sched.h" 37220137Strasz 38220137Strasz#include <sys/param.h> 39228430Savg#include <sys/systm.h> 40220137Strasz#include <sys/eventhandler.h> 41220137Strasz#include <sys/jail.h> 42220137Strasz#include <sys/kernel.h> 43220137Strasz#include <sys/kthread.h> 44220137Strasz#include <sys/lock.h> 45220137Strasz#include <sys/loginclass.h> 46220137Strasz#include <sys/malloc.h> 47220137Strasz#include <sys/mutex.h> 48220137Strasz#include <sys/proc.h> 49220137Strasz#include <sys/racct.h> 50220137Strasz#include <sys/resourcevar.h> 51220137Strasz#include <sys/sbuf.h> 52220137Strasz#include <sys/sched.h> 53220137Strasz#include <sys/sdt.h> 54242139Strasz#include <sys/smp.h> 55220137Strasz#include <sys/sx.h> 56242139Strasz#include <sys/sysctl.h> 57220137Strasz#include <sys/sysent.h> 58220137Strasz#include <sys/sysproto.h> 59220137Strasz#include <sys/umtx.h> 60242139Strasz#include <machine/smp.h> 61220137Strasz 62220137Strasz#ifdef RCTL 63220137Strasz#include <sys/rctl.h> 64220137Strasz#endif 65220137Strasz 66220137Strasz#ifdef RACCT 67220137Strasz 68220137StraszFEATURE(racct, "Resource Accounting"); 69220137Strasz 70242139Strasz/* 71242139Strasz * Do not block processes that have their %cpu usage <= pcpu_threshold. 
*/
static int pcpu_threshold = 1;

/*
 * Global RACCT on/off switch, tested at the top of every public entry
 * point in this file.  The build-time default is selected by
 * RACCT_DEFAULT_TO_DISABLED; it can be overridden through the
 * "kern.racct.enable" tunable and is exported with CTLFLAG_RDTUN.
 */
#ifdef RACCT_DEFAULT_TO_DISABLED
int racct_enable = 0;
#else
int racct_enable = 1;
#endif

/*
 * kern.racct sysctl subtree.  The node has to be declared before the
 * OIDs that hang off _kern_racct.
 *
 * NOTE(review): both OIDs below use SYSCTL_UINT although the backing
 * variables are plain (signed) ints -- confirm whether SYSCTL_INT was
 * intended.
 */
SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting");
TUNABLE_INT("kern.racct.enable", &racct_enable);
SYSCTL_UINT(_kern_racct, OID_AUTO, enable, CTLFLAG_RDTUN, &racct_enable,
    0, "Enable RACCT/RCTL");
SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold,
    0, "Processes with higher %cpu usage than this value can be throttled.");

/*
 * How many seconds it takes to use the scheduler %cpu calculations.  When a
 * process starts, we compute its %cpu usage by dividing its runtime by the
 * process wall clock time.  After RACCT_PCPU_SECS pass, we use the value
 * provided by the scheduler.
*/
#define RACCT_PCPU_SECS		3

/*
 * Mutex guarding the racct containers.  The helpers in this file that
 * modify r_resources[] assert that it is held (mtx_assert(..., MA_OWNED)).
 */
static struct mtx racct_lock;
MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);

/* UMA zone that racct_create() allocates from and racct_destroy_locked() frees to. */
static uma_zone_t racct_zone;

/* Forward declarations for helpers that are used before their definition. */
static void racct_sub_racct(struct racct *dest, const struct racct *src);
static void racct_sub_cred_locked(struct ucred *cred, int resource,
		uint64_t amount);
static void racct_add_cred_locked(struct ucred *cred, int resource,
		uint64_t amount);

/*
 * DTrace SDT probes.  The "rusage" probes take (subject, resource, amount);
 * the "racct" probes take one or two container pointers.
 */
SDT_PROVIDER_DEFINE(racct);
SDT_PROBE_DEFINE3(racct, kernel, rusage, add, "struct proc *", "int",
    "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add__failure,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add__cred, "struct ucred *",
    "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add__force, "struct proc *",
    "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, set, "struct proc *", "int",
    "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, set__failure,
    "struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, "struct proc *", "int",
    "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, sub__cred, "struct ucred *",
    "int", "uint64_t");
SDT_PROBE_DEFINE1(racct, kernel, racct, create, "struct racct *");
SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, "struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, join, "struct racct *",
    "struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, join__failure,
    "struct racct *", "struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, leave, "struct racct *",
    "struct racct *");

/*
 * Property flags for every resource, indexed by resource id.
 * Presumably this is what the RACCT_IS_*() / RACCT_CAN_DROP() macros used
 * throughout this file consult -- confirm against sys/racct.h.
 */
int racct_types[] = {
	[RACCT_CPU] =
		RACCT_IN_MILLIONS,
	[RACCT_DATA] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_STACK] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_CORE] =
		RACCT_DENIABLE,
	[RACCT_RSS] =
		RACCT_RECLAIMABLE,
	[RACCT_MEMLOCK] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NPROC] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NOFILE] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_VMEM] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NPTS] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SWAP] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NTHR] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_MSGQQUEUED] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_MSGQSIZE] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NMSGQ] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEM] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEMOP] =
		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NSHM] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SHMSIZE] =
		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_WALLCLOCK] =
		RACCT_IN_MILLIONS,
	[RACCT_PCTCPU] =
		RACCT_DECAYING | RACCT_DENIABLE | RACCT_IN_MILLIONS };

/*
 * Fixed-point (FSCALE-scaled) fraction 0.3.  racct_set_locked() and
 * racct_set_force_locked() multiply the previous per-process amount by it
 * to estimate what is still left of that amount in the per-credential
 * containers after decay.
 */
static const fixpt_t RACCT_DECAY_FACTOR = 0.3 * FSCALE;

#ifdef SCHED_4BSD
/*
 * Contains intermediate values for %cpu calculations to avoid using floating
 * point in the kernel.
 * ccpu_exp[k] = FSCALE * (ccpu/FSCALE)^k = FSCALE * exp(-k/20)
 * It is needed only for the 4BSD scheduler, because in ULE, the ccpu equals to
 * zero so the calculations are more straightforward.
 */
fixpt_t ccpu_exp[] = {
	[0] = FSCALE * 1,
	[1] = FSCALE * 0.95122942450071400909,
	[2] = FSCALE * 0.90483741803595957316,
	[3] = FSCALE * 0.86070797642505780722,
	[4] = FSCALE * 0.81873075307798185866,
	[5] = FSCALE * 0.77880078307140486824,
	[6] = FSCALE * 0.74081822068171786606,
	[7] = FSCALE * 0.70468808971871343435,
	[8] = FSCALE * 0.67032004603563930074,
	[9] = FSCALE * 0.63762815162177329314,
	[10] = FSCALE * 0.60653065971263342360,
	[11] = FSCALE * 0.57694981038048669531,
	[12] = FSCALE * 0.54881163609402643262,
	[13] = FSCALE * 0.52204577676101604789,
	[14] = FSCALE * 0.49658530379140951470,
	[15] = FSCALE * 0.47236655274101470713,
	[16] = FSCALE * 0.44932896411722159143,
	[17] = FSCALE * 0.42741493194872666992,
	[18] = FSCALE * 0.40656965974059911188,
	[19] = FSCALE * 0.38674102345450120691,
	[20] = FSCALE * 0.36787944117144232159,
	[21] = FSCALE * 0.34993774911115535467,
	[22] = FSCALE * 0.33287108369807955328,
	[23] = FSCALE * 0.31663676937905321821,
	[24] = FSCALE * 0.30119421191220209664,
	[25] = FSCALE * 0.28650479686019010032,
	[26] = FSCALE * 0.27253179303401260312,
	[27] = FSCALE * 0.25924026064589150757,
	[28] = FSCALE * 0.24659696394160647693,
	[29] = FSCALE * 0.23457028809379765313,
	[30] = FSCALE * 0.22313016014842982893,
	[31] = FSCALE * 0.21224797382674305771,
	[32] = FSCALE * 0.20189651799465540848,
	[33] = FSCALE * 0.19204990862075411423,
	[34] = FSCALE * 0.18268352405273465022,
	[35] = FSCALE * 0.17377394345044512668,
	[36] = FSCALE * 0.16529888822158653829,
	[37] = FSCALE * 0.15723716631362761621,
	[38] = FSCALE * 0.14956861922263505264,
	[39] = FSCALE * 0.14227407158651357185,
	[40] = FSCALE * 0.13533528323661269189,
	[41] = FSCALE * 0.12873490358780421886,
	[42] = FSCALE * 0.12245642825298191021,
	[43] = FSCALE * 0.11648415777349695786,
	[44] = FSCALE * 0.11080315836233388333,
	[45] = FSCALE * 0.10539922456186433678,
	[46] = FSCALE * 0.10025884372280373372,
	[47] = FSCALE * 0.09536916221554961888,
	[48] = FSCALE * 0.09071795328941250337,
	[49] = FSCALE * 0.08629358649937051097,
	[50] = FSCALE * 0.08208499862389879516,
	[51] = FSCALE * 0.07808166600115315231,
	[52] = FSCALE * 0.07427357821433388042,
	[53] = FSCALE * 0.07065121306042958674,
	[54] = FSCALE * 0.06720551273974976512,
	[55] = FSCALE * 0.06392786120670757270,
	[56] = FSCALE * 0.06081006262521796499,
	[57] = FSCALE * 0.05784432087483846296,
	[58] = FSCALE * 0.05502322005640722902,
	[59] = FSCALE * 0.05233970594843239308,
	[60] = FSCALE * 0.04978706836786394297,
	[61] = FSCALE * 0.04735892439114092119,
	[62] = FSCALE * 0.04504920239355780606,
	[63] = FSCALE * 0.04285212686704017991,
	[64] = FSCALE * 0.04076220397836621516,
	[65] = FSCALE * 0.03877420783172200988,
	[66] = FSCALE * 0.03688316740124000544,
	[67] = FSCALE * 0.03508435410084502588,
	[68] = FSCALE * 0.03337326996032607948,
	[69] = FSCALE * 0.03174563637806794323,
	[70] = FSCALE * 0.03019738342231850073,
	[71] = FSCALE * 0.02872463965423942912,
	[72] = FSCALE * 0.02732372244729256080,
	[73] = FSCALE * 0.02599112877875534358,
	[74] = FSCALE * 0.02472352647033939120,
	[75] = FSCALE * 0.02351774585600910823,
	[76] = FSCALE * 0.02237077185616559577,
	[77] = FSCALE * 0.02127973643837716938,
	[78] = FSCALE * 0.02024191144580438847,
	[79] = FSCALE * 0.01925470177538692429,
	[80] = FSCALE * 0.01831563888873418029,
	[81] = FSCALE * 0.01742237463949351138,
	[82] = FSCALE * 0.01657267540176124754,
	[83] = FSCALE * 0.01576441648485449082,
	[84] = FSCALE * 0.01499557682047770621,
	[85] = FSCALE * 0.01426423390899925527,
	[86] = FSCALE * 0.01356855901220093175,
	[87] = FSCALE * 0.01290681258047986886,
	[88] = FSCALE * 0.01227733990306844117,
	[89] = FSCALE * 0.01167856697039544521,
	[90] = FSCALE * 0.01110899653824230649,
	[91] = FSCALE * 0.01056720438385265337,
	[92] = FSCALE * 0.01005183574463358164,
	[93] = FSCALE * 0.00956160193054350793,
	[94] = FSCALE * 0.00909527710169581709,
	[95] = FSCALE * 0.00865169520312063417,
	[96] = FSCALE * 0.00822974704902002884,
	[97] = FSCALE * 0.00782837754922577143,
	[98] = FSCALE * 0.00744658307092434051,
	[99] = FSCALE * 0.00708340892905212004,
	[100] = FSCALE * 0.00673794699908546709,
	[101] = FSCALE * 0.00640933344625638184,
	[102] = FSCALE * 0.00609674656551563610,
	[103] = FSCALE * 0.00579940472684214321,
	[104] = FSCALE * 0.00551656442076077241,
	[105] = FSCALE * 0.00524751839918138427,
	[106] = FSCALE * 0.00499159390691021621,
	[107] = FSCALE * 0.00474815099941147558,
	[108] = FSCALE * 0.00451658094261266798,
	[109] = FSCALE * 0.00429630469075234057,
	[110] = FSCALE * 0.00408677143846406699,
};
#endif

/* Highest valid index into ccpu_exp[]; racct_getpcpu() checks against it. */
#define CCPU_EXP_MAX		110

/*
 * This function is analogous to the getpcpu() function in the ps(1) command.
 * They should both calculate in the same way so that the racct %cpu
 * calculations are consistent with the values shown by the ps(1) tool.
 * The calculations are more complex in the 4BSD scheduler because of the value
 * of the ccpu variable.  In ULE it is defined to be zero which saves us some
 * work.
310242139Strasz */ 311242139Straszstatic uint64_t 312242139Straszracct_getpcpu(struct proc *p, u_int pcpu) 313242139Strasz{ 314242139Strasz u_int swtime; 315242139Strasz#ifdef SCHED_4BSD 316242139Strasz fixpt_t pctcpu, pctcpu_next; 317242139Strasz#endif 318242139Strasz#ifdef SMP 319242139Strasz struct pcpu *pc; 320242139Strasz int found; 321242139Strasz#endif 322242139Strasz fixpt_t p_pctcpu; 323242139Strasz struct thread *td; 324242139Strasz 325284665Strasz ASSERT_RACCT_ENABLED(); 326284665Strasz 327242139Strasz /* 328242139Strasz * If the process is swapped out, we count its %cpu usage as zero. 329242139Strasz * This behaviour is consistent with the userland ps(1) tool. 330242139Strasz */ 331242139Strasz if ((p->p_flag & P_INMEM) == 0) 332242139Strasz return (0); 333242139Strasz swtime = (ticks - p->p_swtick) / hz; 334242139Strasz 335242139Strasz /* 336242139Strasz * For short-lived processes, the sched_pctcpu() returns small 337242139Strasz * values even for cpu intensive processes. Therefore we use 338242139Strasz * our own estimate in this case. 339242139Strasz */ 340242139Strasz if (swtime < RACCT_PCPU_SECS) 341242139Strasz return (pcpu); 342242139Strasz 343242139Strasz p_pctcpu = 0; 344242139Strasz FOREACH_THREAD_IN_PROC(p, td) { 345242139Strasz if (td == PCPU_GET(idlethread)) 346242139Strasz continue; 347242139Strasz#ifdef SMP 348242139Strasz found = 0; 349242139Strasz STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 350242139Strasz if (td == pc->pc_idlethread) { 351242139Strasz found = 1; 352242139Strasz break; 353242139Strasz } 354242139Strasz } 355242139Strasz if (found) 356242139Strasz continue; 357242139Strasz#endif 358242139Strasz thread_lock(td); 359242139Strasz#ifdef SCHED_4BSD 360242139Strasz pctcpu = sched_pctcpu(td); 361242139Strasz /* Count also the yet unfinished second. 
*/ 362242139Strasz pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT; 363242139Strasz pctcpu_next += sched_pctcpu_delta(td); 364242139Strasz p_pctcpu += max(pctcpu, pctcpu_next); 365242139Strasz#else 366242139Strasz /* 367242139Strasz * In ULE the %cpu statistics are updated on every 368242139Strasz * sched_pctcpu() call. So special calculations to 369242139Strasz * account for the latest (unfinished) second are 370242139Strasz * not needed. 371242139Strasz */ 372242139Strasz p_pctcpu += sched_pctcpu(td); 373242139Strasz#endif 374242139Strasz thread_unlock(td); 375242139Strasz } 376242139Strasz 377242139Strasz#ifdef SCHED_4BSD 378242139Strasz if (swtime <= CCPU_EXP_MAX) 379242139Strasz return ((100 * (uint64_t)p_pctcpu * 1000000) / 380242139Strasz (FSCALE - ccpu_exp[swtime])); 381242139Strasz#endif 382242139Strasz 383242139Strasz return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE); 384242139Strasz} 385242139Strasz 386220137Straszstatic void 387220137Straszracct_add_racct(struct racct *dest, const struct racct *src) 388220137Strasz{ 389220137Strasz int i; 390220137Strasz 391284665Strasz ASSERT_RACCT_ENABLED(); 392220137Strasz mtx_assert(&racct_lock, MA_OWNED); 393220137Strasz 394220137Strasz /* 395220137Strasz * Update resource usage in dest. 
396220137Strasz */ 397220137Strasz for (i = 0; i <= RACCT_MAX; i++) { 398220137Strasz KASSERT(dest->r_resources[i] >= 0, 399243088Strasz ("%s: resource %d propagation meltdown: dest < 0", 400243088Strasz __func__, i)); 401220137Strasz KASSERT(src->r_resources[i] >= 0, 402243088Strasz ("%s: resource %d propagation meltdown: src < 0", 403243088Strasz __func__, i)); 404220137Strasz dest->r_resources[i] += src->r_resources[i]; 405220137Strasz } 406220137Strasz} 407220137Strasz 408220137Straszstatic void 409220137Straszracct_sub_racct(struct racct *dest, const struct racct *src) 410220137Strasz{ 411220137Strasz int i; 412220137Strasz 413284665Strasz ASSERT_RACCT_ENABLED(); 414220137Strasz mtx_assert(&racct_lock, MA_OWNED); 415220137Strasz 416220137Strasz /* 417220137Strasz * Update resource usage in dest. 418220137Strasz */ 419220137Strasz for (i = 0; i <= RACCT_MAX; i++) { 420243070Strasz if (!RACCT_IS_SLOPPY(i) && !RACCT_IS_DECAYING(i)) { 421220137Strasz KASSERT(dest->r_resources[i] >= 0, 422243088Strasz ("%s: resource %d propagation meltdown: dest < 0", 423243088Strasz __func__, i)); 424220137Strasz KASSERT(src->r_resources[i] >= 0, 425243088Strasz ("%s: resource %d propagation meltdown: src < 0", 426243088Strasz __func__, i)); 427220137Strasz KASSERT(src->r_resources[i] <= dest->r_resources[i], 428243088Strasz ("%s: resource %d propagation meltdown: src > dest", 429243088Strasz __func__, i)); 430220137Strasz } 431242139Strasz if (RACCT_CAN_DROP(i)) { 432220137Strasz dest->r_resources[i] -= src->r_resources[i]; 433220137Strasz if (dest->r_resources[i] < 0) { 434243070Strasz KASSERT(RACCT_IS_SLOPPY(i) || 435243070Strasz RACCT_IS_DECAYING(i), 436243088Strasz ("%s: resource %d usage < 0", __func__, i)); 437220137Strasz dest->r_resources[i] = 0; 438220137Strasz } 439220137Strasz } 440220137Strasz } 441220137Strasz} 442220137Strasz 443220137Straszvoid 444220137Straszracct_create(struct racct **racctp) 445220137Strasz{ 446220137Strasz 447284665Strasz if (!racct_enable) 
448284665Strasz return; 449284665Strasz 450289798Savg SDT_PROBE1(racct, kernel, racct, create, racctp); 451220137Strasz 452220137Strasz KASSERT(*racctp == NULL, ("racct already allocated")); 453220137Strasz 454220137Strasz *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO); 455220137Strasz} 456220137Strasz 457220137Straszstatic void 458220137Straszracct_destroy_locked(struct racct **racctp) 459220137Strasz{ 460220137Strasz int i; 461220137Strasz struct racct *racct; 462220137Strasz 463284665Strasz ASSERT_RACCT_ENABLED(); 464284665Strasz 465289798Savg SDT_PROBE1(racct, kernel, racct, destroy, racctp); 466220137Strasz 467220137Strasz mtx_assert(&racct_lock, MA_OWNED); 468220137Strasz KASSERT(racctp != NULL, ("NULL racctp")); 469220137Strasz KASSERT(*racctp != NULL, ("NULL racct")); 470220137Strasz 471220137Strasz racct = *racctp; 472220137Strasz 473220137Strasz for (i = 0; i <= RACCT_MAX; i++) { 474223844Strasz if (RACCT_IS_SLOPPY(i)) 475220137Strasz continue; 476223844Strasz if (!RACCT_IS_RECLAIMABLE(i)) 477220137Strasz continue; 478220137Strasz KASSERT(racct->r_resources[i] == 0, 479220137Strasz ("destroying non-empty racct: " 480220137Strasz "%ju allocated for resource %d\n", 481220137Strasz racct->r_resources[i], i)); 482220137Strasz } 483220137Strasz uma_zfree(racct_zone, racct); 484220137Strasz *racctp = NULL; 485220137Strasz} 486220137Strasz 487220137Straszvoid 488220137Straszracct_destroy(struct racct **racct) 489220137Strasz{ 490220137Strasz 491284665Strasz if (!racct_enable) 492284665Strasz return; 493284665Strasz 494220137Strasz mtx_lock(&racct_lock); 495220137Strasz racct_destroy_locked(racct); 496220137Strasz mtx_unlock(&racct_lock); 497220137Strasz} 498220137Strasz 499220137Strasz/* 500220137Strasz * Increase consumption of 'resource' by 'amount' for 'racct' 501220137Strasz * and all its parents. Differently from other cases, 'amount' here 502220137Strasz * may be less than zero. 
503220137Strasz */ 504220137Straszstatic void 505288106Sjlhracct_adjust_resource(struct racct *racct, int resource, 506220137Strasz uint64_t amount) 507220137Strasz{ 508220137Strasz 509284665Strasz ASSERT_RACCT_ENABLED(); 510220137Strasz mtx_assert(&racct_lock, MA_OWNED); 511220137Strasz KASSERT(racct != NULL, ("NULL racct")); 512220137Strasz 513220137Strasz racct->r_resources[resource] += amount; 514220137Strasz if (racct->r_resources[resource] < 0) { 515242139Strasz KASSERT(RACCT_IS_SLOPPY(resource) || RACCT_IS_DECAYING(resource), 516243088Strasz ("%s: resource %d usage < 0", __func__, resource)); 517220137Strasz racct->r_resources[resource] = 0; 518220137Strasz } 519242139Strasz 520242139Strasz /* 521242139Strasz * There are some cases where the racct %cpu resource would grow 522291100Sjpaetzel * beyond 100% per core. For example in racct_proc_exit() we add 523291100Sjpaetzel * the process %cpu usage to the ucred racct containers. If too 524291100Sjpaetzel * many processes terminated in a short time span, the ucred %cpu 525291100Sjpaetzel * resource could grow too much. Also, the 4BSD scheduler sometimes 526291100Sjpaetzel * returns for a thread more than 100% cpu usage. So we set a sane 527291100Sjpaetzel * boundary here to 100% * the maxumum number of CPUs. 
528242139Strasz */ 529242139Strasz if ((resource == RACCT_PCTCPU) && 530291100Sjpaetzel (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000 * (int64_t)MAXCPU)) 531291100Sjpaetzel racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 * (int64_t)MAXCPU; 532220137Strasz} 533220137Strasz 534225944Straszstatic int 535225944Straszracct_add_locked(struct proc *p, int resource, uint64_t amount) 536220137Strasz{ 537220137Strasz#ifdef RCTL 538220137Strasz int error; 539220137Strasz#endif 540220137Strasz 541284665Strasz ASSERT_RACCT_ENABLED(); 542284665Strasz 543289798Savg SDT_PROBE3(racct, kernel, rusage, add, p, resource, amount); 544220137Strasz 545220137Strasz /* 546220137Strasz * We need proc lock to dereference p->p_ucred. 547220137Strasz */ 548220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 549220137Strasz 550220137Strasz#ifdef RCTL 551220137Strasz error = rctl_enforce(p, resource, amount); 552223844Strasz if (error && RACCT_IS_DENIABLE(resource)) { 553289798Savg SDT_PROBE3(racct, kernel, rusage, add__failure, p, resource, 554289798Savg amount); 555220137Strasz return (error); 556220137Strasz } 557220137Strasz#endif 558288106Sjlh racct_adjust_resource(p->p_racct, resource, amount); 559220137Strasz racct_add_cred_locked(p->p_ucred, resource, amount); 560220137Strasz 561220137Strasz return (0); 562220137Strasz} 563220137Strasz 564225944Strasz/* 565225944Strasz * Increase allocation of 'resource' by 'amount' for process 'p'. 566225944Strasz * Return 0 if it's below limits, or errno, if it's not. 
567225944Strasz */ 568225944Straszint 569225944Straszracct_add(struct proc *p, int resource, uint64_t amount) 570225944Strasz{ 571225944Strasz int error; 572225944Strasz 573284665Strasz if (!racct_enable) 574284665Strasz return (0); 575284665Strasz 576225944Strasz mtx_lock(&racct_lock); 577225944Strasz error = racct_add_locked(p, resource, amount); 578225944Strasz mtx_unlock(&racct_lock); 579225944Strasz return (error); 580225944Strasz} 581225944Strasz 582220137Straszstatic void 583220137Straszracct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount) 584220137Strasz{ 585220137Strasz struct prison *pr; 586220137Strasz 587284665Strasz ASSERT_RACCT_ENABLED(); 588284665Strasz 589289798Savg SDT_PROBE3(racct, kernel, rusage, add__cred, cred, resource, amount); 590220137Strasz 591288106Sjlh racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, amount); 592220137Strasz for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 593288106Sjlh racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource, 594221362Strasz amount); 595288106Sjlh racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, amount); 596220137Strasz} 597220137Strasz 598220137Strasz/* 599220137Strasz * Increase allocation of 'resource' by 'amount' for credential 'cred'. 600220137Strasz * Doesn't check for limits and never fails. 601220137Strasz * 602220137Strasz * XXX: Shouldn't this ever return an error? 603220137Strasz */ 604220137Straszvoid 605220137Straszracct_add_cred(struct ucred *cred, int resource, uint64_t amount) 606220137Strasz{ 607220137Strasz 608284665Strasz if (!racct_enable) 609284665Strasz return; 610284665Strasz 611220137Strasz mtx_lock(&racct_lock); 612220137Strasz racct_add_cred_locked(cred, resource, amount); 613220137Strasz mtx_unlock(&racct_lock); 614220137Strasz} 615220137Strasz 616220137Strasz/* 617220137Strasz * Increase allocation of 'resource' by 'amount' for process 'p'. 618220137Strasz * Doesn't check for limits and never fails. 
619220137Strasz */ 620220137Straszvoid 621220137Straszracct_add_force(struct proc *p, int resource, uint64_t amount) 622220137Strasz{ 623220137Strasz 624284665Strasz if (!racct_enable) 625284665Strasz return; 626284665Strasz 627289798Savg SDT_PROBE3(racct, kernel, rusage, add__force, p, resource, amount); 628220137Strasz 629220137Strasz /* 630220137Strasz * We need proc lock to dereference p->p_ucred. 631220137Strasz */ 632220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 633220137Strasz 634220137Strasz mtx_lock(&racct_lock); 635288106Sjlh racct_adjust_resource(p->p_racct, resource, amount); 636220137Strasz mtx_unlock(&racct_lock); 637220137Strasz racct_add_cred(p->p_ucred, resource, amount); 638220137Strasz} 639220137Strasz 640220137Straszstatic int 641220137Straszracct_set_locked(struct proc *p, int resource, uint64_t amount) 642220137Strasz{ 643242139Strasz int64_t old_amount, decayed_amount; 644242139Strasz int64_t diff_proc, diff_cred; 645220137Strasz#ifdef RCTL 646220137Strasz int error; 647220137Strasz#endif 648220137Strasz 649284665Strasz ASSERT_RACCT_ENABLED(); 650284665Strasz 651289798Savg SDT_PROBE3(racct, kernel, rusage, set, p, resource, amount); 652220137Strasz 653220137Strasz /* 654220137Strasz * We need proc lock to dereference p->p_ucred. 655220137Strasz */ 656220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 657220137Strasz 658242139Strasz old_amount = p->p_racct->r_resources[resource]; 659242139Strasz /* 660242139Strasz * The diffs may be negative. 661242139Strasz */ 662242139Strasz diff_proc = amount - old_amount; 663242139Strasz if (RACCT_IS_DECAYING(resource)) { 664242139Strasz /* 665242139Strasz * Resources in per-credential racct containers may decay. 666242139Strasz * If this is the case, we need to calculate the difference 667242139Strasz * between the new amount and the proportional value of the 668242139Strasz * old amount that has decayed in the ucred racct containers. 
669242139Strasz */ 670242139Strasz decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; 671242139Strasz diff_cred = amount - decayed_amount; 672242139Strasz } else 673242139Strasz diff_cred = diff_proc; 674220137Strasz#ifdef notyet 675242139Strasz KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource), 676243088Strasz ("%s: usage of non-droppable resource %d dropping", __func__, 677220137Strasz resource)); 678220137Strasz#endif 679220137Strasz#ifdef RCTL 680242139Strasz if (diff_proc > 0) { 681242139Strasz error = rctl_enforce(p, resource, diff_proc); 682223844Strasz if (error && RACCT_IS_DENIABLE(resource)) { 683289798Savg SDT_PROBE3(racct, kernel, rusage, set__failure, p, 684289798Savg resource, amount); 685220137Strasz return (error); 686220137Strasz } 687220137Strasz } 688220137Strasz#endif 689288106Sjlh racct_adjust_resource(p->p_racct, resource, diff_proc); 690242139Strasz if (diff_cred > 0) 691242139Strasz racct_add_cred_locked(p->p_ucred, resource, diff_cred); 692242139Strasz else if (diff_cred < 0) 693242139Strasz racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); 694220137Strasz 695220137Strasz return (0); 696220137Strasz} 697220137Strasz 698220137Strasz/* 699220137Strasz * Set allocation of 'resource' to 'amount' for process 'p'. 700220137Strasz * Return 0 if it's below limits, or errno, if it's not. 701220137Strasz * 702220137Strasz * Note that decreasing the allocation always returns 0, 703220137Strasz * even if it's above the limit. 
704220137Strasz */ 705220137Straszint 706220137Straszracct_set(struct proc *p, int resource, uint64_t amount) 707220137Strasz{ 708220137Strasz int error; 709220137Strasz 710284665Strasz if (!racct_enable) 711284665Strasz return (0); 712284665Strasz 713220137Strasz mtx_lock(&racct_lock); 714220137Strasz error = racct_set_locked(p, resource, amount); 715220137Strasz mtx_unlock(&racct_lock); 716220137Strasz return (error); 717220137Strasz} 718220137Strasz 719242139Straszstatic void 720242139Straszracct_set_force_locked(struct proc *p, int resource, uint64_t amount) 721220137Strasz{ 722242139Strasz int64_t old_amount, decayed_amount; 723242139Strasz int64_t diff_proc, diff_cred; 724220137Strasz 725284665Strasz ASSERT_RACCT_ENABLED(); 726284665Strasz 727289798Savg SDT_PROBE3(racct, kernel, rusage, set, p, resource, amount); 728220137Strasz 729220137Strasz /* 730220137Strasz * We need proc lock to dereference p->p_ucred. 731220137Strasz */ 732220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 733220137Strasz 734242139Strasz old_amount = p->p_racct->r_resources[resource]; 735242139Strasz /* 736242139Strasz * The diffs may be negative. 737242139Strasz */ 738242139Strasz diff_proc = amount - old_amount; 739242139Strasz if (RACCT_IS_DECAYING(resource)) { 740242139Strasz /* 741242139Strasz * Resources in per-credential racct containers may decay. 742242139Strasz * If this is the case, we need to calculate the difference 743242139Strasz * between the new amount and the proportional value of the 744242139Strasz * old amount that has decayed in the ucred racct containers. 
745242139Strasz */ 746242139Strasz decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; 747242139Strasz diff_cred = amount - decayed_amount; 748242139Strasz } else 749242139Strasz diff_cred = diff_proc; 750242139Strasz 751288106Sjlh racct_adjust_resource(p->p_racct, resource, diff_proc); 752242139Strasz if (diff_cred > 0) 753242139Strasz racct_add_cred_locked(p->p_ucred, resource, diff_cred); 754242139Strasz else if (diff_cred < 0) 755242139Strasz racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); 756242139Strasz} 757242139Strasz 758242139Straszvoid 759242139Straszracct_set_force(struct proc *p, int resource, uint64_t amount) 760242139Strasz{ 761284665Strasz 762284665Strasz if (!racct_enable) 763284665Strasz return; 764284665Strasz 765220137Strasz mtx_lock(&racct_lock); 766242139Strasz racct_set_force_locked(p, resource, amount); 767220137Strasz mtx_unlock(&racct_lock); 768220137Strasz} 769220137Strasz 770220137Strasz/* 771220137Strasz * Returns amount of 'resource' the process 'p' can keep allocated. 772220137Strasz * Allocating more than that would be denied, unless the resource 773220137Strasz * is marked undeniable. Amount of already allocated resource does 774220137Strasz * not matter. 775220137Strasz */ 776220137Straszuint64_t 777220137Straszracct_get_limit(struct proc *p, int resource) 778220137Strasz{ 779220137Strasz 780284665Strasz if (!racct_enable) 781284665Strasz return (UINT64_MAX); 782284665Strasz 783220137Strasz#ifdef RCTL 784220137Strasz return (rctl_get_limit(p, resource)); 785220137Strasz#else 786220137Strasz return (UINT64_MAX); 787220137Strasz#endif 788220137Strasz} 789220137Strasz 790220137Strasz/* 791220137Strasz * Returns amount of 'resource' the process 'p' can keep allocated. 792220137Strasz * Allocating more than that would be denied, unless the resource 793220137Strasz * is marked undeniable. Amount of already allocated resource does 794220137Strasz * matter. 
795220137Strasz */ 796220137Straszuint64_t 797220137Straszracct_get_available(struct proc *p, int resource) 798220137Strasz{ 799220137Strasz 800284665Strasz if (!racct_enable) 801284665Strasz return (UINT64_MAX); 802284665Strasz 803220137Strasz#ifdef RCTL 804220137Strasz return (rctl_get_available(p, resource)); 805220137Strasz#else 806220137Strasz return (UINT64_MAX); 807220137Strasz#endif 808220137Strasz} 809220137Strasz 810220137Strasz/* 811242139Strasz * Returns amount of the %cpu resource that process 'p' can add to its %cpu 812242139Strasz * utilization. Adding more than that would lead to the process being 813242139Strasz * throttled. 814242139Strasz */ 815242139Straszstatic int64_t 816242139Straszracct_pcpu_available(struct proc *p) 817242139Strasz{ 818242139Strasz 819284665Strasz ASSERT_RACCT_ENABLED(); 820284665Strasz 821242139Strasz#ifdef RCTL 822242139Strasz return (rctl_pcpu_available(p)); 823242139Strasz#else 824242139Strasz return (INT64_MAX); 825242139Strasz#endif 826242139Strasz} 827242139Strasz 828242139Strasz/* 829220137Strasz * Decrease allocation of 'resource' by 'amount' for process 'p'. 830220137Strasz */ 831220137Straszvoid 832220137Straszracct_sub(struct proc *p, int resource, uint64_t amount) 833220137Strasz{ 834220137Strasz 835284665Strasz if (!racct_enable) 836284665Strasz return; 837284665Strasz 838289798Savg SDT_PROBE3(racct, kernel, rusage, sub, p, resource, amount); 839220137Strasz 840220137Strasz /* 841220137Strasz * We need proc lock to dereference p->p_ucred. 
842220137Strasz */ 843220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 844242139Strasz KASSERT(RACCT_CAN_DROP(resource), 845243088Strasz ("%s: called for non-droppable resource %d", __func__, resource)); 846220137Strasz 847220137Strasz mtx_lock(&racct_lock); 848220137Strasz KASSERT(amount <= p->p_racct->r_resources[resource], 849243088Strasz ("%s: freeing %ju of resource %d, which is more " 850243088Strasz "than allocated %jd for %s (pid %d)", __func__, amount, resource, 851220137Strasz (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid)); 852220137Strasz 853288106Sjlh racct_adjust_resource(p->p_racct, resource, -amount); 854220137Strasz racct_sub_cred_locked(p->p_ucred, resource, amount); 855220137Strasz mtx_unlock(&racct_lock); 856220137Strasz} 857220137Strasz 858220137Straszstatic void 859220137Straszracct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) 860220137Strasz{ 861220137Strasz struct prison *pr; 862220137Strasz 863284665Strasz ASSERT_RACCT_ENABLED(); 864284665Strasz 865289798Savg SDT_PROBE3(racct, kernel, rusage, sub__cred, cred, resource, amount); 866220137Strasz 867220137Strasz#ifdef notyet 868242139Strasz KASSERT(RACCT_CAN_DROP(resource), 869243088Strasz ("%s: called for resource %d which can not drop", __func__, 870220137Strasz resource)); 871220137Strasz#endif 872220137Strasz 873288106Sjlh racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, -amount); 874220137Strasz for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 875288106Sjlh racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource, 876221362Strasz -amount); 877288106Sjlh racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, -amount); 878220137Strasz} 879220137Strasz 880220137Strasz/* 881220137Strasz * Decrease allocation of 'resource' by 'amount' for credential 'cred'. 
882220137Strasz */ 883220137Straszvoid 884220137Straszracct_sub_cred(struct ucred *cred, int resource, uint64_t amount) 885220137Strasz{ 886220137Strasz 887284665Strasz if (!racct_enable) 888284665Strasz return; 889284665Strasz 890220137Strasz mtx_lock(&racct_lock); 891220137Strasz racct_sub_cred_locked(cred, resource, amount); 892220137Strasz mtx_unlock(&racct_lock); 893220137Strasz} 894220137Strasz 895220137Strasz/* 896220137Strasz * Inherit resource usage information from the parent process. 897220137Strasz */ 898220137Straszint 899220137Straszracct_proc_fork(struct proc *parent, struct proc *child) 900220137Strasz{ 901220137Strasz int i, error = 0; 902220137Strasz 903284665Strasz if (!racct_enable) 904284665Strasz return (0); 905284665Strasz 906220137Strasz /* 907220137Strasz * Create racct for the child process. 908220137Strasz */ 909220137Strasz racct_create(&child->p_racct); 910220137Strasz 911220137Strasz PROC_LOCK(parent); 912220137Strasz PROC_LOCK(child); 913220137Strasz mtx_lock(&racct_lock); 914220137Strasz 915225981Strasz#ifdef RCTL 916225981Strasz error = rctl_proc_fork(parent, child); 917225981Strasz if (error != 0) 918225981Strasz goto out; 919225981Strasz#endif 920225981Strasz 921242139Strasz /* Init process cpu time. */ 922242139Strasz child->p_prev_runtime = 0; 923242139Strasz child->p_throttled = 0; 924242139Strasz 925220137Strasz /* 926220137Strasz * Inherit resource usage. 
927220137Strasz */ 928220137Strasz for (i = 0; i <= RACCT_MAX; i++) { 929220137Strasz if (parent->p_racct->r_resources[i] == 0 || 930223844Strasz !RACCT_IS_INHERITABLE(i)) 931220137Strasz continue; 932220137Strasz 933220137Strasz error = racct_set_locked(child, i, 934220137Strasz parent->p_racct->r_resources[i]); 935225938Strasz if (error != 0) 936220137Strasz goto out; 937220137Strasz } 938220137Strasz 939225944Strasz error = racct_add_locked(child, RACCT_NPROC, 1); 940225944Strasz error += racct_add_locked(child, RACCT_NTHR, 1); 941225944Strasz 942220137Straszout: 943220137Strasz mtx_unlock(&racct_lock); 944220137Strasz PROC_UNLOCK(child); 945220137Strasz PROC_UNLOCK(parent); 946220137Strasz 947235787Strasz if (error != 0) 948235787Strasz racct_proc_exit(child); 949235787Strasz 950220137Strasz return (error); 951220137Strasz} 952220137Strasz 953225940Strasz/* 954225940Strasz * Called at the end of fork1(), to handle rules that require the process 955225940Strasz * to be fully initialized. 956225940Strasz */ 957220137Straszvoid 958225940Straszracct_proc_fork_done(struct proc *child) 959225940Strasz{ 960225940Strasz 961225940Strasz#ifdef RCTL 962284665Strasz if (!racct_enable) 963284665Strasz return; 964284665Strasz 965225940Strasz PROC_LOCK(child); 966225940Strasz mtx_lock(&racct_lock); 967225940Strasz rctl_enforce(child, RACCT_NPROC, 0); 968225940Strasz rctl_enforce(child, RACCT_NTHR, 0); 969225940Strasz mtx_unlock(&racct_lock); 970225940Strasz PROC_UNLOCK(child); 971225940Strasz#endif 972225940Strasz} 973225940Strasz 974225940Straszvoid 975220137Straszracct_proc_exit(struct proc *p) 976220137Strasz{ 977225364Strasz int i; 978220137Strasz uint64_t runtime; 979242139Strasz struct timeval wallclock; 980242139Strasz uint64_t pct_estimate, pct; 981220137Strasz 982284665Strasz if (!racct_enable) 983284665Strasz return; 984284665Strasz 985220137Strasz PROC_LOCK(p); 986220137Strasz /* 987220137Strasz * We don't need to calculate rux, proc_reap() has already done this. 
988220137Strasz */ 989220137Strasz runtime = cputick2usec(p->p_rux.rux_runtime); 990220137Strasz#ifdef notyet 991220137Strasz KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); 992220137Strasz#else 993220137Strasz if (runtime < p->p_prev_runtime) 994220137Strasz runtime = p->p_prev_runtime; 995220137Strasz#endif 996242139Strasz microuptime(&wallclock); 997242139Strasz timevalsub(&wallclock, &p->p_stats->p_start); 998242957Strasz if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { 999242957Strasz pct_estimate = (1000000 * runtime * 100) / 1000242957Strasz ((uint64_t)wallclock.tv_sec * 1000000 + 1001242957Strasz wallclock.tv_usec); 1002242957Strasz } else 1003242957Strasz pct_estimate = 0; 1004242139Strasz pct = racct_getpcpu(p, pct_estimate); 1005242139Strasz 1006225364Strasz mtx_lock(&racct_lock); 1007225364Strasz racct_set_locked(p, RACCT_CPU, runtime); 1008242139Strasz racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct); 1009220137Strasz 1010225364Strasz for (i = 0; i <= RACCT_MAX; i++) { 1011225364Strasz if (p->p_racct->r_resources[i] == 0) 1012225364Strasz continue; 1013225364Strasz if (!RACCT_IS_RECLAIMABLE(i)) 1014225364Strasz continue; 1015225364Strasz racct_set_locked(p, i, 0); 1016225364Strasz } 1017225364Strasz 1018225364Strasz mtx_unlock(&racct_lock); 1019220137Strasz PROC_UNLOCK(p); 1020220137Strasz 1021220137Strasz#ifdef RCTL 1022220137Strasz rctl_racct_release(p->p_racct); 1023220137Strasz#endif 1024220137Strasz racct_destroy(&p->p_racct); 1025220137Strasz} 1026220137Strasz 1027220137Strasz/* 1028220137Strasz * Called after credentials change, to move resource utilisation 1029220137Strasz * between raccts. 
1030220137Strasz */ 1031220137Straszvoid 1032220137Straszracct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, 1033220137Strasz struct ucred *newcred) 1034220137Strasz{ 1035220137Strasz struct uidinfo *olduip, *newuip; 1036220137Strasz struct loginclass *oldlc, *newlc; 1037220137Strasz struct prison *oldpr, *newpr, *pr; 1038220137Strasz 1039284665Strasz if (!racct_enable) 1040284665Strasz return; 1041284665Strasz 1042220137Strasz PROC_LOCK_ASSERT(p, MA_NOTOWNED); 1043220137Strasz 1044220137Strasz newuip = newcred->cr_ruidinfo; 1045220137Strasz olduip = oldcred->cr_ruidinfo; 1046220137Strasz newlc = newcred->cr_loginclass; 1047220137Strasz oldlc = oldcred->cr_loginclass; 1048220137Strasz newpr = newcred->cr_prison; 1049220137Strasz oldpr = oldcred->cr_prison; 1050220137Strasz 1051220137Strasz mtx_lock(&racct_lock); 1052220137Strasz if (newuip != olduip) { 1053220137Strasz racct_sub_racct(olduip->ui_racct, p->p_racct); 1054220137Strasz racct_add_racct(newuip->ui_racct, p->p_racct); 1055220137Strasz } 1056220137Strasz if (newlc != oldlc) { 1057220137Strasz racct_sub_racct(oldlc->lc_racct, p->p_racct); 1058220137Strasz racct_add_racct(newlc->lc_racct, p->p_racct); 1059220137Strasz } 1060220137Strasz if (newpr != oldpr) { 1061220137Strasz for (pr = oldpr; pr != NULL; pr = pr->pr_parent) 1062221362Strasz racct_sub_racct(pr->pr_prison_racct->prr_racct, 1063221362Strasz p->p_racct); 1064220137Strasz for (pr = newpr; pr != NULL; pr = pr->pr_parent) 1065221362Strasz racct_add_racct(pr->pr_prison_racct->prr_racct, 1066221362Strasz p->p_racct); 1067220137Strasz } 1068220137Strasz mtx_unlock(&racct_lock); 1069220137Strasz 1070220137Strasz#ifdef RCTL 1071220137Strasz rctl_proc_ucred_changed(p, newcred); 1072220137Strasz#endif 1073220137Strasz} 1074220137Strasz 1075232598Straszvoid 1076232598Straszracct_move(struct racct *dest, struct racct *src) 1077232598Strasz{ 1078232598Strasz 1079284665Strasz ASSERT_RACCT_ENABLED(); 1080284665Strasz 1081232598Strasz 
mtx_lock(&racct_lock); 1082232598Strasz 1083232598Strasz racct_add_racct(dest, src); 1084232598Strasz racct_sub_racct(src, src); 1085232598Strasz 1086232598Strasz mtx_unlock(&racct_lock); 1087232598Strasz} 1088232598Strasz 1089220137Straszstatic void 1090242139Straszracct_proc_throttle(struct proc *p) 1091242139Strasz{ 1092242139Strasz struct thread *td; 1093242139Strasz#ifdef SMP 1094242139Strasz int cpuid; 1095242139Strasz#endif 1096242139Strasz 1097284665Strasz ASSERT_RACCT_ENABLED(); 1098242139Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 1099242139Strasz 1100242139Strasz /* 1101242139Strasz * Do not block kernel processes. Also do not block processes with 1102242139Strasz * low %cpu utilization to improve interactivity. 1103242139Strasz */ 1104242139Strasz if (((p->p_flag & (P_SYSTEM | P_KTHREAD)) != 0) || 1105242139Strasz (p->p_racct->r_resources[RACCT_PCTCPU] <= pcpu_threshold)) 1106242139Strasz return; 1107242139Strasz p->p_throttled = 1; 1108242139Strasz 1109242139Strasz FOREACH_THREAD_IN_PROC(p, td) { 1110248298Strasz thread_lock(td); 1111242139Strasz switch (td->td_state) { 1112242139Strasz case TDS_RUNQ: 1113242139Strasz /* 1114242139Strasz * If the thread is on the scheduler run-queue, we can 1115242139Strasz * not just remove it from there. So we set the flag 1116242139Strasz * TDF_NEEDRESCHED for the thread, so that once it is 1117242139Strasz * running, it is taken off the cpu as soon as possible. 1118242139Strasz */ 1119242139Strasz td->td_flags |= TDF_NEEDRESCHED; 1120242139Strasz break; 1121242139Strasz case TDS_RUNNING: 1122242139Strasz /* 1123242139Strasz * If the thread is running, we request a context 1124242139Strasz * switch for it by setting the TDF_NEEDRESCHED flag. 
1125242139Strasz */ 1126242139Strasz td->td_flags |= TDF_NEEDRESCHED; 1127242139Strasz#ifdef SMP 1128242139Strasz cpuid = td->td_oncpu; 1129242139Strasz if ((cpuid != NOCPU) && (td != curthread)) 1130242139Strasz ipi_cpu(cpuid, IPI_AST); 1131242139Strasz#endif 1132242139Strasz break; 1133242139Strasz default: 1134242139Strasz break; 1135242139Strasz } 1136248298Strasz thread_unlock(td); 1137242139Strasz } 1138242139Strasz} 1139242139Strasz 1140242139Straszstatic void 1141242139Straszracct_proc_wakeup(struct proc *p) 1142242139Strasz{ 1143284665Strasz 1144284665Strasz ASSERT_RACCT_ENABLED(); 1145284665Strasz 1146242139Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 1147242139Strasz 1148242139Strasz if (p->p_throttled) { 1149242139Strasz p->p_throttled = 0; 1150242139Strasz wakeup(p->p_racct); 1151242139Strasz } 1152242139Strasz} 1153242139Strasz 1154242139Straszstatic void 1155242139Straszracct_decay_resource(struct racct *racct, void * res, void* dummy) 1156242139Strasz{ 1157242139Strasz int resource; 1158242139Strasz int64_t r_old, r_new; 1159242139Strasz 1160284665Strasz ASSERT_RACCT_ENABLED(); 1161284665Strasz 1162242139Strasz resource = *(int *)res; 1163242139Strasz r_old = racct->r_resources[resource]; 1164242139Strasz 1165242139Strasz /* If there is nothing to decay, just exit. 
*/ 1166242139Strasz if (r_old <= 0) 1167242139Strasz return; 1168242139Strasz 1169242139Strasz mtx_lock(&racct_lock); 1170242139Strasz r_new = r_old * RACCT_DECAY_FACTOR / FSCALE; 1171242139Strasz racct->r_resources[resource] = r_new; 1172242139Strasz mtx_unlock(&racct_lock); 1173242139Strasz} 1174242139Strasz 1175242139Straszstatic void 1176242139Straszracct_decay(int resource) 1177242139Strasz{ 1178284665Strasz 1179284665Strasz ASSERT_RACCT_ENABLED(); 1180284665Strasz 1181242139Strasz ui_racct_foreach(racct_decay_resource, &resource, NULL); 1182242139Strasz loginclass_racct_foreach(racct_decay_resource, &resource, NULL); 1183242139Strasz prison_racct_foreach(racct_decay_resource, &resource, NULL); 1184242139Strasz} 1185242139Strasz 1186242139Straszstatic void 1187220137Straszracctd(void) 1188220137Strasz{ 1189220137Strasz struct thread *td; 1190220137Strasz struct proc *p; 1191220137Strasz struct timeval wallclock; 1192220137Strasz uint64_t runtime; 1193242139Strasz uint64_t pct, pct_estimate; 1194220137Strasz 1195284665Strasz ASSERT_RACCT_ENABLED(); 1196284665Strasz 1197220137Strasz for (;;) { 1198242139Strasz racct_decay(RACCT_PCTCPU); 1199242139Strasz 1200220137Strasz sx_slock(&allproc_lock); 1201220137Strasz 1202242139Strasz LIST_FOREACH(p, &zombproc, p_list) { 1203242139Strasz PROC_LOCK(p); 1204242139Strasz racct_set(p, RACCT_PCTCPU, 0); 1205242139Strasz PROC_UNLOCK(p); 1206242139Strasz } 1207242139Strasz 1208220137Strasz FOREACH_PROC_IN_SYSTEM(p) { 1209242139Strasz PROC_LOCK(p); 1210242139Strasz if (p->p_state != PRS_NORMAL) { 1211242139Strasz PROC_UNLOCK(p); 1212220137Strasz continue; 1213242139Strasz } 1214220137Strasz 1215220137Strasz microuptime(&wallclock); 1216220137Strasz timevalsub(&wallclock, &p->p_stats->p_start); 1217293473Sdchagin PROC_STATLOCK(p); 1218232782Strasz FOREACH_THREAD_IN_PROC(p, td) 1219220137Strasz ruxagg(p, td); 1220220137Strasz runtime = cputick2usec(p->p_rux.rux_runtime); 1221293473Sdchagin PROC_STATUNLOCK(p); 
1222220137Strasz#ifdef notyet 1223220137Strasz KASSERT(runtime >= p->p_prev_runtime, 1224220137Strasz ("runtime < p_prev_runtime")); 1225220137Strasz#else 1226220137Strasz if (runtime < p->p_prev_runtime) 1227220137Strasz runtime = p->p_prev_runtime; 1228220137Strasz#endif 1229220137Strasz p->p_prev_runtime = runtime; 1230242957Strasz if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { 1231242957Strasz pct_estimate = (1000000 * runtime * 100) / 1232242957Strasz ((uint64_t)wallclock.tv_sec * 1000000 + 1233242957Strasz wallclock.tv_usec); 1234242957Strasz } else 1235242957Strasz pct_estimate = 0; 1236242139Strasz pct = racct_getpcpu(p, pct_estimate); 1237220137Strasz mtx_lock(&racct_lock); 1238242139Strasz racct_set_force_locked(p, RACCT_PCTCPU, pct); 1239220137Strasz racct_set_locked(p, RACCT_CPU, runtime); 1240220137Strasz racct_set_locked(p, RACCT_WALLCLOCK, 1241233126Sjh (uint64_t)wallclock.tv_sec * 1000000 + 1242233126Sjh wallclock.tv_usec); 1243220137Strasz mtx_unlock(&racct_lock); 1244220137Strasz PROC_UNLOCK(p); 1245220137Strasz } 1246242139Strasz 1247242139Strasz /* 1248242139Strasz * To ensure that processes are throttled in a fair way, we need 1249242139Strasz * to iterate over all processes again and check the limits 1250242139Strasz * for %cpu resource only after ucred racct containers have been 1251242139Strasz * properly filled. 
1252242139Strasz */ 1253242139Strasz FOREACH_PROC_IN_SYSTEM(p) { 1254242139Strasz PROC_LOCK(p); 1255242139Strasz if (p->p_state != PRS_NORMAL) { 1256242139Strasz PROC_UNLOCK(p); 1257242139Strasz continue; 1258242139Strasz } 1259242139Strasz 1260242139Strasz if (racct_pcpu_available(p) <= 0) 1261242139Strasz racct_proc_throttle(p); 1262242139Strasz else if (p->p_throttled) 1263242139Strasz racct_proc_wakeup(p); 1264242139Strasz PROC_UNLOCK(p); 1265242139Strasz } 1266220137Strasz sx_sunlock(&allproc_lock); 1267220137Strasz pause("-", hz); 1268220137Strasz } 1269220137Strasz} 1270220137Strasz 1271220137Straszstatic struct kproc_desc racctd_kp = { 1272220137Strasz "racctd", 1273220137Strasz racctd, 1274220137Strasz NULL 1275220137Strasz}; 1276220137Strasz 1277220137Straszstatic void 1278284665Straszracctd_init(void) 1279284665Strasz{ 1280284665Strasz if (!racct_enable) 1281284665Strasz return; 1282284665Strasz 1283284665Strasz kproc_start(&racctd_kp); 1284284665Strasz} 1285284665StraszSYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL); 1286284665Strasz 1287284665Straszstatic void 1288220137Straszracct_init(void) 1289220137Strasz{ 1290284665Strasz if (!racct_enable) 1291284665Strasz return; 1292220137Strasz 1293220137Strasz racct_zone = uma_zcreate("racct", sizeof(struct racct), 1294220137Strasz NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 1295220137Strasz /* 1296220137Strasz * XXX: Move this somewhere. 
1297220137Strasz */ 1298221362Strasz prison0.pr_prison_racct = prison_racct_find("0"); 1299220137Strasz} 1300220137StraszSYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL); 1301220137Strasz 1302220137Strasz#else /* !RACCT */ 1303220137Strasz 1304220137Straszint 1305220137Straszracct_add(struct proc *p, int resource, uint64_t amount) 1306220137Strasz{ 1307220137Strasz 1308220137Strasz return (0); 1309220137Strasz} 1310220137Strasz 1311220137Straszvoid 1312220137Straszracct_add_cred(struct ucred *cred, int resource, uint64_t amount) 1313220137Strasz{ 1314220137Strasz} 1315220137Strasz 1316220137Straszvoid 1317220137Straszracct_add_force(struct proc *p, int resource, uint64_t amount) 1318220137Strasz{ 1319220137Strasz 1320220137Strasz return; 1321220137Strasz} 1322220137Strasz 1323220137Straszint 1324220137Straszracct_set(struct proc *p, int resource, uint64_t amount) 1325220137Strasz{ 1326220137Strasz 1327220137Strasz return (0); 1328220137Strasz} 1329220137Strasz 1330220137Straszvoid 1331220372Straszracct_set_force(struct proc *p, int resource, uint64_t amount) 1332220372Strasz{ 1333220372Strasz} 1334220372Strasz 1335220372Straszvoid 1336220137Straszracct_sub(struct proc *p, int resource, uint64_t amount) 1337220137Strasz{ 1338220137Strasz} 1339220137Strasz 1340220137Straszvoid 1341220137Straszracct_sub_cred(struct ucred *cred, int resource, uint64_t amount) 1342220137Strasz{ 1343220137Strasz} 1344220137Strasz 1345220137Straszuint64_t 1346220137Straszracct_get_limit(struct proc *p, int resource) 1347220137Strasz{ 1348220137Strasz 1349220137Strasz return (UINT64_MAX); 1350220137Strasz} 1351220137Strasz 1352220372Straszuint64_t 1353220372Straszracct_get_available(struct proc *p, int resource) 1354220372Strasz{ 1355220372Strasz 1356220372Strasz return (UINT64_MAX); 1357220372Strasz} 1358220372Strasz 1359220137Straszvoid 1360220137Straszracct_create(struct racct **racctp) 1361220137Strasz{ 1362220137Strasz} 1363220137Strasz 1364220137Straszvoid 
1365220137Straszracct_destroy(struct racct **racctp) 1366220137Strasz{ 1367220137Strasz} 1368220137Strasz 1369220137Straszint 1370220137Straszracct_proc_fork(struct proc *parent, struct proc *child) 1371220137Strasz{ 1372220137Strasz 1373220137Strasz return (0); 1374220137Strasz} 1375220137Strasz 1376220137Straszvoid 1377225940Straszracct_proc_fork_done(struct proc *child) 1378225940Strasz{ 1379225940Strasz} 1380225940Strasz 1381225940Straszvoid 1382220137Straszracct_proc_exit(struct proc *p) 1383220137Strasz{ 1384220137Strasz} 1385220137Strasz 1386220137Strasz#endif /* !RACCT */ 1387