kern_racct.c revision 258622
1220137Strasz/*- 2220137Strasz * Copyright (c) 2010 The FreeBSD Foundation 3220137Strasz * All rights reserved. 4220137Strasz * 5220137Strasz * This software was developed by Edward Tomasz Napierala under sponsorship 6220137Strasz * from the FreeBSD Foundation. 7220137Strasz * 8220137Strasz * Redistribution and use in source and binary forms, with or without 9220137Strasz * modification, are permitted provided that the following conditions 10220137Strasz * are met: 11220137Strasz * 1. Redistributions of source code must retain the above copyright 12220137Strasz * notice, this list of conditions and the following disclaimer. 13220137Strasz * 2. Redistributions in binary form must reproduce the above copyright 14220137Strasz * notice, this list of conditions and the following disclaimer in the 15220137Strasz * documentation and/or other materials provided with the distribution. 16220137Strasz * 17220137Strasz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18220137Strasz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19220137Strasz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20220137Strasz * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21220137Strasz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22220137Strasz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23220137Strasz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24220137Strasz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25220137Strasz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26220137Strasz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27220137Strasz * SUCH DAMAGE. 28220137Strasz * 29220137Strasz * $FreeBSD: head/sys/kern/kern_racct.c 258622 2013-11-26 08:46:27Z avg $ 30220137Strasz */ 31220137Strasz 32220137Strasz#include <sys/cdefs.h> 33220137Strasz__FBSDID("$FreeBSD: head/sys/kern/kern_racct.c 258622 2013-11-26 08:46:27Z avg $"); 34220137Strasz 35242139Strasz#include "opt_sched.h" 36220137Strasz 37220137Strasz#include <sys/param.h> 38228430Savg#include <sys/systm.h> 39220137Strasz#include <sys/eventhandler.h> 40220137Strasz#include <sys/jail.h> 41220137Strasz#include <sys/kernel.h> 42220137Strasz#include <sys/kthread.h> 43220137Strasz#include <sys/lock.h> 44220137Strasz#include <sys/loginclass.h> 45220137Strasz#include <sys/malloc.h> 46220137Strasz#include <sys/mutex.h> 47220137Strasz#include <sys/proc.h> 48220137Strasz#include <sys/racct.h> 49220137Strasz#include <sys/resourcevar.h> 50220137Strasz#include <sys/sbuf.h> 51220137Strasz#include <sys/sched.h> 52220137Strasz#include <sys/sdt.h> 53242139Strasz#include <sys/smp.h> 54220137Strasz#include <sys/sx.h> 55242139Strasz#include <sys/sysctl.h> 56220137Strasz#include <sys/sysent.h> 57220137Strasz#include <sys/sysproto.h> 58220137Strasz#include <sys/umtx.h> 59242139Strasz#include <machine/smp.h> 60220137Strasz 61220137Strasz#ifdef RCTL 62220137Strasz#include <sys/rctl.h> 63220137Strasz#endif 64220137Strasz 65220137Strasz#ifdef RACCT 66220137Strasz 67220137StraszFEATURE(racct, "Resource Accounting"); 68220137Strasz 69242139Strasz/* 70242139Strasz * Do not block processes that have their %cpu usage <= pcpu_threshold. 71242139Strasz */ 72242139Straszstatic int pcpu_threshold = 1; 73242139Strasz 74242139StraszSYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting"); 75242139StraszSYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold, 76242139Strasz 0, "Processes with higher %cpu usage than this value can be throttled."); 77242139Strasz 78242139Strasz/* 79242139Strasz * How many seconds it takes to use the scheduler %cpu calculations. When a 80242139Strasz * process starts, we compute its %cpu usage by dividing its runtime by the 81242139Strasz * process wall clock time. After RACCT_PCPU_SECS pass, we use the value 82242139Strasz * provided by the scheduler. 83242139Strasz */ 84242139Strasz#define RACCT_PCPU_SECS 3 85242139Strasz 86220137Straszstatic struct mtx racct_lock; 87220137StraszMTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF); 88220137Strasz 89220137Straszstatic uma_zone_t racct_zone; 90220137Strasz 91220137Straszstatic void racct_sub_racct(struct racct *dest, const struct racct *src); 92220137Straszstatic void racct_sub_cred_locked(struct ucred *cred, int resource, 93220137Strasz uint64_t amount); 94220137Straszstatic void racct_add_cred_locked(struct ucred *cred, int resource, 95220137Strasz uint64_t amount); 96220137Strasz 97220137StraszSDT_PROVIDER_DEFINE(racct); 98258622SavgSDT_PROBE_DEFINE3(racct, kernel, rusage, add, "struct proc *", "int", 99220137Strasz "uint64_t"); 100258622SavgSDT_PROBE_DEFINE3(racct, kernel, rusage, add__failure, 101220137Strasz "struct proc *", "int", "uint64_t"); 102258622SavgSDT_PROBE_DEFINE3(racct, kernel, rusage, add__cred, "struct ucred *", 103220137Strasz "int", "uint64_t"); 104258622SavgSDT_PROBE_DEFINE3(racct, kernel, rusage, add__force, "struct proc *", 105220137Strasz "int", "uint64_t"); 106258622SavgSDT_PROBE_DEFINE3(racct, kernel, rusage, set, "struct proc *", "int", 107220137Strasz "uint64_t"); 108258622SavgSDT_PROBE_DEFINE3(racct, kernel, rusage, set__failure, 109220137Strasz "struct proc *", "int", "uint64_t"); 110258622SavgSDT_PROBE_DEFINE3(racct, kernel, rusage, sub, "struct proc *", "int", 111220137Strasz "uint64_t"); 112258622SavgSDT_PROBE_DEFINE3(racct, kernel, rusage, sub__cred, "struct ucred *", 113220137Strasz "int", "uint64_t"); 114258622SavgSDT_PROBE_DEFINE1(racct, kernel, racct, create, "struct racct *"); 115258622SavgSDT_PROBE_DEFINE1(racct, kernel, racct, destroy, "struct racct *"); 116258622SavgSDT_PROBE_DEFINE2(racct, kernel, racct, join, "struct racct *", 117220137Strasz "struct racct *"); 118258622SavgSDT_PROBE_DEFINE2(racct, kernel, racct, join__failure, 119220137Strasz "struct racct *", "struct racct *"); 120258622SavgSDT_PROBE_DEFINE2(racct, kernel, racct, leave, "struct racct *", 121220137Strasz "struct racct *"); 122220137Strasz 123220137Straszint racct_types[] = { 124220137Strasz [RACCT_CPU] = 125224036Strasz RACCT_IN_MILLIONS, 126220137Strasz [RACCT_DATA] = 127220137Strasz RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 128220137Strasz [RACCT_STACK] = 129220137Strasz RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 130220137Strasz [RACCT_CORE] = 131220137Strasz RACCT_DENIABLE, 132220137Strasz [RACCT_RSS] = 133220137Strasz RACCT_RECLAIMABLE, 134220137Strasz [RACCT_MEMLOCK] = 135220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE, 136220137Strasz [RACCT_NPROC] = 137220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE, 138220137Strasz [RACCT_NOFILE] = 139220137Strasz RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 140220137Strasz [RACCT_VMEM] = 141220137Strasz RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 142220137Strasz [RACCT_NPTS] = 143220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 144220137Strasz [RACCT_SWAP] = 145220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 146220137Strasz [RACCT_NTHR] = 147220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE, 148220137Strasz [RACCT_MSGQQUEUED] = 149220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 150220137Strasz [RACCT_MSGQSIZE] = 151220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 152220137Strasz [RACCT_NMSGQ] = 153220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 154220137Strasz [RACCT_NSEM] = 155220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 156220137Strasz [RACCT_NSEMOP] = 157220137Strasz RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 158220137Strasz [RACCT_NSHM] = 159220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 160220137Strasz [RACCT_SHMSIZE] = 161220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 162220137Strasz [RACCT_WALLCLOCK] = 163242139Strasz RACCT_IN_MILLIONS, 164242139Strasz [RACCT_PCTCPU] = 165242139Strasz RACCT_DECAYING | RACCT_DENIABLE | RACCT_IN_MILLIONS }; 166220137Strasz 167242139Straszstatic const fixpt_t RACCT_DECAY_FACTOR = 0.3 * FSCALE; 168242139Strasz 169242139Strasz#ifdef SCHED_4BSD 170242139Strasz/* 171242139Strasz * Contains intermediate values for %cpu calculations to avoid using floating 172242139Strasz * point in the kernel. 173242139Strasz * ccpu_exp[k] = FSCALE * (ccpu/FSCALE)^k = FSCALE * exp(-k/20) 174242139Strasz * It is needed only for the 4BSD scheduler, because in ULE, the ccpu equals to 175242139Strasz * zero so the calculations are more straightforward. 176242139Strasz */ 177242139Straszfixpt_t ccpu_exp[] = { 178242139Strasz [0] = FSCALE * 1, 179242139Strasz [1] = FSCALE * 0.95122942450071400909, 180242139Strasz [2] = FSCALE * 0.90483741803595957316, 181242139Strasz [3] = FSCALE * 0.86070797642505780722, 182242139Strasz [4] = FSCALE * 0.81873075307798185866, 183242139Strasz [5] = FSCALE * 0.77880078307140486824, 184242139Strasz [6] = FSCALE * 0.74081822068171786606, 185242139Strasz [7] = FSCALE * 0.70468808971871343435, 186242139Strasz [8] = FSCALE * 0.67032004603563930074, 187242139Strasz [9] = FSCALE * 0.63762815162177329314, 188242139Strasz [10] = FSCALE * 0.60653065971263342360, 189242139Strasz [11] = FSCALE * 0.57694981038048669531, 190242139Strasz [12] = FSCALE * 0.54881163609402643262, 191242139Strasz [13] = FSCALE * 0.52204577676101604789, 192242139Strasz [14] = FSCALE * 0.49658530379140951470, 193242139Strasz [15] = FSCALE * 0.47236655274101470713, 194242139Strasz [16] = FSCALE * 0.44932896411722159143, 195242139Strasz [17] = FSCALE * 0.42741493194872666992, 196242139Strasz [18] = FSCALE * 0.40656965974059911188, 197242139Strasz [19] = FSCALE * 0.38674102345450120691, 198242139Strasz [20] = FSCALE * 0.36787944117144232159, 199242139Strasz [21] = FSCALE * 0.34993774911115535467, 200242139Strasz [22] = FSCALE * 0.33287108369807955328, 201242139Strasz [23] = FSCALE * 0.31663676937905321821, 202242139Strasz [24] = FSCALE * 0.30119421191220209664, 203242139Strasz [25] = FSCALE * 0.28650479686019010032, 204242139Strasz [26] = FSCALE * 0.27253179303401260312, 205242139Strasz [27] = FSCALE * 0.25924026064589150757, 206242139Strasz [28] = FSCALE * 0.24659696394160647693, 207242139Strasz [29] = FSCALE * 0.23457028809379765313, 208242139Strasz [30] = FSCALE * 0.22313016014842982893, 209242139Strasz [31] = FSCALE * 0.21224797382674305771, 210242139Strasz [32] = FSCALE * 0.20189651799465540848, 211242139Strasz [33] = FSCALE * 0.19204990862075411423, 212242139Strasz [34] = FSCALE * 0.18268352405273465022, 213242139Strasz [35] = FSCALE * 0.17377394345044512668, 214242139Strasz [36] = FSCALE * 0.16529888822158653829, 215242139Strasz [37] = FSCALE * 0.15723716631362761621, 216242139Strasz [38] = FSCALE * 0.14956861922263505264, 217242139Strasz [39] = FSCALE * 0.14227407158651357185, 218242139Strasz [40] = FSCALE * 0.13533528323661269189, 219242139Strasz [41] = FSCALE * 0.12873490358780421886, 220242139Strasz [42] = FSCALE * 0.12245642825298191021, 221242139Strasz [43] = FSCALE * 0.11648415777349695786, 222242139Strasz [44] = FSCALE * 0.11080315836233388333, 223242139Strasz [45] = FSCALE * 0.10539922456186433678, 224242139Strasz [46] = FSCALE * 0.10025884372280373372, 225242139Strasz [47] = FSCALE * 0.09536916221554961888, 226242139Strasz [48] = FSCALE * 0.09071795328941250337, 227242139Strasz [49] = FSCALE * 0.08629358649937051097, 228242139Strasz [50] = FSCALE * 0.08208499862389879516, 229242139Strasz [51] = FSCALE * 0.07808166600115315231, 230242139Strasz [52] = FSCALE * 0.07427357821433388042, 231242139Strasz [53] = FSCALE * 0.07065121306042958674, 232242139Strasz [54] = FSCALE * 0.06720551273974976512, 233242139Strasz [55] = FSCALE * 0.06392786120670757270, 234242139Strasz [56] = FSCALE * 0.06081006262521796499, 235242139Strasz [57] = FSCALE * 0.05784432087483846296, 236242139Strasz [58] = FSCALE * 0.05502322005640722902, 237242139Strasz [59] = FSCALE * 0.05233970594843239308, 238242139Strasz [60] = FSCALE * 0.04978706836786394297, 239242139Strasz [61] = FSCALE * 0.04735892439114092119, 240242139Strasz [62] = FSCALE * 0.04504920239355780606, 241242139Strasz [63] = FSCALE * 0.04285212686704017991, 242242139Strasz [64] = FSCALE * 0.04076220397836621516, 243242139Strasz [65] = FSCALE * 0.03877420783172200988, 244242139Strasz [66] = FSCALE * 0.03688316740124000544, 245242139Strasz [67] = FSCALE * 0.03508435410084502588, 246242139Strasz [68] = FSCALE * 0.03337326996032607948, 247242139Strasz [69] = FSCALE * 0.03174563637806794323, 248242139Strasz [70] = FSCALE * 0.03019738342231850073, 249242139Strasz [71] = FSCALE * 0.02872463965423942912, 250242139Strasz [72] = FSCALE * 0.02732372244729256080, 251242139Strasz [73] = FSCALE * 0.02599112877875534358, 252242139Strasz [74] = FSCALE * 0.02472352647033939120, 253242139Strasz [75] = FSCALE * 0.02351774585600910823, 254242139Strasz [76] = FSCALE * 0.02237077185616559577, 255242139Strasz [77] = FSCALE * 0.02127973643837716938, 256242139Strasz [78] = FSCALE * 0.02024191144580438847, 257242139Strasz [79] = FSCALE * 0.01925470177538692429, 258242139Strasz [80] = FSCALE * 0.01831563888873418029, 259242139Strasz [81] = FSCALE * 0.01742237463949351138, 260242139Strasz [82] = FSCALE * 0.01657267540176124754, 261242139Strasz [83] = FSCALE * 0.01576441648485449082, 262242139Strasz [84] = FSCALE * 0.01499557682047770621, 263242139Strasz [85] = FSCALE * 0.01426423390899925527, 264242139Strasz [86] = FSCALE * 0.01356855901220093175, 265242139Strasz [87] = FSCALE * 0.01290681258047986886, 266242139Strasz [88] = FSCALE * 0.01227733990306844117, 267242139Strasz [89] = FSCALE * 0.01167856697039544521, 268242139Strasz [90] = FSCALE * 0.01110899653824230649, 269242139Strasz [91] = FSCALE * 0.01056720438385265337, 270242139Strasz [92] = FSCALE * 0.01005183574463358164, 271242139Strasz [93] = FSCALE * 0.00956160193054350793, 272242139Strasz [94] = FSCALE * 0.00909527710169581709, 273242139Strasz [95] = FSCALE * 0.00865169520312063417, 274242139Strasz [96] = FSCALE * 0.00822974704902002884, 275242139Strasz [97] = FSCALE * 0.00782837754922577143, 276242139Strasz [98] = FSCALE * 0.00744658307092434051, 277242139Strasz [99] = FSCALE * 0.00708340892905212004, 278242139Strasz [100] = FSCALE * 0.00673794699908546709, 279242139Strasz [101] = FSCALE * 0.00640933344625638184, 280242139Strasz [102] = FSCALE * 0.00609674656551563610, 281242139Strasz [103] = FSCALE * 0.00579940472684214321, 282242139Strasz [104] = FSCALE * 0.00551656442076077241, 283242139Strasz [105] = FSCALE * 0.00524751839918138427, 284242139Strasz [106] = FSCALE * 0.00499159390691021621, 285242139Strasz [107] = FSCALE * 0.00474815099941147558, 286242139Strasz [108] = FSCALE * 0.00451658094261266798, 287242139Strasz [109] = FSCALE * 0.00429630469075234057, 288242139Strasz [110] = FSCALE * 0.00408677143846406699, 289242139Strasz}; 290242139Strasz#endif 291242139Strasz 292242139Strasz#define CCPU_EXP_MAX 110 293242139Strasz 294242139Strasz/* 295242139Strasz * This function is analogical to the getpcpu() function in the ps(1) command. 296242139Strasz * They should both calculate in the same way so that the racct %cpu 297242139Strasz * calculations are consistent with the values showed by the ps(1) tool. 298242139Strasz * The calculations are more complex in the 4BSD scheduler because of the value 299242139Strasz * of the ccpu variable. In ULE it is defined to be zero which saves us some 300242139Strasz * work. 301242139Strasz */ 302242139Straszstatic uint64_t 303242139Straszracct_getpcpu(struct proc *p, u_int pcpu) 304242139Strasz{ 305242139Strasz u_int swtime; 306242139Strasz#ifdef SCHED_4BSD 307242139Strasz fixpt_t pctcpu, pctcpu_next; 308242139Strasz#endif 309242139Strasz#ifdef SMP 310242139Strasz struct pcpu *pc; 311242139Strasz int found; 312242139Strasz#endif 313242139Strasz fixpt_t p_pctcpu; 314242139Strasz struct thread *td; 315242139Strasz 316242139Strasz /* 317242139Strasz * If the process is swapped out, we count its %cpu usage as zero. 318242139Strasz * This behaviour is consistent with the userland ps(1) tool. 319242139Strasz */ 320242139Strasz if ((p->p_flag & P_INMEM) == 0) 321242139Strasz return (0); 322242139Strasz swtime = (ticks - p->p_swtick) / hz; 323242139Strasz 324242139Strasz /* 325242139Strasz * For short-lived processes, the sched_pctcpu() returns small 326242139Strasz * values even for cpu intensive processes. Therefore we use 327242139Strasz * our own estimate in this case. 328242139Strasz */ 329242139Strasz if (swtime < RACCT_PCPU_SECS) 330242139Strasz return (pcpu); 331242139Strasz 332242139Strasz p_pctcpu = 0; 333242139Strasz FOREACH_THREAD_IN_PROC(p, td) { 334242139Strasz if (td == PCPU_GET(idlethread)) 335242139Strasz continue; 336242139Strasz#ifdef SMP 337242139Strasz found = 0; 338242139Strasz STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 339242139Strasz if (td == pc->pc_idlethread) { 340242139Strasz found = 1; 341242139Strasz break; 342242139Strasz } 343242139Strasz } 344242139Strasz if (found) 345242139Strasz continue; 346242139Strasz#endif 347242139Strasz thread_lock(td); 348242139Strasz#ifdef SCHED_4BSD 349242139Strasz pctcpu = sched_pctcpu(td); 350242139Strasz /* Count also the yet unfinished second. */ 351242139Strasz pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT; 352242139Strasz pctcpu_next += sched_pctcpu_delta(td); 353242139Strasz p_pctcpu += max(pctcpu, pctcpu_next); 354242139Strasz#else 355242139Strasz /* 356242139Strasz * In ULE the %cpu statistics are updated on every 357242139Strasz * sched_pctcpu() call. So special calculations to 358242139Strasz * account for the latest (unfinished) second are 359242139Strasz * not needed. 360242139Strasz */ 361242139Strasz p_pctcpu += sched_pctcpu(td); 362242139Strasz#endif 363242139Strasz thread_unlock(td); 364242139Strasz } 365242139Strasz 366242139Strasz#ifdef SCHED_4BSD 367242139Strasz if (swtime <= CCPU_EXP_MAX) 368242139Strasz return ((100 * (uint64_t)p_pctcpu * 1000000) / 369242139Strasz (FSCALE - ccpu_exp[swtime])); 370242139Strasz#endif 371242139Strasz 372242139Strasz return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE); 373242139Strasz} 374242139Strasz 375220137Straszstatic void 376220137Straszracct_add_racct(struct racct *dest, const struct racct *src) 377220137Strasz{ 378220137Strasz int i; 379220137Strasz 380220137Strasz mtx_assert(&racct_lock, MA_OWNED); 381220137Strasz 382220137Strasz /* 383220137Strasz * Update resource usage in dest. 384220137Strasz */ 385220137Strasz for (i = 0; i <= RACCT_MAX; i++) { 386220137Strasz KASSERT(dest->r_resources[i] >= 0, 387243088Strasz ("%s: resource %d propagation meltdown: dest < 0", 388243088Strasz __func__, i)); 389220137Strasz KASSERT(src->r_resources[i] >= 0, 390243088Strasz ("%s: resource %d propagation meltdown: src < 0", 391243088Strasz __func__, i)); 392220137Strasz dest->r_resources[i] += src->r_resources[i]; 393220137Strasz } 394220137Strasz} 395220137Strasz 396220137Straszstatic void 397220137Straszracct_sub_racct(struct racct *dest, const struct racct *src) 398220137Strasz{ 399220137Strasz int i; 400220137Strasz 401220137Strasz mtx_assert(&racct_lock, MA_OWNED); 402220137Strasz 403220137Strasz /* 404220137Strasz * Update resource usage in dest. 405220137Strasz */ 406220137Strasz for (i = 0; i <= RACCT_MAX; i++) { 407243070Strasz if (!RACCT_IS_SLOPPY(i) && !RACCT_IS_DECAYING(i)) { 408220137Strasz KASSERT(dest->r_resources[i] >= 0, 409243088Strasz ("%s: resource %d propagation meltdown: dest < 0", 410243088Strasz __func__, i)); 411220137Strasz KASSERT(src->r_resources[i] >= 0, 412243088Strasz ("%s: resource %d propagation meltdown: src < 0", 413243088Strasz __func__, i)); 414220137Strasz KASSERT(src->r_resources[i] <= dest->r_resources[i], 415243088Strasz ("%s: resource %d propagation meltdown: src > dest", 416243088Strasz __func__, i)); 417220137Strasz } 418242139Strasz if (RACCT_CAN_DROP(i)) { 419220137Strasz dest->r_resources[i] -= src->r_resources[i]; 420220137Strasz if (dest->r_resources[i] < 0) { 421243070Strasz KASSERT(RACCT_IS_SLOPPY(i) || 422243070Strasz RACCT_IS_DECAYING(i), 423243088Strasz ("%s: resource %d usage < 0", __func__, i)); 424220137Strasz dest->r_resources[i] = 0; 425220137Strasz } 426220137Strasz } 427220137Strasz } 428220137Strasz} 429220137Strasz 430220137Straszvoid 431220137Straszracct_create(struct racct **racctp) 432220137Strasz{ 433220137Strasz 434220137Strasz SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0); 435220137Strasz 436220137Strasz KASSERT(*racctp == NULL, ("racct already allocated")); 437220137Strasz 438220137Strasz *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO); 439220137Strasz} 440220137Strasz 441220137Straszstatic void 442220137Straszracct_destroy_locked(struct racct **racctp) 443220137Strasz{ 444220137Strasz int i; 445220137Strasz struct racct *racct; 446220137Strasz 447220137Strasz SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0); 448220137Strasz 449220137Strasz mtx_assert(&racct_lock, MA_OWNED); 450220137Strasz KASSERT(racctp != NULL, ("NULL racctp")); 451220137Strasz KASSERT(*racctp != NULL, ("NULL racct")); 452220137Strasz 453220137Strasz racct = *racctp; 454220137Strasz 455220137Strasz for (i = 0; i <= RACCT_MAX; i++) { 456223844Strasz if (RACCT_IS_SLOPPY(i)) 457220137Strasz continue; 458223844Strasz if (!RACCT_IS_RECLAIMABLE(i)) 459220137Strasz continue; 460220137Strasz KASSERT(racct->r_resources[i] == 0, 461220137Strasz ("destroying non-empty racct: " 462220137Strasz "%ju allocated for resource %d\n", 463220137Strasz racct->r_resources[i], i)); 464220137Strasz } 465220137Strasz uma_zfree(racct_zone, racct); 466220137Strasz *racctp = NULL; 467220137Strasz} 468220137Strasz 469220137Straszvoid 470220137Straszracct_destroy(struct racct **racct) 471220137Strasz{ 472220137Strasz 473220137Strasz mtx_lock(&racct_lock); 474220137Strasz racct_destroy_locked(racct); 475220137Strasz mtx_unlock(&racct_lock); 476220137Strasz} 477220137Strasz 478220137Strasz/* 479220137Strasz * Increase consumption of 'resource' by 'amount' for 'racct' 480220137Strasz * and all its parents. Differently from other cases, 'amount' here 481220137Strasz * may be less than zero. 482220137Strasz */ 483220137Straszstatic void 484220137Straszracct_alloc_resource(struct racct *racct, int resource, 485220137Strasz uint64_t amount) 486220137Strasz{ 487220137Strasz 488220137Strasz mtx_assert(&racct_lock, MA_OWNED); 489220137Strasz KASSERT(racct != NULL, ("NULL racct")); 490220137Strasz 491220137Strasz racct->r_resources[resource] += amount; 492220137Strasz if (racct->r_resources[resource] < 0) { 493242139Strasz KASSERT(RACCT_IS_SLOPPY(resource) || RACCT_IS_DECAYING(resource), 494243088Strasz ("%s: resource %d usage < 0", __func__, resource)); 495220137Strasz racct->r_resources[resource] = 0; 496220137Strasz } 497242139Strasz 498242139Strasz /* 499242139Strasz * There are some cases where the racct %cpu resource would grow 500242139Strasz * beyond 100%. 501242139Strasz * For example in racct_proc_exit() we add the process %cpu usage 502242139Strasz * to the ucred racct containers. If too many processes terminated 503242139Strasz * in a short time span, the ucred %cpu resource could grow too much. 504242139Strasz * Also, the 4BSD scheduler sometimes returns for a thread more than 505242139Strasz * 100% cpu usage. So we set a boundary here to 100%. 506242139Strasz */ 507242139Strasz if ((resource == RACCT_PCTCPU) && 508242139Strasz (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000)) 509242139Strasz racct->r_resources[RACCT_PCTCPU] = 100 * 1000000; 510220137Strasz} 511220137Strasz 512225944Straszstatic int 513225944Straszracct_add_locked(struct proc *p, int resource, uint64_t amount) 514220137Strasz{ 515220137Strasz#ifdef RCTL 516220137Strasz int error; 517220137Strasz#endif 518220137Strasz 519220137Strasz SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0); 520220137Strasz 521220137Strasz /* 522220137Strasz * We need proc lock to dereference p->p_ucred. 523220137Strasz */ 524220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 525220137Strasz 526220137Strasz#ifdef RCTL 527220137Strasz error = rctl_enforce(p, resource, amount); 528223844Strasz if (error && RACCT_IS_DENIABLE(resource)) { 529258622Savg SDT_PROBE(racct, kernel, rusage, add__failure, p, resource, 530220137Strasz amount, 0, 0); 531220137Strasz return (error); 532220137Strasz } 533220137Strasz#endif 534220137Strasz racct_alloc_resource(p->p_racct, resource, amount); 535220137Strasz racct_add_cred_locked(p->p_ucred, resource, amount); 536220137Strasz 537220137Strasz return (0); 538220137Strasz} 539220137Strasz 540225944Strasz/* 541225944Strasz * Increase allocation of 'resource' by 'amount' for process 'p'. 542225944Strasz * Return 0 if it's below limits, or errno, if it's not. 543225944Strasz */ 544225944Straszint 545225944Straszracct_add(struct proc *p, int resource, uint64_t amount) 546225944Strasz{ 547225944Strasz int error; 548225944Strasz 549225944Strasz mtx_lock(&racct_lock); 550225944Strasz error = racct_add_locked(p, resource, amount); 551225944Strasz mtx_unlock(&racct_lock); 552225944Strasz return (error); 553225944Strasz} 554225944Strasz 555220137Straszstatic void 556220137Straszracct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount) 557220137Strasz{ 558220137Strasz struct prison *pr; 559220137Strasz 560258622Savg SDT_PROBE(racct, kernel, rusage, add__cred, cred, resource, amount, 561220137Strasz 0, 0); 562220137Strasz 563220137Strasz racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount); 564220137Strasz for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 565221362Strasz racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource, 566221362Strasz amount); 567220137Strasz racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount); 568220137Strasz} 569220137Strasz 570220137Strasz/* 571220137Strasz * Increase allocation of 'resource' by 'amount' for credential 'cred'. 572220137Strasz * Doesn't check for limits and never fails. 573220137Strasz * 574220137Strasz * XXX: Shouldn't this ever return an error? 575220137Strasz */ 576220137Straszvoid 577220137Straszracct_add_cred(struct ucred *cred, int resource, uint64_t amount) 578220137Strasz{ 579220137Strasz 580220137Strasz mtx_lock(&racct_lock); 581220137Strasz racct_add_cred_locked(cred, resource, amount); 582220137Strasz mtx_unlock(&racct_lock); 583220137Strasz} 584220137Strasz 585220137Strasz/* 586220137Strasz * Increase allocation of 'resource' by 'amount' for process 'p'. 587220137Strasz * Doesn't check for limits and never fails. 588220137Strasz */ 589220137Straszvoid 590220137Straszracct_add_force(struct proc *p, int resource, uint64_t amount) 591220137Strasz{ 592220137Strasz 593258622Savg SDT_PROBE(racct, kernel, rusage, add__force, p, resource, amount, 0, 0); 594220137Strasz 595220137Strasz /* 596220137Strasz * We need proc lock to dereference p->p_ucred. 597220137Strasz */ 598220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 599220137Strasz 600220137Strasz mtx_lock(&racct_lock); 601220137Strasz racct_alloc_resource(p->p_racct, resource, amount); 602220137Strasz mtx_unlock(&racct_lock); 603220137Strasz racct_add_cred(p->p_ucred, resource, amount); 604220137Strasz} 605220137Strasz 606220137Straszstatic int 607220137Straszracct_set_locked(struct proc *p, int resource, uint64_t amount) 608220137Strasz{ 609242139Strasz int64_t old_amount, decayed_amount; 610242139Strasz int64_t diff_proc, diff_cred; 611220137Strasz#ifdef RCTL 612220137Strasz int error; 613220137Strasz#endif 614220137Strasz 615220137Strasz SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); 616220137Strasz 617220137Strasz /* 618220137Strasz * We need proc lock to dereference p->p_ucred. 619220137Strasz */ 620220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 621220137Strasz 622242139Strasz old_amount = p->p_racct->r_resources[resource]; 623242139Strasz /* 624242139Strasz * The diffs may be negative. 625242139Strasz */ 626242139Strasz diff_proc = amount - old_amount; 627242139Strasz if (RACCT_IS_DECAYING(resource)) { 628242139Strasz /* 629242139Strasz * Resources in per-credential racct containers may decay. 630242139Strasz * If this is the case, we need to calculate the difference 631242139Strasz * between the new amount and the proportional value of the 632242139Strasz * old amount that has decayed in the ucred racct containers. 633242139Strasz */ 634242139Strasz decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; 635242139Strasz diff_cred = amount - decayed_amount; 636242139Strasz } else 637242139Strasz diff_cred = diff_proc; 638220137Strasz#ifdef notyet 639242139Strasz KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource), 640243088Strasz ("%s: usage of non-droppable resource %d dropping", __func__, 641220137Strasz resource)); 642220137Strasz#endif 643220137Strasz#ifdef RCTL 644242139Strasz if (diff_proc > 0) { 645242139Strasz error = rctl_enforce(p, resource, diff_proc); 646223844Strasz if (error && RACCT_IS_DENIABLE(resource)) { 647258622Savg SDT_PROBE(racct, kernel, rusage, set__failure, p, 648220137Strasz resource, amount, 0, 0); 649220137Strasz return (error); 650220137Strasz } 651220137Strasz } 652220137Strasz#endif 653242139Strasz racct_alloc_resource(p->p_racct, resource, diff_proc); 654242139Strasz if (diff_cred > 0) 655242139Strasz racct_add_cred_locked(p->p_ucred, resource, diff_cred); 656242139Strasz else if (diff_cred < 0) 657242139Strasz racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); 658220137Strasz 659220137Strasz return (0); 660220137Strasz} 661220137Strasz 662220137Strasz/* 663220137Strasz * Set allocation of 'resource' to 'amount' for process 'p'. 664220137Strasz * Return 0 if it's below limits, or errno, if it's not. 665220137Strasz * 666220137Strasz * Note that decreasing the allocation always returns 0, 667220137Strasz * even if it's above the limit. 668220137Strasz */ 669220137Straszint 670220137Straszracct_set(struct proc *p, int resource, uint64_t amount) 671220137Strasz{ 672220137Strasz int error; 673220137Strasz 674220137Strasz mtx_lock(&racct_lock); 675220137Strasz error = racct_set_locked(p, resource, amount); 676220137Strasz mtx_unlock(&racct_lock); 677220137Strasz return (error); 678220137Strasz} 679220137Strasz 680242139Straszstatic void 681242139Straszracct_set_force_locked(struct proc *p, int resource, uint64_t amount) 682220137Strasz{ 683242139Strasz int64_t old_amount, decayed_amount; 684242139Strasz int64_t diff_proc, diff_cred; 685220137Strasz 686220137Strasz SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); 687220137Strasz 688220137Strasz /* 689220137Strasz * We need proc lock to dereference p->p_ucred. 690220137Strasz */ 691220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 692220137Strasz 693242139Strasz old_amount = p->p_racct->r_resources[resource]; 694242139Strasz /* 695242139Strasz * The diffs may be negative. 696242139Strasz */ 697242139Strasz diff_proc = amount - old_amount; 698242139Strasz if (RACCT_IS_DECAYING(resource)) { 699242139Strasz /* 700242139Strasz * Resources in per-credential racct containers may decay. 701242139Strasz * If this is the case, we need to calculate the difference 702242139Strasz * between the new amount and the proportional value of the 703242139Strasz * old amount that has decayed in the ucred racct containers. 704242139Strasz */ 705242139Strasz decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; 706242139Strasz diff_cred = amount - decayed_amount; 707242139Strasz } else 708242139Strasz diff_cred = diff_proc; 709242139Strasz 710242139Strasz racct_alloc_resource(p->p_racct, resource, diff_proc); 711242139Strasz if (diff_cred > 0) 712242139Strasz racct_add_cred_locked(p->p_ucred, resource, diff_cred); 713242139Strasz else if (diff_cred < 0) 714242139Strasz racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); 715242139Strasz} 716242139Strasz 717242139Straszvoid 718242139Straszracct_set_force(struct proc *p, int resource, uint64_t amount) 719242139Strasz{ 720220137Strasz mtx_lock(&racct_lock); 721242139Strasz racct_set_force_locked(p, resource, amount); 722220137Strasz mtx_unlock(&racct_lock); 723220137Strasz} 724220137Strasz 725220137Strasz/* 726220137Strasz * Returns amount of 'resource' the process 'p' can keep allocated. 727220137Strasz * Allocating more than that would be denied, unless the resource 728220137Strasz * is marked undeniable. Amount of already allocated resource does 729220137Strasz * not matter. 730220137Strasz */ 731220137Straszuint64_t 732220137Straszracct_get_limit(struct proc *p, int resource) 733220137Strasz{ 734220137Strasz 735220137Strasz#ifdef RCTL 736220137Strasz return (rctl_get_limit(p, resource)); 737220137Strasz#else 738220137Strasz return (UINT64_MAX); 739220137Strasz#endif 740220137Strasz} 741220137Strasz 742220137Strasz/* 743220137Strasz * Returns amount of 'resource' the process 'p' can keep allocated. 744220137Strasz * Allocating more than that would be denied, unless the resource 745220137Strasz * is marked undeniable. Amount of already allocated resource does 746220137Strasz * matter. 747220137Strasz */ 748220137Straszuint64_t 749220137Straszracct_get_available(struct proc *p, int resource) 750220137Strasz{ 751220137Strasz 752220137Strasz#ifdef RCTL 753220137Strasz return (rctl_get_available(p, resource)); 754220137Strasz#else 755220137Strasz return (UINT64_MAX); 756220137Strasz#endif 757220137Strasz} 758220137Strasz 759220137Strasz/* 760242139Strasz * Returns amount of the %cpu resource that process 'p' can add to its %cpu 761242139Strasz * utilization. Adding more than that would lead to the process being 762242139Strasz * throttled. 763242139Strasz */ 764242139Straszstatic int64_t 765242139Straszracct_pcpu_available(struct proc *p) 766242139Strasz{ 767242139Strasz 768242139Strasz#ifdef RCTL 769242139Strasz return (rctl_pcpu_available(p)); 770242139Strasz#else 771242139Strasz return (INT64_MAX); 772242139Strasz#endif 773242139Strasz} 774242139Strasz 775242139Strasz/* 776220137Strasz * Decrease allocation of 'resource' by 'amount' for process 'p'. 777220137Strasz */ 778220137Straszvoid 779220137Straszracct_sub(struct proc *p, int resource, uint64_t amount) 780220137Strasz{ 781220137Strasz 782220137Strasz SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0); 783220137Strasz 784220137Strasz /* 785220137Strasz * We need proc lock to dereference p->p_ucred. 786220137Strasz */ 787220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 788242139Strasz KASSERT(RACCT_CAN_DROP(resource), 789243088Strasz ("%s: called for non-droppable resource %d", __func__, resource)); 790220137Strasz 791220137Strasz mtx_lock(&racct_lock); 792220137Strasz KASSERT(amount <= p->p_racct->r_resources[resource], 793243088Strasz ("%s: freeing %ju of resource %d, which is more " 794243088Strasz "than allocated %jd for %s (pid %d)", __func__, amount, resource, 795220137Strasz (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid)); 796220137Strasz 797220137Strasz racct_alloc_resource(p->p_racct, resource, -amount); 798220137Strasz racct_sub_cred_locked(p->p_ucred, resource, amount); 799220137Strasz mtx_unlock(&racct_lock); 800220137Strasz} 801220137Strasz 802220137Straszstatic void 803220137Straszracct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) 804220137Strasz{ 805220137Strasz struct prison *pr; 806220137Strasz 807258622Savg SDT_PROBE(racct, kernel, rusage, sub__cred, cred, resource, amount, 808220137Strasz 0, 0); 809220137Strasz 810220137Strasz#ifdef notyet 811242139Strasz KASSERT(RACCT_CAN_DROP(resource), 812243088Strasz ("%s: called for resource %d which can not drop", __func__, 813220137Strasz resource)); 814220137Strasz#endif 815220137Strasz 816220137Strasz racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount); 817220137Strasz for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 818221362Strasz racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource, 819221362Strasz -amount); 820220137Strasz racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount); 821220137Strasz} 822220137Strasz 823220137Strasz/* 824220137Strasz * Decrease allocation of 'resource' by 'amount' for credential 'cred'. 825220137Strasz */ 826220137Straszvoid 827220137Straszracct_sub_cred(struct ucred *cred, int resource, uint64_t amount) 828220137Strasz{ 829220137Strasz 830220137Strasz mtx_lock(&racct_lock); 831220137Strasz racct_sub_cred_locked(cred, resource, amount); 832220137Strasz mtx_unlock(&racct_lock); 833220137Strasz} 834220137Strasz 835220137Strasz/* 836220137Strasz * Inherit resource usage information from the parent process. 837220137Strasz */ 838220137Straszint 839220137Straszracct_proc_fork(struct proc *parent, struct proc *child) 840220137Strasz{ 841220137Strasz int i, error = 0; 842220137Strasz 843220137Strasz /* 844220137Strasz * Create racct for the child process. 845220137Strasz */ 846220137Strasz racct_create(&child->p_racct); 847220137Strasz 848220137Strasz PROC_LOCK(parent); 849220137Strasz PROC_LOCK(child); 850220137Strasz mtx_lock(&racct_lock); 851220137Strasz 852225981Strasz#ifdef RCTL 853225981Strasz error = rctl_proc_fork(parent, child); 854225981Strasz if (error != 0) 855225981Strasz goto out; 856225981Strasz#endif 857225981Strasz 858242139Strasz /* Init process cpu time. */ 859242139Strasz child->p_prev_runtime = 0; 860242139Strasz child->p_throttled = 0; 861242139Strasz 862220137Strasz /* 863220137Strasz * Inherit resource usage. 864220137Strasz */ 865220137Strasz for (i = 0; i <= RACCT_MAX; i++) { 866220137Strasz if (parent->p_racct->r_resources[i] == 0 || 867223844Strasz !RACCT_IS_INHERITABLE(i)) 868220137Strasz continue; 869220137Strasz 870220137Strasz error = racct_set_locked(child, i, 871220137Strasz parent->p_racct->r_resources[i]); 872225938Strasz if (error != 0) 873220137Strasz goto out; 874220137Strasz } 875220137Strasz 876225944Strasz error = racct_add_locked(child, RACCT_NPROC, 1); 877225944Strasz error += racct_add_locked(child, RACCT_NTHR, 1); 878225944Strasz 879220137Straszout: 880220137Strasz mtx_unlock(&racct_lock); 881220137Strasz PROC_UNLOCK(child); 882220137Strasz PROC_UNLOCK(parent); 883220137Strasz 884235787Strasz if (error != 0) 885235787Strasz racct_proc_exit(child); 886235787Strasz 887220137Strasz return (error); 888220137Strasz} 889220137Strasz 890225940Strasz/* 891225940Strasz * Called at the end of fork1(), to handle rules that require the process 892225940Strasz * to be fully initialized. 893225940Strasz */ 894220137Straszvoid 895225940Straszracct_proc_fork_done(struct proc *child) 896225940Strasz{ 897225940Strasz 898225940Strasz#ifdef RCTL 899225940Strasz PROC_LOCK(child); 900225940Strasz mtx_lock(&racct_lock); 901225940Strasz rctl_enforce(child, RACCT_NPROC, 0); 902225940Strasz rctl_enforce(child, RACCT_NTHR, 0); 903225940Strasz mtx_unlock(&racct_lock); 904225940Strasz PROC_UNLOCK(child); 905225940Strasz#endif 906225940Strasz} 907225940Strasz 908225940Straszvoid 909220137Straszracct_proc_exit(struct proc *p) 910220137Strasz{ 911225364Strasz int i; 912220137Strasz uint64_t runtime; 913242139Strasz struct timeval wallclock; 914242139Strasz uint64_t pct_estimate, pct; 915220137Strasz 916220137Strasz PROC_LOCK(p); 917220137Strasz /* 918220137Strasz * We don't need to calculate rux, proc_reap() has already done this. 919220137Strasz */ 920220137Strasz runtime = cputick2usec(p->p_rux.rux_runtime); 921220137Strasz#ifdef notyet 922220137Strasz KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); 923220137Strasz#else 924220137Strasz if (runtime < p->p_prev_runtime) 925220137Strasz runtime = p->p_prev_runtime; 926220137Strasz#endif 927242139Strasz microuptime(&wallclock); 928242139Strasz timevalsub(&wallclock, &p->p_stats->p_start); 929242957Strasz if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { 930242957Strasz pct_estimate = (1000000 * runtime * 100) / 931242957Strasz ((uint64_t)wallclock.tv_sec * 1000000 + 932242957Strasz wallclock.tv_usec); 933242957Strasz } else 934242957Strasz pct_estimate = 0; 935242139Strasz pct = racct_getpcpu(p, pct_estimate); 936242139Strasz 937225364Strasz mtx_lock(&racct_lock); 938225364Strasz racct_set_locked(p, RACCT_CPU, runtime); 939242139Strasz racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct); 940220137Strasz 941225364Strasz for (i = 0; i <= RACCT_MAX; i++) { 942225364Strasz if (p->p_racct->r_resources[i] == 0) 943225364Strasz continue; 944225364Strasz if (!RACCT_IS_RECLAIMABLE(i)) 945225364Strasz continue; 946225364Strasz racct_set_locked(p, i, 0); 947225364Strasz } 948225364Strasz 949225364Strasz mtx_unlock(&racct_lock); 950220137Strasz PROC_UNLOCK(p); 951220137Strasz 952220137Strasz#ifdef RCTL 953220137Strasz rctl_racct_release(p->p_racct); 954220137Strasz#endif 955220137Strasz racct_destroy(&p->p_racct); 956220137Strasz} 957220137Strasz 958220137Strasz/* 959220137Strasz * Called after credentials change, to move resource utilisation 960220137Strasz * between raccts. 961220137Strasz */ 962220137Straszvoid 963220137Straszracct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, 964220137Strasz struct ucred *newcred) 965220137Strasz{ 966220137Strasz struct uidinfo *olduip, *newuip; 967220137Strasz struct loginclass *oldlc, *newlc; 968220137Strasz struct prison *oldpr, *newpr, *pr; 969220137Strasz 970220137Strasz PROC_LOCK_ASSERT(p, MA_NOTOWNED); 971220137Strasz 972220137Strasz newuip = newcred->cr_ruidinfo; 973220137Strasz olduip = oldcred->cr_ruidinfo; 974220137Strasz newlc = newcred->cr_loginclass; 975220137Strasz oldlc = oldcred->cr_loginclass; 976220137Strasz newpr = newcred->cr_prison; 977220137Strasz oldpr = oldcred->cr_prison; 978220137Strasz 979220137Strasz mtx_lock(&racct_lock); 980220137Strasz if (newuip != olduip) { 981220137Strasz racct_sub_racct(olduip->ui_racct, p->p_racct); 982220137Strasz racct_add_racct(newuip->ui_racct, p->p_racct); 983220137Strasz } 984220137Strasz if (newlc != oldlc) { 985220137Strasz racct_sub_racct(oldlc->lc_racct, p->p_racct); 986220137Strasz racct_add_racct(newlc->lc_racct, p->p_racct); 987220137Strasz } 988220137Strasz if (newpr != oldpr) { 989220137Strasz for (pr = oldpr; pr != NULL; pr = pr->pr_parent) 990221362Strasz racct_sub_racct(pr->pr_prison_racct->prr_racct, 991221362Strasz p->p_racct); 992220137Strasz for (pr = newpr; pr != NULL; pr = pr->pr_parent) 993221362Strasz racct_add_racct(pr->pr_prison_racct->prr_racct, 994221362Strasz p->p_racct); 995220137Strasz } 996220137Strasz mtx_unlock(&racct_lock); 997220137Strasz 998220137Strasz#ifdef RCTL 999220137Strasz rctl_proc_ucred_changed(p, newcred); 1000220137Strasz#endif 1001220137Strasz} 1002220137Strasz 1003232598Straszvoid 1004232598Straszracct_move(struct racct *dest, struct racct *src) 1005232598Strasz{ 1006232598Strasz 1007232598Strasz mtx_lock(&racct_lock); 1008232598Strasz 1009232598Strasz racct_add_racct(dest, src); 1010232598Strasz racct_sub_racct(src, src); 1011232598Strasz 1012232598Strasz mtx_unlock(&racct_lock); 1013232598Strasz} 1014232598Strasz 1015220137Straszstatic void 1016242139Straszracct_proc_throttle(struct proc *p) 1017242139Strasz{ 1018242139Strasz struct thread *td; 1019242139Strasz#ifdef SMP 1020242139Strasz int cpuid; 1021242139Strasz#endif 1022242139Strasz 1023242139Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 1024242139Strasz 1025242139Strasz /* 1026242139Strasz * Do not block kernel processes. Also do not block processes with 1027242139Strasz * low %cpu utilization to improve interactivity. 1028242139Strasz */ 1029242139Strasz if (((p->p_flag & (P_SYSTEM | P_KTHREAD)) != 0) || 1030242139Strasz (p->p_racct->r_resources[RACCT_PCTCPU] <= pcpu_threshold)) 1031242139Strasz return; 1032242139Strasz p->p_throttled = 1; 1033242139Strasz 1034242139Strasz FOREACH_THREAD_IN_PROC(p, td) { 1035248298Strasz thread_lock(td); 1036242139Strasz switch (td->td_state) { 1037242139Strasz case TDS_RUNQ: 1038242139Strasz /* 1039242139Strasz * If the thread is on the scheduler run-queue, we can 1040242139Strasz * not just remove it from there. So we set the flag 1041242139Strasz * TDF_NEEDRESCHED for the thread, so that once it is 1042242139Strasz * running, it is taken off the cpu as soon as possible. 1043242139Strasz */ 1044242139Strasz td->td_flags |= TDF_NEEDRESCHED; 1045242139Strasz break; 1046242139Strasz case TDS_RUNNING: 1047242139Strasz /* 1048242139Strasz * If the thread is running, we request a context 1049242139Strasz * switch for it by setting the TDF_NEEDRESCHED flag. 1050242139Strasz */ 1051242139Strasz td->td_flags |= TDF_NEEDRESCHED; 1052242139Strasz#ifdef SMP 1053242139Strasz cpuid = td->td_oncpu; 1054242139Strasz if ((cpuid != NOCPU) && (td != curthread)) 1055242139Strasz ipi_cpu(cpuid, IPI_AST); 1056242139Strasz#endif 1057242139Strasz break; 1058242139Strasz default: 1059242139Strasz break; 1060242139Strasz } 1061248298Strasz thread_unlock(td); 1062242139Strasz } 1063242139Strasz} 1064242139Strasz 1065242139Straszstatic void 1066242139Straszracct_proc_wakeup(struct proc *p) 1067242139Strasz{ 1068242139Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 1069242139Strasz 1070242139Strasz if (p->p_throttled) { 1071242139Strasz p->p_throttled = 0; 1072242139Strasz wakeup(p->p_racct); 1073242139Strasz } 1074242139Strasz} 1075242139Strasz 1076242139Straszstatic void 1077242139Straszracct_decay_resource(struct racct *racct, void * res, void* dummy) 1078242139Strasz{ 1079242139Strasz int resource; 1080242139Strasz int64_t r_old, r_new; 1081242139Strasz 1082242139Strasz resource = *(int *)res; 1083242139Strasz r_old = racct->r_resources[resource]; 1084242139Strasz 1085242139Strasz /* If there is nothing to decay, just exit. */ 1086242139Strasz if (r_old <= 0) 1087242139Strasz return; 1088242139Strasz 1089242139Strasz mtx_lock(&racct_lock); 1090242139Strasz r_new = r_old * RACCT_DECAY_FACTOR / FSCALE; 1091242139Strasz racct->r_resources[resource] = r_new; 1092242139Strasz mtx_unlock(&racct_lock); 1093242139Strasz} 1094242139Strasz 1095242139Straszstatic void 1096242139Straszracct_decay(int resource) 1097242139Strasz{ 1098242139Strasz ui_racct_foreach(racct_decay_resource, &resource, NULL); 1099242139Strasz loginclass_racct_foreach(racct_decay_resource, &resource, NULL); 1100242139Strasz prison_racct_foreach(racct_decay_resource, &resource, NULL); 1101242139Strasz} 1102242139Strasz 1103242139Straszstatic void 1104220137Straszracctd(void) 1105220137Strasz{ 1106220137Strasz struct thread *td; 1107220137Strasz struct proc *p; 1108220137Strasz struct timeval wallclock; 1109220137Strasz uint64_t runtime; 1110242139Strasz uint64_t pct, pct_estimate; 1111220137Strasz 1112220137Strasz for (;;) { 1113242139Strasz racct_decay(RACCT_PCTCPU); 1114242139Strasz 1115220137Strasz sx_slock(&allproc_lock); 1116220137Strasz 1117242139Strasz LIST_FOREACH(p, &zombproc, p_list) { 1118242139Strasz PROC_LOCK(p); 1119242139Strasz racct_set(p, RACCT_PCTCPU, 0); 1120242139Strasz PROC_UNLOCK(p); 1121242139Strasz } 1122242139Strasz 1123220137Strasz FOREACH_PROC_IN_SYSTEM(p) { 1124242139Strasz PROC_LOCK(p); 1125242139Strasz if (p->p_state != PRS_NORMAL) { 1126242139Strasz PROC_UNLOCK(p); 1127220137Strasz continue; 1128242139Strasz } 1129220137Strasz 1130220137Strasz microuptime(&wallclock); 1131220137Strasz timevalsub(&wallclock, &p->p_stats->p_start); 1132220137Strasz PROC_SLOCK(p); 1133232782Strasz FOREACH_THREAD_IN_PROC(p, td) 1134220137Strasz ruxagg(p, td); 1135220137Strasz runtime = cputick2usec(p->p_rux.rux_runtime); 1136220137Strasz PROC_SUNLOCK(p); 1137220137Strasz#ifdef notyet 1138220137Strasz KASSERT(runtime >= p->p_prev_runtime, 1139220137Strasz ("runtime < p_prev_runtime")); 1140220137Strasz#else 1141220137Strasz if (runtime < p->p_prev_runtime) 1142220137Strasz runtime = p->p_prev_runtime; 1143220137Strasz#endif 1144220137Strasz p->p_prev_runtime = runtime; 1145242957Strasz if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { 1146242957Strasz pct_estimate = (1000000 * runtime * 100) / 1147242957Strasz ((uint64_t)wallclock.tv_sec * 1000000 + 1148242957Strasz wallclock.tv_usec); 1149242957Strasz } else 1150242957Strasz pct_estimate = 0; 1151242139Strasz pct = racct_getpcpu(p, pct_estimate); 1152220137Strasz mtx_lock(&racct_lock); 1153242139Strasz racct_set_force_locked(p, RACCT_PCTCPU, pct); 1154220137Strasz racct_set_locked(p, RACCT_CPU, runtime); 1155220137Strasz racct_set_locked(p, RACCT_WALLCLOCK, 1156233126Sjh (uint64_t)wallclock.tv_sec * 1000000 + 1157233126Sjh wallclock.tv_usec); 1158220137Strasz mtx_unlock(&racct_lock); 1159220137Strasz PROC_UNLOCK(p); 1160220137Strasz } 1161242139Strasz 1162242139Strasz /* 1163242139Strasz * To ensure that processes are throttled in a fair way, we need 1164242139Strasz * to iterate over all processes again and check the limits 1165242139Strasz * for %cpu resource only after ucred racct containers have been 1166242139Strasz * properly filled. 1167242139Strasz */ 1168242139Strasz FOREACH_PROC_IN_SYSTEM(p) { 1169242139Strasz PROC_LOCK(p); 1170242139Strasz if (p->p_state != PRS_NORMAL) { 1171242139Strasz PROC_UNLOCK(p); 1172242139Strasz continue; 1173242139Strasz } 1174242139Strasz 1175242139Strasz if (racct_pcpu_available(p) <= 0) 1176242139Strasz racct_proc_throttle(p); 1177242139Strasz else if (p->p_throttled) 1178242139Strasz racct_proc_wakeup(p); 1179242139Strasz PROC_UNLOCK(p); 1180242139Strasz } 1181220137Strasz sx_sunlock(&allproc_lock); 1182220137Strasz pause("-", hz); 1183220137Strasz } 1184220137Strasz} 1185220137Strasz 1186220137Straszstatic struct kproc_desc racctd_kp = { 1187220137Strasz "racctd", 1188220137Strasz racctd, 1189220137Strasz NULL 1190220137Strasz}; 1191220137StraszSYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp); 1192220137Strasz 1193220137Straszstatic void 1194220137Straszracct_init(void) 1195220137Strasz{ 1196220137Strasz 1197220137Strasz racct_zone = uma_zcreate("racct", sizeof(struct racct), 1198220137Strasz NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 1199220137Strasz /* 1200220137Strasz * XXX: Move this somewhere. 1201220137Strasz */ 1202221362Strasz prison0.pr_prison_racct = prison_racct_find("0"); 1203220137Strasz} 1204220137StraszSYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL); 1205220137Strasz 1206220137Strasz#else /* !RACCT */ 1207220137Strasz 1208220137Straszint 1209220137Straszracct_add(struct proc *p, int resource, uint64_t amount) 1210220137Strasz{ 1211220137Strasz 1212220137Strasz return (0); 1213220137Strasz} 1214220137Strasz 1215220137Straszvoid 1216220137Straszracct_add_cred(struct ucred *cred, int resource, uint64_t amount) 1217220137Strasz{ 1218220137Strasz} 1219220137Strasz 1220220137Straszvoid 1221220137Straszracct_add_force(struct proc *p, int resource, uint64_t amount) 1222220137Strasz{ 1223220137Strasz 1224220137Strasz return; 1225220137Strasz} 1226220137Strasz 1227220137Straszint 1228220137Straszracct_set(struct proc *p, int resource, uint64_t amount) 1229220137Strasz{ 1230220137Strasz 1231220137Strasz return (0); 1232220137Strasz} 1233220137Strasz 1234220137Straszvoid 1235220372Straszracct_set_force(struct proc *p, int resource, uint64_t amount) 1236220372Strasz{ 1237220372Strasz} 1238220372Strasz 1239220372Straszvoid 1240220137Straszracct_sub(struct proc *p, int resource, uint64_t amount) 1241220137Strasz{ 1242220137Strasz} 1243220137Strasz 1244220137Straszvoid 1245220137Straszracct_sub_cred(struct ucred *cred, int resource, uint64_t amount) 1246220137Strasz{ 1247220137Strasz} 1248220137Strasz 1249220137Straszuint64_t 1250220137Straszracct_get_limit(struct proc *p, int resource) 1251220137Strasz{ 1252220137Strasz 1253220137Strasz return (UINT64_MAX); 1254220137Strasz} 1255220137Strasz 1256220372Straszuint64_t 1257220372Straszracct_get_available(struct proc *p, int resource) 1258220372Strasz{ 1259220372Strasz 1260220372Strasz return (UINT64_MAX); 1261220372Strasz} 1262220372Strasz 1263220137Straszvoid 1264220137Straszracct_create(struct racct **racctp) 1265220137Strasz{ 1266220137Strasz} 1267220137Strasz 1268220137Straszvoid 1269220137Straszracct_destroy(struct racct **racctp) 1270220137Strasz{ 1271220137Strasz} 1272220137Strasz 1273220137Straszint 1274220137Straszracct_proc_fork(struct proc *parent, struct proc *child) 1275220137Strasz{ 1276220137Strasz 1277220137Strasz return (0); 1278220137Strasz} 1279220137Strasz 1280220137Straszvoid 1281225940Straszracct_proc_fork_done(struct proc *child) 1282225940Strasz{ 1283225940Strasz} 1284225940Strasz 1285225940Straszvoid 1286220137Straszracct_proc_exit(struct proc *p) 1287220137Strasz{ 1288220137Strasz} 1289220137Strasz 1290220137Strasz#endif /* !RACCT */ 1291