kern_racct.c (235787) | kern_racct.c (242139) |
---|---|
1/*- 2 * Copyright (c) 2010 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Edward Tomasz Napierala under sponsorship 6 * from the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without --- 12 unchanged lines hidden (view full) --- 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * | 1/*- 2 * Copyright (c) 2010 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Edward Tomasz Napierala under sponsorship 6 * from the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without --- 12 unchanged lines hidden (view full) --- 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * |
29 * $FreeBSD: head/sys/kern/kern_racct.c 235787 2012-05-22 15:58:27Z trasz $ | 29 * $FreeBSD: head/sys/kern/kern_racct.c 242139 2012-10-26 16:01:08Z trasz $ |
30 */ 31 32#include <sys/cdefs.h> | 30 */ 31 32#include <sys/cdefs.h> |
33__FBSDID("$FreeBSD: head/sys/kern/kern_racct.c 235787 2012-05-22 15:58:27Z trasz $"); | 33__FBSDID("$FreeBSD: head/sys/kern/kern_racct.c 242139 2012-10-26 16:01:08Z trasz $"); |
34 35#include "opt_kdtrace.h" | 34 35#include "opt_kdtrace.h" |
36#include "opt_sched.h" |
|
36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/eventhandler.h> 40#include <sys/jail.h> 41#include <sys/kernel.h> 42#include <sys/kthread.h> 43#include <sys/lock.h> 44#include <sys/loginclass.h> 45#include <sys/malloc.h> 46#include <sys/mutex.h> 47#include <sys/proc.h> 48#include <sys/racct.h> 49#include <sys/resourcevar.h> 50#include <sys/sbuf.h> 51#include <sys/sched.h> 52#include <sys/sdt.h> | 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/eventhandler.h> 41#include <sys/jail.h> 42#include <sys/kernel.h> 43#include <sys/kthread.h> 44#include <sys/lock.h> 45#include <sys/loginclass.h> 46#include <sys/malloc.h> 47#include <sys/mutex.h> 48#include <sys/proc.h> 49#include <sys/racct.h> 50#include <sys/resourcevar.h> 51#include <sys/sbuf.h> 52#include <sys/sched.h> 53#include <sys/sdt.h> |
54#include <sys/smp.h> |
|
53#include <sys/sx.h> | 55#include <sys/sx.h> |
56#include <sys/sysctl.h> |
|
54#include <sys/sysent.h> 55#include <sys/sysproto.h> 56#include <sys/umtx.h> | 57#include <sys/sysent.h> 58#include <sys/sysproto.h> 59#include <sys/umtx.h> |
60#include <machine/smp.h> |
|
57 58#ifdef RCTL 59#include <sys/rctl.h> 60#endif 61 62#ifdef RACCT 63 64FEATURE(racct, "Resource Accounting"); 65 | 61 62#ifdef RCTL 63#include <sys/rctl.h> 64#endif 65 66#ifdef RACCT 67 68FEATURE(racct, "Resource Accounting"); 69 |
70/* 71 * Do not block processes that have their %cpu usage <= pcpu_threshold. 72 */ 73static int pcpu_threshold = 1; 74 75SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting"); 76SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold, 77 0, "Processes with higher %cpu usage than this value can be throttled."); 78 79/* 80 * How many seconds it takes to use the scheduler %cpu calculations. When a 81 * process starts, we compute its %cpu usage by dividing its runtime by the 82 * process wall clock time. After RACCT_PCPU_SECS pass, we use the value 83 * provided by the scheduler. 84 */ 85#define RACCT_PCPU_SECS 3 86 |
|
66static struct mtx racct_lock; 67MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF); 68 69static uma_zone_t racct_zone; 70 71static void racct_sub_racct(struct racct *dest, const struct racct *src); 72static void racct_sub_cred_locked(struct ucred *cred, int resource, 73 uint64_t amount); --- 61 unchanged lines hidden (view full) --- 135 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 136 [RACCT_NSEMOP] = 137 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 138 [RACCT_NSHM] = 139 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 140 [RACCT_SHMSIZE] = 141 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 142 [RACCT_WALLCLOCK] = | 87static struct mtx racct_lock; 88MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF); 89 90static uma_zone_t racct_zone; 91 92static void racct_sub_racct(struct racct *dest, const struct racct *src); 93static void racct_sub_cred_locked(struct ucred *cred, int resource, 94 uint64_t amount); --- 61 unchanged lines hidden (view full) --- 156 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 157 [RACCT_NSEMOP] = 158 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 159 [RACCT_NSHM] = 160 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 161 [RACCT_SHMSIZE] = 162 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 163 [RACCT_WALLCLOCK] = |
143 RACCT_IN_MILLIONS }; | 164 RACCT_IN_MILLIONS, 165 [RACCT_PCTCPU] = 166 RACCT_DECAYING | RACCT_DENIABLE | RACCT_IN_MILLIONS }; |
144 | 167 |
168static const fixpt_t RACCT_DECAY_FACTOR = 0.3 * FSCALE; 169 170#ifdef SCHED_4BSD 171/* 172 * Contains intermediate values for %cpu calculations to avoid using floating 173 * point in the kernel. 174 * ccpu_exp[k] = FSCALE * (ccpu/FSCALE)^k = FSCALE * exp(-k/20) 175 * It is needed only for the 4BSD scheduler, because in ULE, the ccpu equals to 176 * zero so the calculations are more straightforward. 177 */ 178fixpt_t ccpu_exp[] = { 179 [0] = FSCALE * 1, 180 [1] = FSCALE * 0.95122942450071400909, 181 [2] = FSCALE * 0.90483741803595957316, 182 [3] = FSCALE * 0.86070797642505780722, 183 [4] = FSCALE * 0.81873075307798185866, 184 [5] = FSCALE * 0.77880078307140486824, 185 [6] = FSCALE * 0.74081822068171786606, 186 [7] = FSCALE * 0.70468808971871343435, 187 [8] = FSCALE * 0.67032004603563930074, 188 [9] = FSCALE * 0.63762815162177329314, 189 [10] = FSCALE * 0.60653065971263342360, 190 [11] = FSCALE * 0.57694981038048669531, 191 [12] = FSCALE * 0.54881163609402643262, 192 [13] = FSCALE * 0.52204577676101604789, 193 [14] = FSCALE * 0.49658530379140951470, 194 [15] = FSCALE * 0.47236655274101470713, 195 [16] = FSCALE * 0.44932896411722159143, 196 [17] = FSCALE * 0.42741493194872666992, 197 [18] = FSCALE * 0.40656965974059911188, 198 [19] = FSCALE * 0.38674102345450120691, 199 [20] = FSCALE * 0.36787944117144232159, 200 [21] = FSCALE * 0.34993774911115535467, 201 [22] = FSCALE * 0.33287108369807955328, 202 [23] = FSCALE * 0.31663676937905321821, 203 [24] = FSCALE * 0.30119421191220209664, 204 [25] = FSCALE * 0.28650479686019010032, 205 [26] = FSCALE * 0.27253179303401260312, 206 [27] = FSCALE * 0.25924026064589150757, 207 [28] = FSCALE * 0.24659696394160647693, 208 [29] = FSCALE * 0.23457028809379765313, 209 [30] = FSCALE * 0.22313016014842982893, 210 [31] = FSCALE * 0.21224797382674305771, 211 [32] = FSCALE * 0.20189651799465540848, 212 [33] = FSCALE * 0.19204990862075411423, 213 [34] = FSCALE * 0.18268352405273465022, 214 [35] = FSCALE * 0.17377394345044512668, 215 [36] = FSCALE * 0.16529888822158653829, 216 [37] = FSCALE * 0.15723716631362761621, 217 [38] = FSCALE * 0.14956861922263505264, 218 [39] = FSCALE * 0.14227407158651357185, 219 [40] = FSCALE * 0.13533528323661269189, 220 [41] = FSCALE * 0.12873490358780421886, 221 [42] = FSCALE * 0.12245642825298191021, 222 [43] = FSCALE * 0.11648415777349695786, 223 [44] = FSCALE * 0.11080315836233388333, 224 [45] = FSCALE * 0.10539922456186433678, 225 [46] = FSCALE * 0.10025884372280373372, 226 [47] = FSCALE * 0.09536916221554961888, 227 [48] = FSCALE * 0.09071795328941250337, 228 [49] = FSCALE * 0.08629358649937051097, 229 [50] = FSCALE * 0.08208499862389879516, 230 [51] = FSCALE * 0.07808166600115315231, 231 [52] = FSCALE * 0.07427357821433388042, 232 [53] = FSCALE * 0.07065121306042958674, 233 [54] = FSCALE * 0.06720551273974976512, 234 [55] = FSCALE * 0.06392786120670757270, 235 [56] = FSCALE * 0.06081006262521796499, 236 [57] = FSCALE * 0.05784432087483846296, 237 [58] = FSCALE * 0.05502322005640722902, 238 [59] = FSCALE * 0.05233970594843239308, 239 [60] = FSCALE * 0.04978706836786394297, 240 [61] = FSCALE * 0.04735892439114092119, 241 [62] = FSCALE * 0.04504920239355780606, 242 [63] = FSCALE * 0.04285212686704017991, 243 [64] = FSCALE * 0.04076220397836621516, 244 [65] = FSCALE * 0.03877420783172200988, 245 [66] = FSCALE * 0.03688316740124000544, 246 [67] = FSCALE * 0.03508435410084502588, 247 [68] = FSCALE * 0.03337326996032607948, 248 [69] = FSCALE * 0.03174563637806794323, 249 [70] = FSCALE * 0.03019738342231850073, 250 [71] = FSCALE * 0.02872463965423942912, 251 [72] = FSCALE * 0.02732372244729256080, 252 [73] = FSCALE * 0.02599112877875534358, 253 [74] = FSCALE * 0.02472352647033939120, 254 [75] = FSCALE * 0.02351774585600910823, 255 [76] = FSCALE * 0.02237077185616559577, 256 [77] = FSCALE * 0.02127973643837716938, 257 [78] = FSCALE * 0.02024191144580438847, 258 [79] = FSCALE * 0.01925470177538692429, 259 [80] = FSCALE * 0.01831563888873418029, 260 [81] = FSCALE * 0.01742237463949351138, 261 [82] = FSCALE * 0.01657267540176124754, 262 [83] = FSCALE * 0.01576441648485449082, 263 [84] = FSCALE * 0.01499557682047770621, 264 [85] = FSCALE * 0.01426423390899925527, 265 [86] = FSCALE * 0.01356855901220093175, 266 [87] = FSCALE * 0.01290681258047986886, 267 [88] = FSCALE * 0.01227733990306844117, 268 [89] = FSCALE * 0.01167856697039544521, 269 [90] = FSCALE * 0.01110899653824230649, 270 [91] = FSCALE * 0.01056720438385265337, 271 [92] = FSCALE * 0.01005183574463358164, 272 [93] = FSCALE * 0.00956160193054350793, 273 [94] = FSCALE * 0.00909527710169581709, 274 [95] = FSCALE * 0.00865169520312063417, 275 [96] = FSCALE * 0.00822974704902002884, 276 [97] = FSCALE * 0.00782837754922577143, 277 [98] = FSCALE * 0.00744658307092434051, 278 [99] = FSCALE * 0.00708340892905212004, 279 [100] = FSCALE * 0.00673794699908546709, 280 [101] = FSCALE * 0.00640933344625638184, 281 [102] = FSCALE * 0.00609674656551563610, 282 [103] = FSCALE * 0.00579940472684214321, 283 [104] = FSCALE * 0.00551656442076077241, 284 [105] = FSCALE * 0.00524751839918138427, 285 [106] = FSCALE * 0.00499159390691021621, 286 [107] = FSCALE * 0.00474815099941147558, 287 [108] = FSCALE * 0.00451658094261266798, 288 [109] = FSCALE * 0.00429630469075234057, 289 [110] = FSCALE * 0.00408677143846406699, 290}; 291#endif 292 293#define CCPU_EXP_MAX 110 294 295/* 296 * This function is analogical to the getpcpu() function in the ps(1) command. 297 * They should both calculate in the same way so that the racct %cpu 298 * calculations are consistent with the values showed by the ps(1) tool. 299 * The calculations are more complex in the 4BSD scheduler because of the value 300 * of the ccpu variable. In ULE it is defined to be zero which saves us some 301 * work. 302 */ 303static uint64_t 304racct_getpcpu(struct proc *p, u_int pcpu) 305{ 306 u_int swtime; 307#ifdef SCHED_4BSD 308 fixpt_t pctcpu, pctcpu_next; 309#endif 310#ifdef SMP 311 struct pcpu *pc; 312 int found; 313#endif 314 fixpt_t p_pctcpu; 315 struct thread *td; 316 317 /* 318 * If the process is swapped out, we count its %cpu usage as zero. 319 * This behaviour is consistent with the userland ps(1) tool. 320 */ 321 if ((p->p_flag & P_INMEM) == 0) 322 return (0); 323 swtime = (ticks - p->p_swtick) / hz; 324 325 /* 326 * For short-lived processes, the sched_pctcpu() returns small 327 * values even for cpu intensive processes. Therefore we use 328 * our own estimate in this case. 329 */ 330 if (swtime < RACCT_PCPU_SECS) 331 return (pcpu); 332 333 p_pctcpu = 0; 334 FOREACH_THREAD_IN_PROC(p, td) { 335 if (td == PCPU_GET(idlethread)) 336 continue; 337#ifdef SMP 338 found = 0; 339 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 340 if (td == pc->pc_idlethread) { 341 found = 1; 342 break; 343 } 344 } 345 if (found) 346 continue; 347#endif 348 thread_lock(td); 349#ifdef SCHED_4BSD 350 pctcpu = sched_pctcpu(td); 351 /* Count also the yet unfinished second. */ 352 pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT; 353 pctcpu_next += sched_pctcpu_delta(td); 354 p_pctcpu += max(pctcpu, pctcpu_next); 355#else 356 /* 357 * In ULE the %cpu statistics are updated on every 358 * sched_pctcpu() call. So special calculations to 359 * account for the latest (unfinished) second are 360 * not needed. 361 */ 362 p_pctcpu += sched_pctcpu(td); 363#endif 364 thread_unlock(td); 365 } 366 367#ifdef SCHED_4BSD 368 if (swtime <= CCPU_EXP_MAX) 369 return ((100 * (uint64_t)p_pctcpu * 1000000) / 370 (FSCALE - ccpu_exp[swtime])); 371#endif 372 373 return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE); 374} 375 |
|
145static void 146racct_add_racct(struct racct *dest, const struct racct *src) 147{ 148 int i; 149 150 mtx_assert(&racct_lock, MA_OWNED); 151 152 /* --- 22 unchanged lines hidden (view full) --- 175 if (!RACCT_IS_SLOPPY(i)) { 176 KASSERT(dest->r_resources[i] >= 0, 177 ("racct propagation meltdown: dest < 0")); 178 KASSERT(src->r_resources[i] >= 0, 179 ("racct propagation meltdown: src < 0")); 180 KASSERT(src->r_resources[i] <= dest->r_resources[i], 181 ("racct propagation meltdown: src > dest")); 182 } | 376static void 377racct_add_racct(struct racct *dest, const struct racct *src) 378{ 379 int i; 380 381 mtx_assert(&racct_lock, MA_OWNED); 382 383 /* --- 22 unchanged lines hidden (view full) --- 406 if (!RACCT_IS_SLOPPY(i)) { 407 KASSERT(dest->r_resources[i] >= 0, 408 ("racct propagation meltdown: dest < 0")); 409 KASSERT(src->r_resources[i] >= 0, 410 ("racct propagation meltdown: src < 0")); 411 KASSERT(src->r_resources[i] <= dest->r_resources[i], 412 ("racct propagation meltdown: src > dest")); 413 } |
183 if (RACCT_IS_RECLAIMABLE(i)) { | 414 if (RACCT_CAN_DROP(i)) { |
184 dest->r_resources[i] -= src->r_resources[i]; 185 if (dest->r_resources[i] < 0) { 186 KASSERT(RACCT_IS_SLOPPY(i), 187 ("racct_sub_racct: usage < 0")); 188 dest->r_resources[i] = 0; 189 } 190 } 191 } --- 57 unchanged lines hidden (view full) --- 249 uint64_t amount) 250{ 251 252 mtx_assert(&racct_lock, MA_OWNED); 253 KASSERT(racct != NULL, ("NULL racct")); 254 255 racct->r_resources[resource] += amount; 256 if (racct->r_resources[resource] < 0) { | 415 dest->r_resources[i] -= src->r_resources[i]; 416 if (dest->r_resources[i] < 0) { 417 KASSERT(RACCT_IS_SLOPPY(i), 418 ("racct_sub_racct: usage < 0")); 419 dest->r_resources[i] = 0; 420 } 421 } 422 } --- 57 unchanged lines hidden (view full) --- 480 uint64_t amount) 481{ 482 483 mtx_assert(&racct_lock, MA_OWNED); 484 KASSERT(racct != NULL, ("NULL racct")); 485 486 racct->r_resources[resource] += amount; 487 if (racct->r_resources[resource] < 0) { |
257 KASSERT(RACCT_IS_SLOPPY(resource), | 488 KASSERT(RACCT_IS_SLOPPY(resource) || RACCT_IS_DECAYING(resource), |
258 ("racct_alloc_resource: usage < 0")); 259 racct->r_resources[resource] = 0; 260 } | 489 ("racct_alloc_resource: usage < 0")); 490 racct->r_resources[resource] = 0; 491 } |
492 493 /* 494 * There are some cases where the racct %cpu resource would grow 495 * beyond 100%. 496 * For example in racct_proc_exit() we add the process %cpu usage 497 * to the ucred racct containers. If too many processes terminated 498 * in a short time span, the ucred %cpu resource could grow too much. 499 * Also, the 4BSD scheduler sometimes returns for a thread more than 500 * 100% cpu usage. So we set a boundary here to 100%. 501 */ 502 if ((resource == RACCT_PCTCPU) && 503 (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000)) 504 racct->r_resources[RACCT_PCTCPU] = 100 * 1000000; |
|
261} 262 263static int 264racct_add_locked(struct proc *p, int resource, uint64_t amount) 265{ 266#ifdef RCTL 267 int error; 268#endif --- 83 unchanged lines hidden (view full) --- 352 racct_alloc_resource(p->p_racct, resource, amount); 353 mtx_unlock(&racct_lock); 354 racct_add_cred(p->p_ucred, resource, amount); 355} 356 357static int 358racct_set_locked(struct proc *p, int resource, uint64_t amount) 359{ | 505} 506 507static int 508racct_add_locked(struct proc *p, int resource, uint64_t amount) 509{ 510#ifdef RCTL 511 int error; 512#endif --- 83 unchanged lines hidden (view full) --- 596 racct_alloc_resource(p->p_racct, resource, amount); 597 mtx_unlock(&racct_lock); 598 racct_add_cred(p->p_ucred, resource, amount); 599} 600 601static int 602racct_set_locked(struct proc *p, int resource, uint64_t amount) 603{ |
360 int64_t diff; | 604 int64_t old_amount, decayed_amount; 605 int64_t diff_proc, diff_cred; |
361#ifdef RCTL 362 int error; 363#endif 364 365 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); 366 367 /* 368 * We need proc lock to dereference p->p_ucred. 369 */ 370 PROC_LOCK_ASSERT(p, MA_OWNED); 371 | 606#ifdef RCTL 607 int error; 608#endif 609 610 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); 611 612 /* 613 * We need proc lock to dereference p->p_ucred. 614 */ 615 PROC_LOCK_ASSERT(p, MA_OWNED); 616 |
372 diff = amount - p->p_racct->r_resources[resource]; | 617 old_amount = p->p_racct->r_resources[resource]; 618 /* 619 * The diffs may be negative. 620 */ 621 diff_proc = amount - old_amount; 622 if (RACCT_IS_DECAYING(resource)) { 623 /* 624 * Resources in per-credential racct containers may decay. 625 * If this is the case, we need to calculate the difference 626 * between the new amount and the proportional value of the 627 * old amount that has decayed in the ucred racct containers. 628 */ 629 decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; 630 diff_cred = amount - decayed_amount; 631 } else 632 diff_cred = diff_proc; |
373#ifdef notyet | 633#ifdef notyet |
374 KASSERT(diff >= 0 || RACCT_IS_RECLAIMABLE(resource), 375 ("racct_set: usage of non-reclaimable resource %d dropping", | 634 KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource), 635 ("racct_set: usage of non-droppable resource %d dropping", |
376 resource)); 377#endif 378#ifdef RCTL | 636 resource)); 637#endif 638#ifdef RCTL |
379 if (diff > 0) { 380 error = rctl_enforce(p, resource, diff); | 639 if (diff_proc > 0) { 640 error = rctl_enforce(p, resource, diff_proc); |
381 if (error && RACCT_IS_DENIABLE(resource)) { 382 SDT_PROBE(racct, kernel, rusage, set_failure, p, 383 resource, amount, 0, 0); 384 return (error); 385 } 386 } 387#endif | 641 if (error && RACCT_IS_DENIABLE(resource)) { 642 SDT_PROBE(racct, kernel, rusage, set_failure, p, 643 resource, amount, 0, 0); 644 return (error); 645 } 646 } 647#endif |
388 racct_alloc_resource(p->p_racct, resource, diff); 389 if (diff > 0) 390 racct_add_cred_locked(p->p_ucred, resource, diff); 391 else if (diff < 0) 392 racct_sub_cred_locked(p->p_ucred, resource, -diff); | 648 racct_alloc_resource(p->p_racct, resource, diff_proc); 649 if (diff_cred > 0) 650 racct_add_cred_locked(p->p_ucred, resource, diff_cred); 651 else if (diff_cred < 0) 652 racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); |
393 394 return (0); 395} 396 397/* 398 * Set allocation of 'resource' to 'amount' for process 'p'. 399 * Return 0 if it's below limits, or errno, if it's not. 400 * --- 6 unchanged lines hidden (view full) --- 407 int error; 408 409 mtx_lock(&racct_lock); 410 error = racct_set_locked(p, resource, amount); 411 mtx_unlock(&racct_lock); 412 return (error); 413} 414 | 653 654 return (0); 655} 656 657/* 658 * Set allocation of 'resource' to 'amount' for process 'p'. 659 * Return 0 if it's below limits, or errno, if it's not. 660 * --- 6 unchanged lines hidden (view full) --- 667 int error; 668 669 mtx_lock(&racct_lock); 670 error = racct_set_locked(p, resource, amount); 671 mtx_unlock(&racct_lock); 672 return (error); 673} 674 |
415void 416racct_set_force(struct proc *p, int resource, uint64_t amount) | 675static void 676racct_set_force_locked(struct proc *p, int resource, uint64_t amount) |
417{ | 677{ |
418 int64_t diff; | 678 int64_t old_amount, decayed_amount; 679 int64_t diff_proc, diff_cred; |
419 420 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); 421 422 /* 423 * We need proc lock to dereference p->p_ucred. 424 */ 425 PROC_LOCK_ASSERT(p, MA_OWNED); 426 | 680 681 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); 682 683 /* 684 * We need proc lock to dereference p->p_ucred. 685 */ 686 PROC_LOCK_ASSERT(p, MA_OWNED); 687 |
688 old_amount = p->p_racct->r_resources[resource]; 689 /* 690 * The diffs may be negative. 691 */ 692 diff_proc = amount - old_amount; 693 if (RACCT_IS_DECAYING(resource)) { 694 /* 695 * Resources in per-credential racct containers may decay. 696 * If this is the case, we need to calculate the difference 697 * between the new amount and the proportional value of the 698 * old amount that has decayed in the ucred racct containers. 699 */ 700 decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; 701 diff_cred = amount - decayed_amount; 702 } else 703 diff_cred = diff_proc; 704 705 racct_alloc_resource(p->p_racct, resource, diff_proc); 706 if (diff_cred > 0) 707 racct_add_cred_locked(p->p_ucred, resource, diff_cred); 708 else if (diff_cred < 0) 709 racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); 710} 711 712void 713racct_set_force(struct proc *p, int resource, uint64_t amount) 714{ |
|
427 mtx_lock(&racct_lock); | 715 mtx_lock(&racct_lock); |
428 diff = amount - p->p_racct->r_resources[resource]; 429 racct_alloc_resource(p->p_racct, resource, diff); 430 if (diff > 0) 431 racct_add_cred_locked(p->p_ucred, resource, diff); 432 else if (diff < 0) 433 racct_sub_cred_locked(p->p_ucred, resource, -diff); | 716 racct_set_force_locked(p, resource, amount); |
434 mtx_unlock(&racct_lock); 435} 436 437/* 438 * Returns amount of 'resource' the process 'p' can keep allocated. 439 * Allocating more than that would be denied, unless the resource 440 * is marked undeniable. Amount of already allocated resource does 441 * not matter. --- 22 unchanged lines hidden (view full) --- 464#ifdef RCTL 465 return (rctl_get_available(p, resource)); 466#else 467 return (UINT64_MAX); 468#endif 469} 470 471/* | 717 mtx_unlock(&racct_lock); 718} 719 720/* 721 * Returns amount of 'resource' the process 'p' can keep allocated. 722 * Allocating more than that would be denied, unless the resource 723 * is marked undeniable. Amount of already allocated resource does 724 * not matter. --- 22 unchanged lines hidden (view full) --- 747#ifdef RCTL 748 return (rctl_get_available(p, resource)); 749#else 750 return (UINT64_MAX); 751#endif 752} 753 754/* |
755 * Returns amount of the %cpu resource that process 'p' can add to its %cpu 756 * utilization. Adding more than that would lead to the process being 757 * throttled. 758 */ 759static int64_t 760racct_pcpu_available(struct proc *p) 761{ 762 763#ifdef RCTL 764 return (rctl_pcpu_available(p)); 765#else 766 return (INT64_MAX); 767#endif 768} 769 770/* |
|
472 * Decrease allocation of 'resource' by 'amount' for process 'p'. 473 */ 474void 475racct_sub(struct proc *p, int resource, uint64_t amount) 476{ 477 478 SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0); 479 480 /* 481 * We need proc lock to dereference p->p_ucred. 482 */ 483 PROC_LOCK_ASSERT(p, MA_OWNED); | 771 * Decrease allocation of 'resource' by 'amount' for process 'p'. 772 */ 773void 774racct_sub(struct proc *p, int resource, uint64_t amount) 775{ 776 777 SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0); 778 779 /* 780 * We need proc lock to dereference p->p_ucred. 781 */ 782 PROC_LOCK_ASSERT(p, MA_OWNED); |
484 KASSERT(RACCT_IS_RECLAIMABLE(resource), 485 ("racct_sub: called for non-reclaimable resource %d", resource)); | 783 KASSERT(RACCT_CAN_DROP(resource), 784 ("racct_sub: called for non-droppable resource %d", resource)); |
486 487 mtx_lock(&racct_lock); 488 KASSERT(amount <= p->p_racct->r_resources[resource], 489 ("racct_sub: freeing %ju of resource %d, which is more " 490 "than allocated %jd for %s (pid %d)", amount, resource, 491 (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid)); 492 493 racct_alloc_resource(p->p_racct, resource, -amount); --- 5 unchanged lines hidden (view full) --- 499racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) 500{ 501 struct prison *pr; 502 503 SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount, 504 0, 0); 505 506#ifdef notyet | 785 786 mtx_lock(&racct_lock); 787 KASSERT(amount <= p->p_racct->r_resources[resource], 788 ("racct_sub: freeing %ju of resource %d, which is more " 789 "than allocated %jd for %s (pid %d)", amount, resource, 790 (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid)); 791 792 racct_alloc_resource(p->p_racct, resource, -amount); --- 5 unchanged lines hidden (view full) --- 798racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) 799{ 800 struct prison *pr; 801 802 SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount, 803 0, 0); 804 805#ifdef notyet |
507 KASSERT(RACCT_IS_RECLAIMABLE(resource), 508 ("racct_sub_cred: called for non-reclaimable resource %d", | 806 KASSERT(RACCT_CAN_DROP(resource), 807 ("racct_sub_cred: called for resource %d which can not drop", |
509 resource)); 510#endif 511 512 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount); 513 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 514 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource, 515 -amount); 516 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount); --- 29 unchanged lines hidden (view full) --- 546 mtx_lock(&racct_lock); 547 548#ifdef RCTL 549 error = rctl_proc_fork(parent, child); 550 if (error != 0) 551 goto out; 552#endif 553 | 808 resource)); 809#endif 810 811 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount); 812 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 813 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource, 814 -amount); 815 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount); --- 29 unchanged lines hidden (view full) --- 845 mtx_lock(&racct_lock); 846 847#ifdef RCTL 848 error = rctl_proc_fork(parent, child); 849 if (error != 0) 850 goto out; 851#endif 852 |
853 /* Init process cpu time. */ 854 child->p_prev_runtime = 0; 855 child->p_throttled = 0; 856 |
|
554 /* 555 * Inherit resource usage. 556 */ 557 for (i = 0; i <= RACCT_MAX; i++) { 558 if (parent->p_racct->r_resources[i] == 0 || 559 !RACCT_IS_INHERITABLE(i)) 560 continue; 561 --- 35 unchanged lines hidden (view full) --- 597#endif 598} 599 600void 601racct_proc_exit(struct proc *p) 602{ 603 int i; 604 uint64_t runtime; | 857 /* 858 * Inherit resource usage. 859 */ 860 for (i = 0; i <= RACCT_MAX; i++) { 861 if (parent->p_racct->r_resources[i] == 0 || 862 !RACCT_IS_INHERITABLE(i)) 863 continue; 864 --- 35 unchanged lines hidden (view full) --- 900#endif 901} 902 903void 904racct_proc_exit(struct proc *p) 905{ 906 int i; 907 uint64_t runtime; |
908 struct timeval wallclock; 909 uint64_t pct_estimate, pct; |
|
605 606 PROC_LOCK(p); 607 /* 608 * We don't need to calculate rux, proc_reap() has already done this. 609 */ 610 runtime = cputick2usec(p->p_rux.rux_runtime); 611#ifdef notyet 612 KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); 613#else 614 if (runtime < p->p_prev_runtime) 615 runtime = p->p_prev_runtime; 616#endif | 910 911 PROC_LOCK(p); 912 /* 913 * We don't need to calculate rux, proc_reap() has already done this. 914 */ 915 runtime = cputick2usec(p->p_rux.rux_runtime); 916#ifdef notyet 917 KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); 918#else 919 if (runtime < p->p_prev_runtime) 920 runtime = p->p_prev_runtime; 921#endif |
922 microuptime(&wallclock); 923 timevalsub(&wallclock, &p->p_stats->p_start); 924 pct_estimate = (1000000 * runtime * 100) / 925 ((uint64_t)wallclock.tv_sec * 1000000 + 926 wallclock.tv_usec); 927 pct = racct_getpcpu(p, pct_estimate); 928 |
|
617 mtx_lock(&racct_lock); 618 racct_set_locked(p, RACCT_CPU, runtime); | 929 mtx_lock(&racct_lock); 930 racct_set_locked(p, RACCT_CPU, runtime); |
931 racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct); |
|
619 620 for (i = 0; i <= RACCT_MAX; i++) { 621 if (p->p_racct->r_resources[i] == 0) 622 continue; 623 if (!RACCT_IS_RECLAIMABLE(i)) 624 continue; 625 racct_set_locked(p, i, 0); 626 } --- 60 unchanged lines hidden (view full) --- 687 688 racct_add_racct(dest, src); 689 racct_sub_racct(src, src); 690 691 mtx_unlock(&racct_lock); 692} 693 694static void | 932 933 for (i = 0; i <= RACCT_MAX; i++) { 934 if (p->p_racct->r_resources[i] == 0) 935 continue; 936 if (!RACCT_IS_RECLAIMABLE(i)) 937 continue; 938 racct_set_locked(p, i, 0); 939 } --- 60 unchanged lines hidden (view full) --- 1000 1001 racct_add_racct(dest, src); 1002 racct_sub_racct(src, src); 1003 1004 mtx_unlock(&racct_lock); 1005} 1006 1007static void |
1008racct_proc_throttle(struct proc *p) 1009{ 1010 struct thread *td; 1011#ifdef SMP 1012 int cpuid; 1013#endif 1014 1015 PROC_LOCK_ASSERT(p, MA_OWNED); 1016 1017 /* 1018 * Do not block kernel processes. Also do not block processes with 1019 * low %cpu utilization to improve interactivity. 1020 */ 1021 if (((p->p_flag & (P_SYSTEM | P_KTHREAD)) != 0) || 1022 (p->p_racct->r_resources[RACCT_PCTCPU] <= pcpu_threshold)) 1023 return; 1024 p->p_throttled = 1; 1025 1026 FOREACH_THREAD_IN_PROC(p, td) { 1027 switch (td->td_state) { 1028 case TDS_RUNQ: 1029 /* 1030 * If the thread is on the scheduler run-queue, we can 1031 * not just remove it from there. So we set the flag 1032 * TDF_NEEDRESCHED for the thread, so that once it is 1033 * running, it is taken off the cpu as soon as possible. 1034 */ 1035 thread_lock(td); 1036 td->td_flags |= TDF_NEEDRESCHED; 1037 thread_unlock(td); 1038 break; 1039 case TDS_RUNNING: 1040 /* 1041 * If the thread is running, we request a context 1042 * switch for it by setting the TDF_NEEDRESCHED flag. 1043 */ 1044 thread_lock(td); 1045 td->td_flags |= TDF_NEEDRESCHED; 1046#ifdef SMP 1047 cpuid = td->td_oncpu; 1048 if ((cpuid != NOCPU) && (td != curthread)) 1049 ipi_cpu(cpuid, IPI_AST); 1050#endif 1051 thread_unlock(td); 1052 break; 1053 default: 1054 break; 1055 } 1056 } 1057} 1058 1059static void 1060racct_proc_wakeup(struct proc *p) 1061{ 1062 PROC_LOCK_ASSERT(p, MA_OWNED); 1063 1064 if (p->p_throttled) { 1065 p->p_throttled = 0; 1066 wakeup(p->p_racct); 1067 } 1068} 1069 1070static void 1071racct_decay_resource(struct racct *racct, void * res, void* dummy) 1072{ 1073 int resource; 1074 int64_t r_old, r_new; 1075 1076 resource = *(int *)res; 1077 r_old = racct->r_resources[resource]; 1078 1079 /* If there is nothing to decay, just exit. */ 1080 if (r_old <= 0) 1081 return; 1082 1083 mtx_lock(&racct_lock); 1084 r_new = r_old * RACCT_DECAY_FACTOR / FSCALE; 1085 racct->r_resources[resource] = r_new; 1086 mtx_unlock(&racct_lock); 1087} 1088 1089static void 1090racct_decay(int resource) 1091{ 1092 ui_racct_foreach(racct_decay_resource, &resource, NULL); 1093 loginclass_racct_foreach(racct_decay_resource, &resource, NULL); 1094 prison_racct_foreach(racct_decay_resource, &resource, NULL); 1095} 1096 1097static void |
|
695racctd(void) 696{ 697 struct thread *td; 698 struct proc *p; 699 struct timeval wallclock; 700 uint64_t runtime; | 1098racctd(void) 1099{ 1100 struct thread *td; 1101 struct proc *p; 1102 struct timeval wallclock; 1103 uint64_t runtime; |
1104 uint64_t pct, pct_estimate; |
|
701 702 for (;;) { | 1105 1106 for (;;) { |
1107 racct_decay(RACCT_PCTCPU); 1108 |
|
703 sx_slock(&allproc_lock); 704 | 1109 sx_slock(&allproc_lock); 1110 |
1111 LIST_FOREACH(p, &zombproc, p_list) { 1112 PROC_LOCK(p); 1113 racct_set(p, RACCT_PCTCPU, 0); 1114 PROC_UNLOCK(p); 1115 } 1116 |
|
705 FOREACH_PROC_IN_SYSTEM(p) { | 1117 FOREACH_PROC_IN_SYSTEM(p) { |
706 if (p->p_state != PRS_NORMAL) | 1118 PROC_LOCK(p); 1119 if (p->p_state != PRS_NORMAL) { 1120 PROC_UNLOCK(p); |
707 continue; | 1121 continue; |
1122 } |
|
708 709 microuptime(&wallclock); 710 timevalsub(&wallclock, &p->p_stats->p_start); | 1123 1124 microuptime(&wallclock); 1125 timevalsub(&wallclock, &p->p_stats->p_start); |
711 PROC_LOCK(p); | |
712 PROC_SLOCK(p); 713 FOREACH_THREAD_IN_PROC(p, td) 714 ruxagg(p, td); 715 runtime = cputick2usec(p->p_rux.rux_runtime); 716 PROC_SUNLOCK(p); 717#ifdef notyet 718 KASSERT(runtime >= p->p_prev_runtime, 719 ("runtime < p_prev_runtime")); 720#else 721 if (runtime < p->p_prev_runtime) 722 runtime = p->p_prev_runtime; 723#endif 724 p->p_prev_runtime = runtime; | 1126 PROC_SLOCK(p); 1127 FOREACH_THREAD_IN_PROC(p, td) 1128 ruxagg(p, td); 1129 runtime = cputick2usec(p->p_rux.rux_runtime); 1130 PROC_SUNLOCK(p); 1131#ifdef notyet 1132 KASSERT(runtime >= p->p_prev_runtime, 1133 ("runtime < p_prev_runtime")); 1134#else 1135 if (runtime < p->p_prev_runtime) 1136 runtime = p->p_prev_runtime; 1137#endif 1138 p->p_prev_runtime = runtime; |
1139 pct_estimate = (1000000 * runtime * 100) / 1140 ((uint64_t)wallclock.tv_sec * 1000000 + 1141 wallclock.tv_usec); 1142 pct = racct_getpcpu(p, pct_estimate); |
|
725 mtx_lock(&racct_lock); | 1143 mtx_lock(&racct_lock); |
1144 racct_set_force_locked(p, RACCT_PCTCPU, pct); |
|
726 racct_set_locked(p, RACCT_CPU, runtime); 727 racct_set_locked(p, RACCT_WALLCLOCK, 728 (uint64_t)wallclock.tv_sec * 1000000 + 729 wallclock.tv_usec); 730 mtx_unlock(&racct_lock); 731 PROC_UNLOCK(p); 732 } | 1145 racct_set_locked(p, RACCT_CPU, runtime); 1146 racct_set_locked(p, RACCT_WALLCLOCK, 1147 (uint64_t)wallclock.tv_sec * 1000000 + 1148 wallclock.tv_usec); 1149 mtx_unlock(&racct_lock); 1150 PROC_UNLOCK(p); 1151 } |
1152 1153 /* 1154 * To ensure that processes are throttled in a fair way, we need 1155 * to iterate over all processes again and check the limits 1156 * for %cpu resource only after ucred racct containers have been 1157 * properly filled. 1158 */ 1159 FOREACH_PROC_IN_SYSTEM(p) { 1160 PROC_LOCK(p); 1161 if (p->p_state != PRS_NORMAL) { 1162 PROC_UNLOCK(p); 1163 continue; 1164 } 1165 1166 if (racct_pcpu_available(p) <= 0) 1167 racct_proc_throttle(p); 1168 else if (p->p_throttled) 1169 racct_proc_wakeup(p); 1170 PROC_UNLOCK(p); 1171 } |
|
733 sx_sunlock(&allproc_lock); 734 pause("-", hz); 735 } 736} 737 738static struct kproc_desc racctd_kp = { 739 "racctd", 740 racctd, --- 102 unchanged lines hidden --- | 1172 sx_sunlock(&allproc_lock); 1173 pause("-", hz); 1174 } 1175} 1176 1177static struct kproc_desc racctd_kp = { 1178 "racctd", 1179 racctd, --- 102 unchanged lines hidden --- |