kern_racct.c revision 225981
1220137Strasz/*- 2220137Strasz * Copyright (c) 2010 The FreeBSD Foundation 3220137Strasz * All rights reserved. 4220137Strasz * 5220137Strasz * This software was developed by Edward Tomasz Napierala under sponsorship 6220137Strasz * from the FreeBSD Foundation. 7220137Strasz * 8220137Strasz * Redistribution and use in source and binary forms, with or without 9220137Strasz * modification, are permitted provided that the following conditions 10220137Strasz * are met: 11220137Strasz * 1. Redistributions of source code must retain the above copyright 12220137Strasz * notice, this list of conditions and the following disclaimer. 13220137Strasz * 2. Redistributions in binary form must reproduce the above copyright 14220137Strasz * notice, this list of conditions and the following disclaimer in the 15220137Strasz * documentation and/or other materials provided with the distribution. 16220137Strasz * 17220137Strasz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18220137Strasz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19220137Strasz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20220137Strasz * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21220137Strasz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22220137Strasz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23220137Strasz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24220137Strasz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25220137Strasz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26220137Strasz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27220137Strasz * SUCH DAMAGE. 28220137Strasz * 29220137Strasz * $FreeBSD: head/sys/kern/kern_racct.c 225981 2011-10-04 14:56:33Z trasz $ 30220137Strasz */ 31220137Strasz 32220137Strasz#include <sys/cdefs.h> 33220137Strasz__FBSDID("$FreeBSD: head/sys/kern/kern_racct.c 225981 2011-10-04 14:56:33Z trasz $"); 34220137Strasz 35220137Strasz#include "opt_kdtrace.h" 36220137Strasz 37220137Strasz#include <sys/param.h> 38220137Strasz#include <sys/eventhandler.h> 39220137Strasz#include <sys/param.h> 40220137Strasz#include <sys/jail.h> 41220137Strasz#include <sys/kernel.h> 42220137Strasz#include <sys/kthread.h> 43220137Strasz#include <sys/lock.h> 44220137Strasz#include <sys/loginclass.h> 45220137Strasz#include <sys/malloc.h> 46220137Strasz#include <sys/mutex.h> 47220137Strasz#include <sys/proc.h> 48220137Strasz#include <sys/racct.h> 49220137Strasz#include <sys/resourcevar.h> 50220137Strasz#include <sys/sbuf.h> 51220137Strasz#include <sys/sched.h> 52220137Strasz#include <sys/sdt.h> 53220137Strasz#include <sys/sx.h> 54220137Strasz#include <sys/sysent.h> 55220137Strasz#include <sys/sysproto.h> 56220137Strasz#include <sys/systm.h> 57220137Strasz#include <sys/umtx.h> 58220137Strasz 59220137Strasz#ifdef RCTL 60220137Strasz#include <sys/rctl.h> 61220137Strasz#endif 62220137Strasz 63220137Strasz#ifdef RACCT 64220137Strasz 65220137StraszFEATURE(racct, "Resource Accounting"); 66220137Strasz 67220137Straszstatic struct mtx racct_lock; 68220137StraszMTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF); 69220137Strasz 70220137Straszstatic uma_zone_t racct_zone; 71220137Strasz 72220137Straszstatic void racct_sub_racct(struct racct *dest, const struct racct *src); 73220137Straszstatic void racct_sub_cred_locked(struct ucred *cred, int resource, 74220137Strasz uint64_t amount); 75220137Straszstatic void racct_add_cred_locked(struct ucred *cred, int resource, 76220137Strasz uint64_t amount); 77220137Strasz 78220137StraszSDT_PROVIDER_DEFINE(racct); 79220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int", 80220137Strasz "uint64_t"); 81220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure, 82220137Strasz "struct proc *", "int", "uint64_t"); 83220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *", 84220137Strasz "int", "uint64_t"); 85220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *", 86220137Strasz "int", "uint64_t"); 87220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int", 88220137Strasz "uint64_t"); 89220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure, 90220137Strasz "struct proc *", "int", "uint64_t"); 91220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int", 92220137Strasz "uint64_t"); 93220137StraszSDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *", 94220137Strasz "int", "uint64_t"); 95220137StraszSDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *"); 96220137StraszSDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *"); 97220137StraszSDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *", 98220137Strasz "struct racct *"); 99220137StraszSDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure, 100220137Strasz "struct racct *", "struct racct *"); 101220137StraszSDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *", 102220137Strasz "struct racct *"); 103220137Strasz 104220137Straszint racct_types[] = { 105220137Strasz [RACCT_CPU] = 106224036Strasz RACCT_IN_MILLIONS, 107220137Strasz [RACCT_DATA] = 108220137Strasz RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 109220137Strasz [RACCT_STACK] = 110220137Strasz RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 111220137Strasz [RACCT_CORE] = 112220137Strasz RACCT_DENIABLE, 113220137Strasz [RACCT_RSS] = 114220137Strasz RACCT_RECLAIMABLE, 115220137Strasz [RACCT_MEMLOCK] = 116220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE, 117220137Strasz [RACCT_NPROC] = 118220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE, 119220137Strasz [RACCT_NOFILE] = 120220137Strasz RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 121220137Strasz [RACCT_VMEM] = 122220137Strasz RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 123220137Strasz [RACCT_NPTS] = 124220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 125220137Strasz [RACCT_SWAP] = 126220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 127220137Strasz [RACCT_NTHR] = 128220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE, 129220137Strasz [RACCT_MSGQQUEUED] = 130220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 131220137Strasz [RACCT_MSGQSIZE] = 132220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 133220137Strasz [RACCT_NMSGQ] = 134220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 135220137Strasz [RACCT_NSEM] = 136220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 137220137Strasz [RACCT_NSEMOP] = 138220137Strasz RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, 139220137Strasz [RACCT_NSHM] = 140220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 141220137Strasz [RACCT_SHMSIZE] = 142220137Strasz RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, 143220137Strasz [RACCT_WALLCLOCK] = 144224036Strasz RACCT_IN_MILLIONS }; 145220137Strasz 146220137Straszstatic void 147220137Straszracct_add_racct(struct racct *dest, const struct racct *src) 148220137Strasz{ 149220137Strasz int i; 150220137Strasz 151220137Strasz mtx_assert(&racct_lock, MA_OWNED); 152220137Strasz 153220137Strasz /* 154220137Strasz * Update resource usage in dest. 155220137Strasz */ 156220137Strasz for (i = 0; i <= RACCT_MAX; i++) { 157220137Strasz KASSERT(dest->r_resources[i] >= 0, 158220137Strasz ("racct propagation meltdown: dest < 0")); 159220137Strasz KASSERT(src->r_resources[i] >= 0, 160220137Strasz ("racct propagation meltdown: src < 0")); 161220137Strasz dest->r_resources[i] += src->r_resources[i]; 162220137Strasz } 163220137Strasz} 164220137Strasz 165220137Straszstatic void 166220137Straszracct_sub_racct(struct racct *dest, const struct racct *src) 167220137Strasz{ 168220137Strasz int i; 169220137Strasz 170220137Strasz mtx_assert(&racct_lock, MA_OWNED); 171220137Strasz 172220137Strasz /* 173220137Strasz * Update resource usage in dest. 174220137Strasz */ 175220137Strasz for (i = 0; i <= RACCT_MAX; i++) { 176223844Strasz if (!RACCT_IS_SLOPPY(i)) { 177220137Strasz KASSERT(dest->r_resources[i] >= 0, 178220137Strasz ("racct propagation meltdown: dest < 0")); 179220137Strasz KASSERT(src->r_resources[i] >= 0, 180220137Strasz ("racct propagation meltdown: src < 0")); 181220137Strasz KASSERT(src->r_resources[i] <= dest->r_resources[i], 182220137Strasz ("racct propagation meltdown: src > dest")); 183220137Strasz } 184223844Strasz if (RACCT_IS_RECLAIMABLE(i)) { 185220137Strasz dest->r_resources[i] -= src->r_resources[i]; 186220137Strasz if (dest->r_resources[i] < 0) { 187223844Strasz KASSERT(RACCT_IS_SLOPPY(i), 188220137Strasz ("racct_sub_racct: usage < 0")); 189220137Strasz dest->r_resources[i] = 0; 190220137Strasz } 191220137Strasz } 192220137Strasz } 193220137Strasz} 194220137Strasz 195220137Straszvoid 196220137Straszracct_create(struct racct **racctp) 197220137Strasz{ 198220137Strasz 199220137Strasz SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0); 200220137Strasz 201220137Strasz KASSERT(*racctp == NULL, ("racct already allocated")); 202220137Strasz 203220137Strasz *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO); 204220137Strasz} 205220137Strasz 206220137Straszstatic void 207220137Straszracct_destroy_locked(struct racct **racctp) 208220137Strasz{ 209220137Strasz int i; 210220137Strasz struct racct *racct; 211220137Strasz 212220137Strasz SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0); 213220137Strasz 214220137Strasz mtx_assert(&racct_lock, MA_OWNED); 215220137Strasz KASSERT(racctp != NULL, ("NULL racctp")); 216220137Strasz KASSERT(*racctp != NULL, ("NULL racct")); 217220137Strasz 218220137Strasz racct = *racctp; 219220137Strasz 220220137Strasz for (i = 0; i <= RACCT_MAX; i++) { 221223844Strasz if (RACCT_IS_SLOPPY(i)) 222220137Strasz continue; 223223844Strasz if (!RACCT_IS_RECLAIMABLE(i)) 224220137Strasz continue; 225220137Strasz KASSERT(racct->r_resources[i] == 0, 226220137Strasz ("destroying non-empty racct: " 227220137Strasz "%ju allocated for resource %d\n", 228220137Strasz racct->r_resources[i], i)); 229220137Strasz } 230220137Strasz uma_zfree(racct_zone, racct); 231220137Strasz *racctp = NULL; 232220137Strasz} 233220137Strasz 234220137Straszvoid 235220137Straszracct_destroy(struct racct **racct) 236220137Strasz{ 237220137Strasz 238220137Strasz mtx_lock(&racct_lock); 239220137Strasz racct_destroy_locked(racct); 240220137Strasz mtx_unlock(&racct_lock); 241220137Strasz} 242220137Strasz 243220137Strasz/* 244220137Strasz * Increase consumption of 'resource' by 'amount' for 'racct' 245220137Strasz * and all its parents. Differently from other cases, 'amount' here 246220137Strasz * may be less than zero. 247220137Strasz */ 248220137Straszstatic void 249220137Straszracct_alloc_resource(struct racct *racct, int resource, 250220137Strasz uint64_t amount) 251220137Strasz{ 252220137Strasz 253220137Strasz mtx_assert(&racct_lock, MA_OWNED); 254220137Strasz KASSERT(racct != NULL, ("NULL racct")); 255220137Strasz 256220137Strasz racct->r_resources[resource] += amount; 257220137Strasz if (racct->r_resources[resource] < 0) { 258223844Strasz KASSERT(RACCT_IS_SLOPPY(resource), 259220137Strasz ("racct_alloc_resource: usage < 0")); 260220137Strasz racct->r_resources[resource] = 0; 261220137Strasz } 262220137Strasz} 263220137Strasz 264225944Straszstatic int 265225944Straszracct_add_locked(struct proc *p, int resource, uint64_t amount) 266220137Strasz{ 267220137Strasz#ifdef RCTL 268220137Strasz int error; 269220137Strasz#endif 270220137Strasz 271220137Strasz if (p->p_flag & P_SYSTEM) 272220137Strasz return (0); 273220137Strasz 274220137Strasz SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0); 275220137Strasz 276220137Strasz /* 277220137Strasz * We need proc lock to dereference p->p_ucred. 278220137Strasz */ 279220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 280220137Strasz 281220137Strasz#ifdef RCTL 282220137Strasz error = rctl_enforce(p, resource, amount); 283223844Strasz if (error && RACCT_IS_DENIABLE(resource)) { 284220137Strasz SDT_PROBE(racct, kernel, rusage, add_failure, p, resource, 285220137Strasz amount, 0, 0); 286220137Strasz return (error); 287220137Strasz } 288220137Strasz#endif 289220137Strasz racct_alloc_resource(p->p_racct, resource, amount); 290220137Strasz racct_add_cred_locked(p->p_ucred, resource, amount); 291220137Strasz 292220137Strasz return (0); 293220137Strasz} 294220137Strasz 295225944Strasz/* 296225944Strasz * Increase allocation of 'resource' by 'amount' for process 'p'. 297225944Strasz * Return 0 if it's below limits, or errno, if it's not. 298225944Strasz */ 299225944Straszint 300225944Straszracct_add(struct proc *p, int resource, uint64_t amount) 301225944Strasz{ 302225944Strasz int error; 303225944Strasz 304225944Strasz mtx_lock(&racct_lock); 305225944Strasz error = racct_add_locked(p, resource, amount); 306225944Strasz mtx_unlock(&racct_lock); 307225944Strasz return (error); 308225944Strasz} 309225944Strasz 310220137Straszstatic void 311220137Straszracct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount) 312220137Strasz{ 313220137Strasz struct prison *pr; 314220137Strasz 315220137Strasz SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount, 316220137Strasz 0, 0); 317220137Strasz 318220137Strasz racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount); 319220137Strasz for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 320221362Strasz racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource, 321221362Strasz amount); 322220137Strasz racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount); 323220137Strasz} 324220137Strasz 325220137Strasz/* 326220137Strasz * Increase allocation of 'resource' by 'amount' for credential 'cred'. 327220137Strasz * Doesn't check for limits and never fails. 328220137Strasz * 329220137Strasz * XXX: Shouldn't this ever return an error? 330220137Strasz */ 331220137Straszvoid 332220137Straszracct_add_cred(struct ucred *cred, int resource, uint64_t amount) 333220137Strasz{ 334220137Strasz 335220137Strasz mtx_lock(&racct_lock); 336220137Strasz racct_add_cred_locked(cred, resource, amount); 337220137Strasz mtx_unlock(&racct_lock); 338220137Strasz} 339220137Strasz 340220137Strasz/* 341220137Strasz * Increase allocation of 'resource' by 'amount' for process 'p'. 342220137Strasz * Doesn't check for limits and never fails. 343220137Strasz */ 344220137Straszvoid 345220137Straszracct_add_force(struct proc *p, int resource, uint64_t amount) 346220137Strasz{ 347220137Strasz 348220137Strasz if (p->p_flag & P_SYSTEM) 349220137Strasz return; 350220137Strasz 351220137Strasz SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0); 352220137Strasz 353220137Strasz /* 354220137Strasz * We need proc lock to dereference p->p_ucred. 355220137Strasz */ 356220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 357220137Strasz 358220137Strasz mtx_lock(&racct_lock); 359220137Strasz racct_alloc_resource(p->p_racct, resource, amount); 360220137Strasz mtx_unlock(&racct_lock); 361220137Strasz racct_add_cred(p->p_ucred, resource, amount); 362220137Strasz} 363220137Strasz 364220137Straszstatic int 365220137Straszracct_set_locked(struct proc *p, int resource, uint64_t amount) 366220137Strasz{ 367220137Strasz int64_t diff; 368220137Strasz#ifdef RCTL 369220137Strasz int error; 370220137Strasz#endif 371220137Strasz 372220137Strasz if (p->p_flag & P_SYSTEM) 373220137Strasz return (0); 374220137Strasz 375220137Strasz SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); 376220137Strasz 377220137Strasz /* 378220137Strasz * We need proc lock to dereference p->p_ucred. 379220137Strasz */ 380220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 381220137Strasz 382220137Strasz diff = amount - p->p_racct->r_resources[resource]; 383220137Strasz#ifdef notyet 384223844Strasz KASSERT(diff >= 0 || RACCT_IS_RECLAIMABLE(resource), 385220137Strasz ("racct_set: usage of non-reclaimable resource %d dropping", 386220137Strasz resource)); 387220137Strasz#endif 388220137Strasz#ifdef RCTL 389220137Strasz if (diff > 0) { 390220137Strasz error = rctl_enforce(p, resource, diff); 391223844Strasz if (error && RACCT_IS_DENIABLE(resource)) { 392220137Strasz SDT_PROBE(racct, kernel, rusage, set_failure, p, 393220137Strasz resource, amount, 0, 0); 394220137Strasz return (error); 395220137Strasz } 396220137Strasz } 397220137Strasz#endif 398220137Strasz racct_alloc_resource(p->p_racct, resource, diff); 399220137Strasz if (diff > 0) 400220137Strasz racct_add_cred_locked(p->p_ucred, resource, diff); 401220137Strasz else if (diff < 0) 402220137Strasz racct_sub_cred_locked(p->p_ucred, resource, -diff); 403220137Strasz 404220137Strasz return (0); 405220137Strasz} 406220137Strasz 407220137Strasz/* 408220137Strasz * Set allocation of 'resource' to 'amount' for process 'p'. 409220137Strasz * Return 0 if it's below limits, or errno, if it's not. 410220137Strasz * 411220137Strasz * Note that decreasing the allocation always returns 0, 412220137Strasz * even if it's above the limit. 413220137Strasz */ 414220137Straszint 415220137Straszracct_set(struct proc *p, int resource, uint64_t amount) 416220137Strasz{ 417220137Strasz int error; 418220137Strasz 419220137Strasz mtx_lock(&racct_lock); 420220137Strasz error = racct_set_locked(p, resource, amount); 421220137Strasz mtx_unlock(&racct_lock); 422220137Strasz return (error); 423220137Strasz} 424220137Strasz 425220137Straszvoid 426220137Straszracct_set_force(struct proc *p, int resource, uint64_t amount) 427220137Strasz{ 428220137Strasz int64_t diff; 429220137Strasz 430220137Strasz if (p->p_flag & P_SYSTEM) 431220137Strasz return; 432220137Strasz 433220137Strasz SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); 434220137Strasz 435220137Strasz /* 436220137Strasz * We need proc lock to dereference p->p_ucred. 437220137Strasz */ 438220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 439220137Strasz 440220137Strasz mtx_lock(&racct_lock); 441220137Strasz diff = amount - p->p_racct->r_resources[resource]; 442220137Strasz racct_alloc_resource(p->p_racct, resource, diff); 443220137Strasz if (diff > 0) 444220137Strasz racct_add_cred_locked(p->p_ucred, resource, diff); 445220137Strasz else if (diff < 0) 446220137Strasz racct_sub_cred_locked(p->p_ucred, resource, -diff); 447220137Strasz mtx_unlock(&racct_lock); 448220137Strasz} 449220137Strasz 450220137Strasz/* 451220137Strasz * Returns amount of 'resource' the process 'p' can keep allocated. 452220137Strasz * Allocating more than that would be denied, unless the resource 453220137Strasz * is marked undeniable. Amount of already allocated resource does 454220137Strasz * not matter. 455220137Strasz */ 456220137Straszuint64_t 457220137Straszracct_get_limit(struct proc *p, int resource) 458220137Strasz{ 459220137Strasz 460220137Strasz#ifdef RCTL 461220137Strasz return (rctl_get_limit(p, resource)); 462220137Strasz#else 463220137Strasz return (UINT64_MAX); 464220137Strasz#endif 465220137Strasz} 466220137Strasz 467220137Strasz/* 468220137Strasz * Returns amount of 'resource' the process 'p' can keep allocated. 469220137Strasz * Allocating more than that would be denied, unless the resource 470220137Strasz * is marked undeniable. Amount of already allocated resource does 471220137Strasz * matter. 472220137Strasz */ 473220137Straszuint64_t 474220137Straszracct_get_available(struct proc *p, int resource) 475220137Strasz{ 476220137Strasz 477220137Strasz#ifdef RCTL 478220137Strasz return (rctl_get_available(p, resource)); 479220137Strasz#else 480220137Strasz return (UINT64_MAX); 481220137Strasz#endif 482220137Strasz} 483220137Strasz 484220137Strasz/* 485220137Strasz * Decrease allocation of 'resource' by 'amount' for process 'p'. 486220137Strasz */ 487220137Straszvoid 488220137Straszracct_sub(struct proc *p, int resource, uint64_t amount) 489220137Strasz{ 490220137Strasz 491220137Strasz if (p->p_flag & P_SYSTEM) 492220137Strasz return; 493220137Strasz 494220137Strasz SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0); 495220137Strasz 496220137Strasz /* 497220137Strasz * We need proc lock to dereference p->p_ucred. 498220137Strasz */ 499220137Strasz PROC_LOCK_ASSERT(p, MA_OWNED); 500223844Strasz KASSERT(RACCT_IS_RECLAIMABLE(resource), 501220137Strasz ("racct_sub: called for non-reclaimable resource %d", resource)); 502220137Strasz 503220137Strasz mtx_lock(&racct_lock); 504220137Strasz KASSERT(amount <= p->p_racct->r_resources[resource], 505220137Strasz ("racct_sub: freeing %ju of resource %d, which is more " 506220137Strasz "than allocated %jd for %s (pid %d)", amount, resource, 507220137Strasz (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid)); 508220137Strasz 509220137Strasz racct_alloc_resource(p->p_racct, resource, -amount); 510220137Strasz racct_sub_cred_locked(p->p_ucred, resource, amount); 511220137Strasz mtx_unlock(&racct_lock); 512220137Strasz} 513220137Strasz 514220137Straszstatic void 515220137Straszracct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) 516220137Strasz{ 517220137Strasz struct prison *pr; 518220137Strasz 519220137Strasz SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount, 520220137Strasz 0, 0); 521220137Strasz 522220137Strasz#ifdef notyet 523223844Strasz KASSERT(RACCT_IS_RECLAIMABLE(resource), 524220137Strasz ("racct_sub_cred: called for non-reclaimable resource %d", 525220137Strasz resource)); 526220137Strasz#endif 527220137Strasz 528220137Strasz racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount); 529220137Strasz for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) 530221362Strasz racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource, 531221362Strasz -amount); 532220137Strasz racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount); 533220137Strasz} 534220137Strasz 535220137Strasz/* 536220137Strasz * Decrease allocation of 'resource' by 'amount' for credential 'cred'. 537220137Strasz */ 538220137Straszvoid 539220137Straszracct_sub_cred(struct ucred *cred, int resource, uint64_t amount) 540220137Strasz{ 541220137Strasz 542220137Strasz mtx_lock(&racct_lock); 543220137Strasz racct_sub_cred_locked(cred, resource, amount); 544220137Strasz mtx_unlock(&racct_lock); 545220137Strasz} 546220137Strasz 547220137Strasz/* 548220137Strasz * Inherit resource usage information from the parent process. 549220137Strasz */ 550220137Straszint 551220137Straszracct_proc_fork(struct proc *parent, struct proc *child) 552220137Strasz{ 553220137Strasz int i, error = 0; 554220137Strasz 555220137Strasz /* 556220137Strasz * Create racct for the child process. 557220137Strasz */ 558220137Strasz racct_create(&child->p_racct); 559220137Strasz 560220137Strasz /* 561220137Strasz * No resource accounting for kernel processes. 562220137Strasz */ 563220137Strasz if (child->p_flag & P_SYSTEM) 564220137Strasz return (0); 565220137Strasz 566220137Strasz PROC_LOCK(parent); 567220137Strasz PROC_LOCK(child); 568220137Strasz mtx_lock(&racct_lock); 569220137Strasz 570225981Strasz#ifdef RCTL 571225981Strasz error = rctl_proc_fork(parent, child); 572225981Strasz if (error != 0) 573225981Strasz goto out; 574225981Strasz#endif 575225981Strasz 576220137Strasz /* 577220137Strasz * Inherit resource usage. 578220137Strasz */ 579220137Strasz for (i = 0; i <= RACCT_MAX; i++) { 580220137Strasz if (parent->p_racct->r_resources[i] == 0 || 581223844Strasz !RACCT_IS_INHERITABLE(i)) 582220137Strasz continue; 583220137Strasz 584220137Strasz error = racct_set_locked(child, i, 585220137Strasz parent->p_racct->r_resources[i]); 586225938Strasz if (error != 0) 587220137Strasz goto out; 588220137Strasz } 589220137Strasz 590225944Strasz error = racct_add_locked(child, RACCT_NPROC, 1); 591225944Strasz error += racct_add_locked(child, RACCT_NTHR, 1); 592225944Strasz 593220137Straszout: 594220137Strasz mtx_unlock(&racct_lock); 595220137Strasz PROC_UNLOCK(child); 596220137Strasz PROC_UNLOCK(parent); 597220137Strasz 598220137Strasz return (error); 599220137Strasz} 600220137Strasz 601225940Strasz/* 602225940Strasz * Called at the end of fork1(), to handle rules that require the process 603225940Strasz * to be fully initialized. 604225940Strasz */ 605220137Straszvoid 606225940Straszracct_proc_fork_done(struct proc *child) 607225940Strasz{ 608225940Strasz 609225940Strasz#ifdef RCTL 610225940Strasz PROC_LOCK(child); 611225940Strasz mtx_lock(&racct_lock); 612225940Strasz rctl_enforce(child, RACCT_NPROC, 0); 613225940Strasz rctl_enforce(child, RACCT_NTHR, 0); 614225940Strasz mtx_unlock(&racct_lock); 615225940Strasz PROC_UNLOCK(child); 616225940Strasz#endif 617225940Strasz} 618225940Strasz 619225940Straszvoid 620220137Straszracct_proc_exit(struct proc *p) 621220137Strasz{ 622225364Strasz int i; 623220137Strasz uint64_t runtime; 624220137Strasz 625220137Strasz PROC_LOCK(p); 626220137Strasz /* 627220137Strasz * We don't need to calculate rux, proc_reap() has already done this. 628220137Strasz */ 629220137Strasz runtime = cputick2usec(p->p_rux.rux_runtime); 630220137Strasz#ifdef notyet 631220137Strasz KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); 632220137Strasz#else 633220137Strasz if (runtime < p->p_prev_runtime) 634220137Strasz runtime = p->p_prev_runtime; 635220137Strasz#endif 636225364Strasz mtx_lock(&racct_lock); 637225364Strasz racct_set_locked(p, RACCT_CPU, runtime); 638220137Strasz 639225364Strasz for (i = 0; i <= RACCT_MAX; i++) { 640225364Strasz if (p->p_racct->r_resources[i] == 0) 641225364Strasz continue; 642225364Strasz if (!RACCT_IS_RECLAIMABLE(i)) 643225364Strasz continue; 644225364Strasz racct_set_locked(p, i, 0); 645225364Strasz } 646225364Strasz 647225364Strasz mtx_unlock(&racct_lock); 648220137Strasz PROC_UNLOCK(p); 649220137Strasz 650220137Strasz#ifdef RCTL 651220137Strasz rctl_racct_release(p->p_racct); 652220137Strasz#endif 653220137Strasz racct_destroy(&p->p_racct); 654220137Strasz} 655220137Strasz 656220137Strasz/* 657220137Strasz * Called after credentials change, to move resource utilisation 658220137Strasz * between raccts. 659220137Strasz */ 660220137Straszvoid 661220137Straszracct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, 662220137Strasz struct ucred *newcred) 663220137Strasz{ 664220137Strasz struct uidinfo *olduip, *newuip; 665220137Strasz struct loginclass *oldlc, *newlc; 666220137Strasz struct prison *oldpr, *newpr, *pr; 667220137Strasz 668220137Strasz PROC_LOCK_ASSERT(p, MA_NOTOWNED); 669220137Strasz 670220137Strasz newuip = newcred->cr_ruidinfo; 671220137Strasz olduip = oldcred->cr_ruidinfo; 672220137Strasz newlc = newcred->cr_loginclass; 673220137Strasz oldlc = oldcred->cr_loginclass; 674220137Strasz newpr = newcred->cr_prison; 675220137Strasz oldpr = oldcred->cr_prison; 676220137Strasz 677220137Strasz mtx_lock(&racct_lock); 678220137Strasz if (newuip != olduip) { 679220137Strasz racct_sub_racct(olduip->ui_racct, p->p_racct); 680220137Strasz racct_add_racct(newuip->ui_racct, p->p_racct); 681220137Strasz } 682220137Strasz if (newlc != oldlc) { 683220137Strasz racct_sub_racct(oldlc->lc_racct, p->p_racct); 684220137Strasz racct_add_racct(newlc->lc_racct, p->p_racct); 685220137Strasz } 686220137Strasz if (newpr != oldpr) { 687220137Strasz for (pr = oldpr; pr != NULL; pr = pr->pr_parent) 688221362Strasz racct_sub_racct(pr->pr_prison_racct->prr_racct, 689221362Strasz p->p_racct); 690220137Strasz for (pr = newpr; pr != NULL; pr = pr->pr_parent) 691221362Strasz racct_add_racct(pr->pr_prison_racct->prr_racct, 692221362Strasz p->p_racct); 693220137Strasz } 694220137Strasz mtx_unlock(&racct_lock); 695220137Strasz 696220137Strasz#ifdef RCTL 697220137Strasz rctl_proc_ucred_changed(p, newcred); 698220137Strasz#endif 699220137Strasz} 700220137Strasz 701220137Straszstatic void 702220137Straszracctd(void) 703220137Strasz{ 704220137Strasz struct thread *td; 705220137Strasz struct proc *p; 706220137Strasz struct timeval wallclock; 707220137Strasz uint64_t runtime; 708220137Strasz 709220137Strasz for (;;) { 710220137Strasz sx_slock(&allproc_lock); 711220137Strasz 712220137Strasz FOREACH_PROC_IN_SYSTEM(p) { 713220137Strasz if (p->p_state != PRS_NORMAL) 714220137Strasz continue; 715220137Strasz if (p->p_flag & P_SYSTEM) 716220137Strasz continue; 717220137Strasz 718220137Strasz microuptime(&wallclock); 719220137Strasz timevalsub(&wallclock, &p->p_stats->p_start); 720220137Strasz PROC_LOCK(p); 721220137Strasz PROC_SLOCK(p); 722220137Strasz FOREACH_THREAD_IN_PROC(p, td) { 723220137Strasz ruxagg(p, td); 724220137Strasz thread_lock(td); 725220137Strasz thread_unlock(td); 726220137Strasz } 727220137Strasz runtime = cputick2usec(p->p_rux.rux_runtime); 728220137Strasz PROC_SUNLOCK(p); 729220137Strasz#ifdef notyet 730220137Strasz KASSERT(runtime >= p->p_prev_runtime, 731220137Strasz ("runtime < p_prev_runtime")); 732220137Strasz#else 733220137Strasz if (runtime < p->p_prev_runtime) 734220137Strasz runtime = p->p_prev_runtime; 735220137Strasz#endif 736220137Strasz p->p_prev_runtime = runtime; 737220137Strasz mtx_lock(&racct_lock); 738220137Strasz racct_set_locked(p, RACCT_CPU, runtime); 739220137Strasz racct_set_locked(p, RACCT_WALLCLOCK, 740220137Strasz wallclock.tv_sec * 1000000 + wallclock.tv_usec); 741220137Strasz mtx_unlock(&racct_lock); 742220137Strasz PROC_UNLOCK(p); 743220137Strasz } 744220137Strasz sx_sunlock(&allproc_lock); 745220137Strasz pause("-", hz); 746220137Strasz } 747220137Strasz} 748220137Strasz 749220137Straszstatic struct kproc_desc racctd_kp = { 750220137Strasz "racctd", 751220137Strasz racctd, 752220137Strasz NULL 753220137Strasz}; 754220137StraszSYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp); 755220137Strasz 756220137Straszstatic void 757220137Straszracct_init(void) 758220137Strasz{ 759220137Strasz 760220137Strasz racct_zone = uma_zcreate("racct", sizeof(struct racct), 761220137Strasz NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 762220137Strasz /* 763220137Strasz * XXX: Move this somewhere. 764220137Strasz */ 765221362Strasz prison0.pr_prison_racct = prison_racct_find("0"); 766220137Strasz} 767220137StraszSYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL); 768220137Strasz 769220137Strasz#else /* !RACCT */ 770220137Strasz 771220137Straszint 772220137Straszracct_add(struct proc *p, int resource, uint64_t amount) 773220137Strasz{ 774220137Strasz 775220137Strasz return (0); 776220137Strasz} 777220137Strasz 778220137Straszvoid 779220137Straszracct_add_cred(struct ucred *cred, int resource, uint64_t amount) 780220137Strasz{ 781220137Strasz} 782220137Strasz 783220137Straszvoid 784220137Straszracct_add_force(struct proc *p, int resource, uint64_t amount) 785220137Strasz{ 786220137Strasz 787220137Strasz return; 788220137Strasz} 789220137Strasz 790220137Straszint 791220137Straszracct_set(struct proc *p, int resource, uint64_t amount) 792220137Strasz{ 793220137Strasz 794220137Strasz return (0); 795220137Strasz} 796220137Strasz 797220137Straszvoid 798220372Straszracct_set_force(struct proc *p, int resource, uint64_t amount) 799220372Strasz{ 800220372Strasz} 801220372Strasz 802220372Straszvoid 803220137Straszracct_sub(struct proc *p, int resource, uint64_t amount) 804220137Strasz{ 805220137Strasz} 806220137Strasz 807220137Straszvoid 808220137Straszracct_sub_cred(struct ucred *cred, int resource, uint64_t amount) 809220137Strasz{ 810220137Strasz} 811220137Strasz 812220137Straszuint64_t 813220137Straszracct_get_limit(struct proc *p, int resource) 814220137Strasz{ 815220137Strasz 816220137Strasz return (UINT64_MAX); 817220137Strasz} 818220137Strasz 819220372Straszuint64_t 820220372Straszracct_get_available(struct proc *p, int resource) 821220372Strasz{ 822220372Strasz 823220372Strasz return (UINT64_MAX); 824220372Strasz} 825220372Strasz 826220137Straszvoid 827220137Straszracct_create(struct racct **racctp) 828220137Strasz{ 829220137Strasz} 830220137Strasz 831220137Straszvoid 832220137Straszracct_destroy(struct racct **racctp) 833220137Strasz{ 834220137Strasz} 835220137Strasz 836220137Straszint 837220137Straszracct_proc_fork(struct proc *parent, struct proc *child) 838220137Strasz{ 839220137Strasz 840220137Strasz return (0); 841220137Strasz} 842220137Strasz 843220137Straszvoid 844225940Straszracct_proc_fork_done(struct proc *child) 845225940Strasz{ 846225940Strasz} 847225940Strasz 848225940Straszvoid 849220137Straszracct_proc_exit(struct proc *p) 850220137Strasz{ 851220137Strasz} 852220137Strasz 853220137Strasz#endif /* !RACCT */ 854