1176730Sjeff/*- 2176730Sjeff * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org> 3176730Sjeff * All rights reserved. 4177904Sjeff * 5177904Sjeff * Copyright (c) 2008 Nokia Corporation 6177904Sjeff * All rights reserved. 7176730Sjeff * 8176730Sjeff * Redistribution and use in source and binary forms, with or without 9176730Sjeff * modification, are permitted provided that the following conditions 10176730Sjeff * are met: 11176730Sjeff * 1. Redistributions of source code must retain the above copyright 12176730Sjeff * notice unmodified, this list of conditions, and the following 13176730Sjeff * disclaimer. 14176730Sjeff * 2. Redistributions in binary form must reproduce the above copyright 15176730Sjeff * notice, this list of conditions and the following disclaimer in the 16176730Sjeff * documentation and/or other materials provided with the distribution. 17176730Sjeff * 18176730Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19176730Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20176730Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21176730Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22176730Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23176730Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24176730Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25176730Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26176730Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27176730Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28176730Sjeff * 29176730Sjeff */ 30176730Sjeff 31176730Sjeff#include <sys/cdefs.h> 32176730Sjeff__FBSDID("$FreeBSD: stable/11/sys/kern/kern_cpuset.c 333338 2018-05-07 21:42:22Z shurd $"); 33176730Sjeff 34180358Sbz#include "opt_ddb.h" 35180358Sbz 36176730Sjeff#include <sys/param.h> 37176730Sjeff#include <sys/systm.h> 38176730Sjeff#include <sys/sysproto.h> 39192895Sjamie#include <sys/jail.h> 40176730Sjeff#include <sys/kernel.h> 41176730Sjeff#include <sys/lock.h> 42176730Sjeff#include <sys/malloc.h> 43176730Sjeff#include <sys/mutex.h> 44176730Sjeff#include <sys/priv.h> 45176730Sjeff#include <sys/proc.h> 46176730Sjeff#include <sys/refcount.h> 47176730Sjeff#include <sys/sched.h> 48176730Sjeff#include <sys/smp.h> 49176730Sjeff#include <sys/syscallsubr.h> 50319819Sallanjude#include <sys/capsicum.h> 51176730Sjeff#include <sys/cpuset.h> 52176730Sjeff#include <sys/sx.h> 53176730Sjeff#include <sys/queue.h> 54222813Sattilio#include <sys/libkern.h> 55176730Sjeff#include <sys/limits.h> 56177738Sjeff#include <sys/bus.h> 57177738Sjeff#include <sys/interrupt.h> 58176730Sjeff 59176730Sjeff#include <vm/uma.h> 60276829Sjhb#include <vm/vm.h> 61276829Sjhb#include <vm/vm_page.h> 62276829Sjhb#include <vm/vm_param.h> 63276829Sjhb#include <vm/vm_phys.h> 64176730Sjeff 65180358Sbz#ifdef DDB 66180358Sbz#include <ddb/ddb.h> 67180358Sbz#endif /* DDB */ 68180358Sbz 69176730Sjeff/* 70176730Sjeff * cpusets provide a mechanism for creating and manipulating sets of 71176730Sjeff * processors for the purpose of constraining the scheduling of threads to 72176730Sjeff * specific processors. 73176730Sjeff * 74176730Sjeff * Each process belongs to an identified set, by default this is set 1. Each 75176730Sjeff * thread may further restrict the cpus it may run on to a subset of this 76176730Sjeff * named set. This creates an anonymous set which other threads and processes 77176730Sjeff * may not join by number. 
78176730Sjeff * 79176730Sjeff * The named set is referred to herein as the 'base' set to avoid ambiguity. 80176730Sjeff * This set is usually a child of a 'root' set while the anonymous set may 81176730Sjeff * simply be referred to as a mask. In the syscall api these are referred to 82176730Sjeff * as the ROOT, CPUSET, and MASK levels where CPUSET is called 'base' here. 83176730Sjeff * 84176730Sjeff * Threads inherit their set from their creator whether it be anonymous or 85176730Sjeff * not. This means that anonymous sets are immutable because they may be 86176730Sjeff * shared. To modify an anonymous set a new set is created with the desired 87176730Sjeff * mask and the same parent as the existing anonymous set. This gives the 88198493Sjhb * illusion of each thread having a private mask. 89176730Sjeff * 90176730Sjeff * Via the syscall apis a user may ask to retrieve or modify the root, base, 91176730Sjeff * or mask that is discovered via a pid, tid, or setid. Modifying a set 92176730Sjeff * modifies all numbered and anonymous child sets to comply with the new mask. 93176730Sjeff * Modifying a pid or tid's mask applies only to that tid but must still 94176730Sjeff * exist within the assigned parent set. 95176730Sjeff * 96198495Sjhb * A thread may not be assigned to a group separate from other threads in 97176730Sjeff * the process. This is to remove ambiguity when the setid is queried with 98176730Sjeff * a pid argument. There is no other technical limitation. 99176730Sjeff * 100176730Sjeff * This somewhat complex arrangement is intended to make it easy for 101176730Sjeff * applications to query available processors and bind their threads to 102176730Sjeff * specific processors while also allowing administrators to dynamically 103176730Sjeff * reprovision by changing sets which apply to groups of processes. 
 * A simple application should not concern itself with sets at all and
 * rather apply masks to its own threads via CPU_WHICH_TID and a -1 id
 * meaning 'curthread'.  It may query available cpus for that tid with a
 * getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...).
 */
/* UMA zone backing all struct cpuset allocations. */
static uma_zone_t cpuset_zone;
/* Spin mutex protecting set tree linkage and the cpuset_ids list. */
static struct mtx cpuset_lock;
/* List of all numbered (non-anonymous) sets, searched by cs_id. */
static struct setlist cpuset_ids;
/* Unit-number allocator for set ids; 0 and 1 are reserved at boot. */
static struct unrhdr *cpuset_unr;
/* Set 0 (immutable root) and set 1 (default set all processes start in). */
static struct cpuset *cpuset_zero, *cpuset_default;

/* Return the size of cpuset_t at the kernel level */
SYSCTL_INT(_kern_sched, OID_AUTO, cpusetsize, CTLFLAG_RD | CTLFLAG_CAPRD,
    SYSCTL_NULL_INT_PTR, sizeof(cpuset_t), "sizeof(cpuset_t)");

/* Points at cpuset_zero's mask; set up in cpuset_thread0(). */
cpuset_t *cpuset_root;
/*
 * Per-memory-domain cpu masks.  MD code may populate these before
 * cpuset_thread0() runs; otherwise all cpus are placed in domain 0 there.
 */
cpuset_t cpuset_domain[MAXMEMDOM];

/*
 * Acquire a reference to a cpuset, all pointers must be tracked with refs.
 */
struct cpuset *
cpuset_ref(struct cpuset *set)
{

	refcount_acquire(&set->cs_ref);
	return (set);
}

/*
 * Walks up the tree from 'set' to find the root.  Returns the root
 * referenced.  The root is the nearest ancestor (or 'set' itself) with
 * CPU_SET_ROOT; if none is flagged the topmost set is returned.
 */
static struct cpuset *
cpuset_refroot(struct cpuset *set)
{

	for (; set->cs_parent != NULL; set = set->cs_parent)
		if (set->cs_flags & CPU_SET_ROOT)
			break;
	cpuset_ref(set);

	return (set);
}

/*
 * Find the first non-anonymous set starting from 'set'.  Returns this set
 * referenced.  May return the passed in set with an extra ref if it is
 * not anonymous.  Anonymous sets are identified by cs_id == CPUSET_INVALID;
 * their parent is always a numbered (base) set.
 */
static struct cpuset *
cpuset_refbase(struct cpuset *set)
{

	if (set->cs_id == CPUSET_INVALID)
		set = set->cs_parent;
	cpuset_ref(set);

	return (set);
}

/*
 * Release a reference in a context where it is safe to allocate.
 * Dropping the last reference unlinks the set and recursively releases
 * the parent's reference; uma_zfree()/free_unr() may sleep, so use
 * cpuset_rel_defer() instead when a lock forbids that.
 */
void
cpuset_rel(struct cpuset *set)
{
	cpusetid_t id;

	if (refcount_release(&set->cs_ref) == 0)
		return;
	mtx_lock_spin(&cpuset_lock);
	LIST_REMOVE(set, cs_siblings);
	/* Remember the id; the set is gone after uma_zfree() below. */
	id = set->cs_id;
	if (id != CPUSET_INVALID)
		LIST_REMOVE(set, cs_link);
	mtx_unlock_spin(&cpuset_lock);
	cpuset_rel(set->cs_parent);
	uma_zfree(cpuset_zone, set);
	if (id != CPUSET_INVALID)
		free_unr(cpuset_unr, id);
}

/*
 * Deferred release must be used when in a context that is not safe to
 * allocate/free.  This places any unreferenced sets on the list 'head'.
 * Note the set is unlinked from cpuset_ids but reuses cs_link to sit on
 * 'head' until cpuset_rel_complete() finishes the teardown.
 */
static void
cpuset_rel_defer(struct setlist *head, struct cpuset *set)
{

	if (refcount_release(&set->cs_ref) == 0)
		return;
	mtx_lock_spin(&cpuset_lock);
	LIST_REMOVE(set, cs_siblings);
	if (set->cs_id != CPUSET_INVALID)
		LIST_REMOVE(set, cs_link);
	LIST_INSERT_HEAD(head, set, cs_link);
	mtx_unlock_spin(&cpuset_lock);
}

/*
 * Complete a deferred release.
 * Removes the set from the list provided to
 * cpuset_rel_defer.  Called in a sleepable context, so the final
 * cpuset_rel() on the parent and the uma_zfree() are safe here.
 */
static void
cpuset_rel_complete(struct cpuset *set)
{
	LIST_REMOVE(set, cs_link);
	cpuset_rel(set->cs_parent);
	uma_zfree(cpuset_zone, set);
}

/*
 * Find a set based on an id.  Returns it with a ref.
 * A jailed thread may only see sets that descend from its prison's
 * cpuset; lookups outside that subtree return NULL as if nonexistent.
 */
static struct cpuset *
cpuset_lookup(cpusetid_t setid, struct thread *td)
{
	struct cpuset *set;

	if (setid == CPUSET_INVALID)
		return (NULL);
	mtx_lock_spin(&cpuset_lock);
	LIST_FOREACH(set, &cpuset_ids, cs_link)
		if (set->cs_id == setid)
			break;
	if (set)
		cpuset_ref(set);
	mtx_unlock_spin(&cpuset_lock);

	KASSERT(td != NULL, ("[%s:%d] td is NULL", __func__, __LINE__));
	if (set != NULL && jailed(td->td_ucred)) {
		struct cpuset *jset, *tset;

		/* Walk up from 'set'; it is visible only if we reach the
		 * prison's own cpuset on the way to the root. */
		jset = td->td_ucred->cr_prison->pr_cpuset;
		for (tset = set; tset != NULL; tset = tset->cs_parent)
			if (tset == jset)
				break;
		if (tset == NULL) {
			cpuset_rel(set);
			set = NULL;
		}
	}

	return (set);
}

/*
 * Create a set in the space provided in 'set' with the provided parameters.
 * The set is returned with a single ref.  May return EDEADLK if the set
 * will have no valid cpu based on restrictions from the parent.
 */
static int
_cpuset_create(struct cpuset *set, struct cpuset *parent, const cpuset_t *mask,
    cpusetid_t id)
{

	if (!CPU_OVERLAP(&parent->cs_mask, mask))
		return (EDEADLK);
	CPU_COPY(mask, &set->cs_mask);
	LIST_INIT(&set->cs_children);
	refcount_init(&set->cs_ref, 1);
	set->cs_flags = 0;
	mtx_lock_spin(&cpuset_lock);
	/*
	 * Clamp the requested mask to the parent's under the lock; the
	 * parent's mask may have shrunk since the CPU_OVERLAP check above.
	 */
	CPU_AND(&set->cs_mask, &parent->cs_mask);
	set->cs_id = id;
	set->cs_parent = cpuset_ref(parent);
	LIST_INSERT_HEAD(&parent->cs_children, set, cs_siblings);
	/* Only numbered sets are visible via the global id list. */
	if (set->cs_id != CPUSET_INVALID)
		LIST_INSERT_HEAD(&cpuset_ids, set, cs_link);
	mtx_unlock_spin(&cpuset_lock);

	return (0);
}

/*
 * Create a new non-anonymous set with the requested parent and mask.  May
 * return failures if the mask is invalid or a new number can not be
 * allocated.
 */
static int
cpuset_create(struct cpuset **setp, struct cpuset *parent, const cpuset_t *mask)
{
	struct cpuset *set;
	cpusetid_t id;
	int error;

	id = alloc_unr(cpuset_unr);
	if (id == -1)
		return (ENFILE);
	*setp = set = uma_zalloc(cpuset_zone, M_WAITOK);
	error = _cpuset_create(set, parent, mask, id);
	if (error == 0)
		return (0);
	/* Creation failed: hand back both the unit number and the memory. */
	free_unr(cpuset_unr, id);
	uma_zfree(cpuset_zone, set);

	return (error);
}

/*
 * Recursively check for errors that would occur from applying mask to
 * the tree of sets starting at 'set'.  Checks for sets that would become
 * empty as well as RDONLY flags.  'check_mask' is 0 for the root of the
 * update (the mask replaces cs_mask outright) and 1 for descendants
 * (the mask is intersected with the child's existing cs_mask).
 */
static int
cpuset_testupdate(struct cpuset *set, cpuset_t *mask, int check_mask)
{
	struct cpuset *nset;
	cpuset_t newmask;
	int error;

	mtx_assert(&cpuset_lock, MA_OWNED);
	if (set->cs_flags & CPU_SET_RDONLY)
		return (EPERM);
	if (check_mask) {
		/* An empty intersection would leave this set no cpus. */
		if (!CPU_OVERLAP(&set->cs_mask, mask))
			return (EDEADLK);
		CPU_COPY(&set->cs_mask, &newmask);
		CPU_AND(&newmask, mask);
	} else
		CPU_COPY(mask, &newmask);
	error = 0;
	LIST_FOREACH(nset, &set->cs_children, cs_siblings)
		if ((error = cpuset_testupdate(nset, &newmask, 1)) != 0)
			break;
	return (error);
}

/*
 * Applies the mask 'mask' without checking for empty sets or permissions.
 * Recursively narrows every descendant's mask as well; callers must have
 * validated the update with cpuset_testupdate() first.
 */
static void
cpuset_update(struct cpuset *set, cpuset_t *mask)
{
	struct cpuset *nset;

	mtx_assert(&cpuset_lock, MA_OWNED);
	CPU_AND(&set->cs_mask, mask);
	LIST_FOREACH(nset, &set->cs_children, cs_siblings)
		cpuset_update(nset, &set->cs_mask);

	return;
}

/*
 * Modify the set 'set' to use a copy of the mask provided.  Apply this new
 * mask to restrict all children in the tree.  Checks for validity before
 * applying the changes.  Returns EPERM for privilege/jail violations,
 * EINVAL if the mask escapes the parent, or the cpuset_testupdate() error.
 */
static int
cpuset_modify(struct cpuset *set, cpuset_t *mask)
{
	struct cpuset *root;
	int error;

	error = priv_check(curthread, PRIV_SCHED_CPUSET);
	if (error)
		return (error);
	/*
	 * In case we are called from within the jail
	 * we do not allow modifying the dedicated root
	 * cpuset of the jail but may still allow to
	 * change child sets.
	 */
	if (jailed(curthread->td_ucred) &&
	    set->cs_flags & CPU_SET_ROOT)
		return (EPERM);
	/*
	 * Verify that we have access to this set of
	 * cpus.
	 */
	root = set->cs_parent;
	if (root && !CPU_SUBSET(&root->cs_mask, mask))
		return (EINVAL);
	mtx_lock_spin(&cpuset_lock);
	/* check_mask == 0: the new mask replaces this set's mask outright. */
	error = cpuset_testupdate(set, mask, 0);
	if (error)
		goto out;
	CPU_COPY(mask, &set->cs_mask);
	cpuset_update(set, mask);
out:
	mtx_unlock_spin(&cpuset_lock);

	return (error);
}

/*
 * Resolve the 'which' parameter of several cpuset apis.
 *
 * For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid.  Also
 * checks for permission via p_cansched().
 *
 * For WHICH_SET returns a valid set with a new reference.
 *
 * -1 may be supplied for any argument to mean the current proc/thread or
 * the base set of the current thread.  May fail with ESRCH/EPERM.
 */
int
cpuset_which(cpuwhich_t which, id_t id, struct proc **pp, struct thread **tdp,
    struct cpuset **setp)
{
	struct cpuset *set;
	struct thread *td;
	struct proc *p;
	int error;

	*pp = p = NULL;
	*tdp = td = NULL;
	*setp = set = NULL;
	switch (which) {
	case CPU_WHICH_PID:
		if (id == -1) {
			PROC_LOCK(curproc);
			p = curproc;
			break;
		}
		/* pfind() returns the proc locked on success. */
		if ((p = pfind(id)) == NULL)
			return (ESRCH);
		break;
	case CPU_WHICH_TID:
		if (id == -1) {
			PROC_LOCK(curproc);
			p = curproc;
			td = curthread;
			break;
		}
		/* tdfind() returns with the containing proc locked. */
		td = tdfind(id, -1);
		if (td == NULL)
			return (ESRCH);
		p = td->td_proc;
		break;
	case CPU_WHICH_CPUSET:
		if (id == -1) {
			thread_lock(curthread);
			set = cpuset_refbase(curthread->td_cpuset);
			thread_unlock(curthread);
		} else
			set = cpuset_lookup(id, curthread);
		if (set) {
			*setp = set;
			return (0);
		}
		return (ESRCH);
	case CPU_WHICH_JAIL:
	{
		/* Find `set' for prison with given id. */
		struct prison *pr;

		sx_slock(&allprison_lock);
		pr = prison_find_child(curthread->td_ucred->cr_prison, id);
		sx_sunlock(&allprison_lock);
		if (pr == NULL)
			return (ESRCH);
		cpuset_ref(pr->pr_cpuset);
		*setp = pr->pr_cpuset;
		/* prison_find_child() returned pr locked; drop it now. */
		mtx_unlock(&pr->pr_mtx);
		return (0);
	}
	case CPU_WHICH_IRQ:
	case CPU_WHICH_DOMAIN:
		/* The id is interpreted by the caller for these levels. */
		return (0);
	default:
		return (EINVAL);
	}
	error = p_cansched(curthread, p);
	if (error) {
		PROC_UNLOCK(p);
		return (error);
	}
	if (td == NULL)
		td = FIRST_THREAD_IN_PROC(p);
	/* Success: *pp is returned locked. */
	*pp = p;
	*tdp = td;
	return (0);
}

/*
 * Create an anonymous set with the provided mask in the space provided by
 * 'fset'.  If the passed in set is anonymous we use its parent otherwise
 * the new set is a child of 'set'.  Fails with EDEADLK if 'mask' is not
 * a subset of the chosen parent's mask.
 */
static int
cpuset_shadow(struct cpuset *set, struct cpuset *fset, const cpuset_t *mask)
{
	struct cpuset *parent;

	if (set->cs_id == CPUSET_INVALID)
		parent = set->cs_parent;
	else
		parent = set;
	if (!CPU_SUBSET(&parent->cs_mask, mask))
		return (EDEADLK);
	return (_cpuset_create(fset, parent, mask, CPUSET_INVALID));
}

/*
 * Handle two cases for replacing the base set or mask of an entire process.
 *
 * 1) Set is non-null and mask is null.
 *    This reparents all anonymous sets
 *    to the provided set and replaces all non-anonymous td_cpusets with the
 *    provided set.
 * 2) Mask is non-null and set is null.  This replaces or creates anonymous
 *    sets for every thread with the existing base as a parent.
 *
 * This is overly complicated because we can't allocate while holding a
 * spinlock and spinlocks must be held while changing and examining thread
 * state.
 */
static int
cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask)
{
	struct setlist freelist;	/* Preallocated sets, one per thread. */
	struct setlist droplist;	/* Sets released via deferred release. */
	struct cpuset *tdset;
	struct cpuset *nset;
	struct thread *td;
	struct proc *p;
	int threads;
	int nfree;
	int error;

	/*
	 * The algorithm requires two passes due to locking considerations.
	 *
	 * 1) Lookup the process and acquire the locks in the required order.
	 * 2) If enough cpusets have not been allocated release the locks and
	 *    allocate them.  Loop.
	 */
	LIST_INIT(&freelist);
	LIST_INIT(&droplist);
	nfree = 0;
	for (;;) {
		error = cpuset_which(CPU_WHICH_PID, pid, &p, &td, &nset);
		if (error)
			goto out;
		if (nfree >= p->p_numthreads)
			break;
		/* p_numthreads may grow while unlocked; hence the loop. */
		threads = p->p_numthreads;
		PROC_UNLOCK(p);
		for (; nfree < threads; nfree++) {
			nset = uma_zalloc(cpuset_zone, M_WAITOK);
			LIST_INSERT_HEAD(&freelist, nset, cs_link);
		}
	}
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/*
	 * Now that the appropriate locks are held and we have enough cpusets,
	 * make sure the operation will succeed before applying changes.  The
	 * proc lock prevents td_cpuset from changing between calls.
	 */
	error = 0;
	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		tdset = td->td_cpuset;
		/*
		 * Verify that a new mask doesn't specify cpus outside of
		 * the set the thread is a member of.
		 */
		if (mask) {
			if (tdset->cs_id == CPUSET_INVALID)
				tdset = tdset->cs_parent;
			if (!CPU_SUBSET(&tdset->cs_mask, mask))
				error = EDEADLK;
		/*
		 * Verify that a new set won't leave an existing thread
		 * mask without a cpu to run on.  It can, however, restrict
		 * the set.
		 */
		} else if (tdset->cs_id == CPUSET_INVALID) {
			if (!CPU_OVERLAP(&set->cs_mask, &tdset->cs_mask))
				error = EDEADLK;
		}
		thread_unlock(td);
		if (error)
			goto unlock_out;
	}
	/*
	 * Replace each thread's cpuset while using deferred release.  We
	 * must do this because the thread lock must be held while operating
	 * on the thread and this limits the type of operations allowed.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		/*
		 * If we presently have an anonymous set or are applying a
		 * mask we must create an anonymous shadow set.  That is
		 * either parented to our existing base or the supplied set.
		 *
		 * If we have a base set with no anonymous shadow we simply
		 * replace it outright.
		 */
		tdset = td->td_cpuset;
		if (tdset->cs_id == CPUSET_INVALID || mask) {
			nset = LIST_FIRST(&freelist);
			LIST_REMOVE(nset, cs_link);
			if (mask)
				error = cpuset_shadow(tdset, nset, mask);
			else
				error = _cpuset_create(nset, set,
				    &tdset->cs_mask, CPUSET_INVALID);
			if (error) {
				/* Return the unused set to the freelist. */
				LIST_INSERT_HEAD(&freelist, nset, cs_link);
				thread_unlock(td);
				break;
			}
		} else
			nset = cpuset_ref(set);
		cpuset_rel_defer(&droplist, tdset);
		td->td_cpuset = nset;
		sched_affinity(td);
		thread_unlock(td);
	}
unlock_out:
	PROC_UNLOCK(p);
out:
	/* Finish deferred releases and free unused preallocations. */
	while ((nset = LIST_FIRST(&droplist)) != NULL)
		cpuset_rel_complete(nset);
	while ((nset = LIST_FIRST(&freelist)) != NULL) {
		LIST_REMOVE(nset, cs_link);
		uma_zfree(cpuset_zone, nset);
	}
	return (error);
}

/*
 * Return a string representing a valid layout for a cpuset_t object.
 * It expects an incoming buffer at least sized as CPUSETBUFSIZ.
 * The format is comma-separated hex words, most significant word first.
 */
char *
cpusetobj_strprint(char *buf, const cpuset_t *set)
{
	char *tbuf;
	size_t i, bytesp, bufsiz;

	tbuf = buf;
	bytesp = 0;
	bufsiz = CPUSETBUFSIZ;

	/* NOTE: "%lx" assumes __bits words are long-sized. */
	for (i = 0; i < (_NCPUWORDS - 1); i++) {
		bytesp = snprintf(tbuf, bufsiz, "%lx,", set->__bits[i]);
		bufsiz -= bytesp;
		tbuf += bytesp;
	}
	/* Last word has no trailing comma. */
	snprintf(tbuf, bufsiz, "%lx", set->__bits[_NCPUWORDS - 1]);
	return (buf);
}

/*
 * Build a valid cpuset_t object from a string representation.
 * It expects an incoming buffer at least sized as CPUSETBUFSIZ.
 * Returns 0 on success, -1 on any parse error or oversized input.
 */
int
cpusetobj_strscan(cpuset_t *set, const char *buf)
{
	u_int nwords;
	int i, ret;

	if (strlen(buf) > CPUSETBUFSIZ - 1)
		return (-1);

	/* Allow to pass a shorter version of the mask when necessary. */
	nwords = 1;
	for (i = 0; buf[i] != '\0'; i++)
		if (buf[i] == ',')
			nwords++;
	if (nwords > _NCPUWORDS)
		return (-1);

	/* Unsupplied high words are left zero by CPU_ZERO(). */
	CPU_ZERO(set);
	for (i = 0; i < (nwords - 1); i++) {
		ret = sscanf(buf, "%lx,", &set->__bits[i]);
		if (ret == 0 || ret == -1)
			return (-1);
		buf = strstr(buf, ",");
		if (buf == NULL)
			return (-1);
		buf++;
	}
	ret = sscanf(buf, "%lx", &set->__bits[nwords - 1]);
	if (ret == 0 || ret == -1)
		return (-1);
	return (0);
}

/*
 * Apply an anonymous mask to a single thread.  Shadows the thread's
 * current set with a new anonymous set carrying 'mask' and drops the
 * reference on the old set outside the thread/proc locks.
 */
int
cpuset_setthread(lwpid_t id, cpuset_t *mask)
{
	struct cpuset *nset;
	struct cpuset *set;
	struct thread *td;
	struct proc *p;
	int error;

	/* Preallocate before taking any locks; uma_zalloc may sleep. */
	nset = uma_zalloc(cpuset_zone, M_WAITOK);
	error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &set);
	if (error)
		goto out;
	/* 'set' now tracks the old td_cpuset to release on success. */
	set = NULL;
	thread_lock(td);
	error = cpuset_shadow(td->td_cpuset, nset, mask);
	if (error == 0) {
		set = td->td_cpuset;
		td->td_cpuset = nset;
		sched_affinity(td);
		/* Ownership transferred to the thread; don't free below. */
		nset = NULL;
	}
	thread_unlock(td);
	PROC_UNLOCK(p);
	if (set)
		cpuset_rel(set);
out:
	if (nset)
		uma_zfree(cpuset_zone, nset);
	return (error);
}

/*
 * Apply new cpumask to the ithread.  cpu == NOCPU rolls the ithread back
 * to a shadow of the default set; otherwise the thread is pinned to the
 * single cpu given.
 */
int
cpuset_setithread(lwpid_t id, int cpu)
{
	struct cpuset *nset, *rset;
	struct cpuset *parent, *old_set;
	struct thread *td;
	struct proc *p;
	cpusetid_t cs_id;
	cpuset_t mask;
	int error;

	/* Preallocate both possible sets before taking locks. */
	nset = uma_zalloc(cpuset_zone, M_WAITOK);
	rset = uma_zalloc(cpuset_zone, M_WAITOK);
	cs_id = CPUSET_INVALID;

	CPU_ZERO(&mask);
	if (cpu == NOCPU)
		CPU_COPY(cpuset_root, &mask);
	else
		CPU_SET(cpu, &mask);

	error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &old_set);
	if (error != 0 || ((cs_id = alloc_unr(cpuset_unr)) == CPUSET_INVALID))
		goto out;

	/* cpuset_which() returns with PROC_LOCK held. */
	old_set = td->td_cpuset;

	if (cpu == NOCPU) {

		/*
		 * roll back to default set. We're not using cpuset_shadow()
		 * here because we can fail CPU_SUBSET() check. This can happen
		 * if default set does not contain all CPUs.
		 */
		error = _cpuset_create(nset, cpuset_default, &mask,
		    CPUSET_INVALID);

		goto applyset;
	}

	if (old_set->cs_id == 1 || (old_set->cs_id == CPUSET_INVALID &&
	    old_set->cs_parent->cs_id == 1)) {

		/*
		 * Current set is either default (1) or
		 * shadowed version of default set.
		 *
		 * Allocate new root set to be able to shadow it
		 * with any mask.
		 */
		error = _cpuset_create(rset, cpuset_zero,
		    &cpuset_zero->cs_mask, cs_id);
		if (error != 0) {
			PROC_UNLOCK(p);
			goto out;
		}
		rset->cs_flags |= CPU_SET_ROOT;
		parent = rset;
		/* rset/cs_id are consumed; don't free them at 'out'. */
		rset = NULL;
		cs_id = CPUSET_INVALID;
	} else {
		/* Assume existing set was already allocated by previous call */
		parent = old_set;
		old_set = NULL;
	}

	error = cpuset_shadow(parent, nset, &mask);
applyset:
	if (error == 0) {
		thread_lock(td);
		td->td_cpuset = nset;
		sched_affinity(td);
		thread_unlock(td);
		nset = NULL;
	} else
		old_set = NULL;
	PROC_UNLOCK(p);
	if (old_set != NULL)
		cpuset_rel(old_set);
out:
	/* Free whatever preallocations were not consumed above. */
	if (nset != NULL)
		uma_zfree(cpuset_zone, nset);
	if (rset != NULL)
		uma_zfree(cpuset_zone, rset);
	if (cs_id != CPUSET_INVALID)
		free_unr(cpuset_unr, cs_id);
	return (error);
}


/*
 * Creates system-wide cpusets and the cpuset for thread0 including two
 * sets:
 *
 * 0 - The root set which should represent all valid processors in the
 *     system.  It is initially created with a mask of all processors
 *     because we don't know what processors are valid until cpuset_init()
 *     runs.  This set is immutable.
 * 1 - The default set which all processes are a member of until changed.
 *     This allows an administrator to move all threads off of given cpus to
 *     dedicate them to high priority tasks or save power etc.
 */
struct cpuset *
cpuset_thread0(void)
{
	struct cpuset *set;
	int error, i;

	cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
	mtx_init(&cpuset_lock, "cpuset", NULL, MTX_SPIN | MTX_RECURSE);

	/*
	 * Create the root system set for the whole machine.  Doesn't use
	 * cpuset_create() due to NULL parent.
	 */
	set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
	CPU_FILL(&set->cs_mask);
	LIST_INIT(&set->cs_children);
	LIST_INSERT_HEAD(&cpuset_ids, set, cs_link);
	set->cs_ref = 1;
	set->cs_flags = CPU_SET_ROOT;
	cpuset_zero = set;
	cpuset_root = &set->cs_mask;

	/*
	 * Now derive a default, modifiable set from that to give out.
	 */
	set = uma_zalloc(cpuset_zone, M_WAITOK);
	error = _cpuset_create(set, cpuset_zero, &cpuset_zero->cs_mask, 1);
	KASSERT(error == 0, ("Error creating default set: %d\n", error));
	cpuset_default = set;

	/*
	 * Initialize the unit allocator. 0 and 1 are allocated above.
	 */
	cpuset_unr = new_unrhdr(2, INT_MAX, NULL);

	/*
	 * If MD code has not initialized per-domain cpusets, place all
	 * CPUs in domain 0.
	 */
	for (i = 0; i < MAXMEMDOM; i++)
		if (!CPU_EMPTY(&cpuset_domain[i]))
			goto domains_set;
	CPU_COPY(&all_cpus, &cpuset_domain[0]);
domains_set:

	/* Thread0 starts in the default set. */
	return (set);
}

/*
 * Create a cpuset, which would be cpuset_create() but
 * mark the new 'set' as root.
 *
 * We are not going to reparent the td to it.  Use cpuset_setproc_update_set()
 * for that.
 *
 * In case of no error, returns the set in *setp locked with a reference.
 */
int
cpuset_create_root(struct prison *pr, struct cpuset **setp)
{
	struct cpuset *set;
	int error;

	KASSERT(pr != NULL, ("[%s:%d] invalid pr", __func__, __LINE__));
	KASSERT(setp != NULL, ("[%s:%d] invalid setp", __func__, __LINE__));

	error = cpuset_create(setp, pr->pr_cpuset, &pr->pr_cpuset->cs_mask);
	if (error)
		return (error);

	KASSERT(*setp != NULL, ("[%s:%d] cpuset_create returned invalid data",
	    __func__, __LINE__));

	/* Mark the set as root.
*/ 907185435Sbz set = *setp; 908185435Sbz set->cs_flags |= CPU_SET_ROOT; 909185435Sbz 910185435Sbz return (0); 911185435Sbz} 912185435Sbz 913185435Sbzint 914185435Sbzcpuset_setproc_update_set(struct proc *p, struct cpuset *set) 915185435Sbz{ 916185435Sbz int error; 917185435Sbz 918185435Sbz KASSERT(p != NULL, ("[%s:%d] invalid proc", __func__, __LINE__)); 919185435Sbz KASSERT(set != NULL, ("[%s:%d] invalid set", __func__, __LINE__)); 920185435Sbz 921185435Sbz cpuset_ref(set); 922185435Sbz error = cpuset_setproc(p->p_pid, set, NULL); 923185435Sbz if (error) 924185435Sbz return (error); 925185435Sbz cpuset_rel(set); 926185435Sbz return (0); 927185435Sbz} 928185435Sbz 929185435Sbz/* 930176730Sjeff * This is called once the final set of system cpus is known. Modifies 931198493Sjhb * the root set and all children and mark the root read-only. 932176730Sjeff */ 933176730Sjeffstatic void 934176730Sjeffcpuset_init(void *arg) 935176730Sjeff{ 936176730Sjeff cpuset_t mask; 937176730Sjeff 938222813Sattilio mask = all_cpus; 939176730Sjeff if (cpuset_modify(cpuset_zero, &mask)) 940176730Sjeff panic("Can't set initial cpuset mask.\n"); 941176730Sjeff cpuset_zero->cs_flags |= CPU_SET_RDONLY; 942176730Sjeff} 943176730SjeffSYSINIT(cpuset, SI_SUB_SMP, SI_ORDER_ANY, cpuset_init, NULL); 944176730Sjeff 945176730Sjeff#ifndef _SYS_SYSPROTO_H_ 946176730Sjeffstruct cpuset_args { 947176730Sjeff cpusetid_t *setid; 948176730Sjeff}; 949176730Sjeff#endif 950176730Sjeffint 951225617Skmacysys_cpuset(struct thread *td, struct cpuset_args *uap) 952176730Sjeff{ 953176730Sjeff struct cpuset *root; 954176730Sjeff struct cpuset *set; 955176730Sjeff int error; 956176730Sjeff 957176730Sjeff thread_lock(td); 958177738Sjeff root = cpuset_refroot(td->td_cpuset); 959176730Sjeff thread_unlock(td); 960176730Sjeff error = cpuset_create(&set, root, &root->cs_mask); 961176730Sjeff cpuset_rel(root); 962176730Sjeff if (error) 963176730Sjeff return (error); 964177738Sjeff error = copyout(&set->cs_id, uap->setid, 
sizeof(set->cs_id)); 965176730Sjeff if (error == 0) 966177738Sjeff error = cpuset_setproc(-1, set, NULL); 967176730Sjeff cpuset_rel(set); 968176730Sjeff return (error); 969176730Sjeff} 970176730Sjeff 971176730Sjeff#ifndef _SYS_SYSPROTO_H_ 972176730Sjeffstruct cpuset_setid_args { 973176730Sjeff cpuwhich_t which; 974176730Sjeff id_t id; 975176730Sjeff cpusetid_t setid; 976176730Sjeff}; 977176730Sjeff#endif 978176730Sjeffint 979225617Skmacysys_cpuset_setid(struct thread *td, struct cpuset_setid_args *uap) 980176730Sjeff{ 981315554Strasz 982315554Strasz return (kern_cpuset_setid(td, uap->which, uap->id, uap->setid)); 983315554Strasz} 984315554Strasz 985315554Straszint 986315554Straszkern_cpuset_setid(struct thread *td, cpuwhich_t which, 987315554Strasz id_t id, cpusetid_t setid) 988315554Strasz{ 989176730Sjeff struct cpuset *set; 990176730Sjeff int error; 991176730Sjeff 992176730Sjeff /* 993176730Sjeff * Presently we only support per-process sets. 994176730Sjeff */ 995315554Strasz if (which != CPU_WHICH_PID) 996176730Sjeff return (EINVAL); 997315554Strasz set = cpuset_lookup(setid, td); 998176730Sjeff if (set == NULL) 999176730Sjeff return (ESRCH); 1000315554Strasz error = cpuset_setproc(id, set, NULL); 1001176730Sjeff cpuset_rel(set); 1002176730Sjeff return (error); 1003176730Sjeff} 1004176730Sjeff 1005176730Sjeff#ifndef _SYS_SYSPROTO_H_ 1006176730Sjeffstruct cpuset_getid_args { 1007176730Sjeff cpulevel_t level; 1008176730Sjeff cpuwhich_t which; 1009176730Sjeff id_t id; 1010176730Sjeff cpusetid_t *setid; 1011228275Skevlo}; 1012176730Sjeff#endif 1013176730Sjeffint 1014225617Skmacysys_cpuset_getid(struct thread *td, struct cpuset_getid_args *uap) 1015176730Sjeff{ 1016315554Strasz 1017315554Strasz return (kern_cpuset_getid(td, uap->level, uap->which, uap->id, 1018315554Strasz uap->setid)); 1019315554Strasz} 1020315554Strasz 1021315554Straszint 1022315554Straszkern_cpuset_getid(struct thread *td, cpulevel_t level, cpuwhich_t which, 1023315554Strasz id_t id, cpusetid_t 
*setid) 1024315554Strasz{ 1025176730Sjeff struct cpuset *nset; 1026176730Sjeff struct cpuset *set; 1027176730Sjeff struct thread *ttd; 1028176730Sjeff struct proc *p; 1029315554Strasz cpusetid_t tmpid; 1030176730Sjeff int error; 1031176730Sjeff 1032315554Strasz if (level == CPU_LEVEL_WHICH && which != CPU_WHICH_CPUSET) 1033176730Sjeff return (EINVAL); 1034315554Strasz error = cpuset_which(which, id, &p, &ttd, &set); 1035176730Sjeff if (error) 1036176730Sjeff return (error); 1037315554Strasz switch (which) { 1038176730Sjeff case CPU_WHICH_TID: 1039176730Sjeff case CPU_WHICH_PID: 1040176730Sjeff thread_lock(ttd); 1041177738Sjeff set = cpuset_refbase(ttd->td_cpuset); 1042176730Sjeff thread_unlock(ttd); 1043176730Sjeff PROC_UNLOCK(p); 1044176730Sjeff break; 1045176730Sjeff case CPU_WHICH_CPUSET: 1046185435Sbz case CPU_WHICH_JAIL: 1047176730Sjeff break; 1048178092Sjeff case CPU_WHICH_IRQ: 1049276829Sjhb case CPU_WHICH_DOMAIN: 1050178092Sjeff return (EINVAL); 1051176730Sjeff } 1052315554Strasz switch (level) { 1053176730Sjeff case CPU_LEVEL_ROOT: 1054177738Sjeff nset = cpuset_refroot(set); 1055176730Sjeff cpuset_rel(set); 1056176730Sjeff set = nset; 1057176730Sjeff break; 1058176730Sjeff case CPU_LEVEL_CPUSET: 1059176730Sjeff break; 1060176730Sjeff case CPU_LEVEL_WHICH: 1061176730Sjeff break; 1062176730Sjeff } 1063315554Strasz tmpid = set->cs_id; 1064176730Sjeff cpuset_rel(set); 1065176730Sjeff if (error == 0) 1066322876Sjkim error = copyout(&tmpid, setid, sizeof(tmpid)); 1067176730Sjeff 1068176730Sjeff return (error); 1069176730Sjeff} 1070176730Sjeff 1071176730Sjeff#ifndef _SYS_SYSPROTO_H_ 1072176730Sjeffstruct cpuset_getaffinity_args { 1073177597Sru cpulevel_t level; 1074177597Sru cpuwhich_t which; 1075177597Sru id_t id; 1076177597Sru size_t cpusetsize; 1077177597Sru cpuset_t *mask; 1078176730Sjeff}; 1079176730Sjeff#endif 1080176730Sjeffint 1081225617Skmacysys_cpuset_getaffinity(struct thread *td, struct cpuset_getaffinity_args *uap) 1082176730Sjeff{ 1083315555Strasz 
1084315555Strasz return (kern_cpuset_getaffinity(td, uap->level, uap->which, 1085315555Strasz uap->id, uap->cpusetsize, uap->mask)); 1086315555Strasz} 1087315555Strasz 1088315555Straszint 1089315555Straszkern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, 1090315555Strasz id_t id, size_t cpusetsize, cpuset_t *maskp) 1091315555Strasz{ 1092176730Sjeff struct thread *ttd; 1093176730Sjeff struct cpuset *nset; 1094176730Sjeff struct cpuset *set; 1095176730Sjeff struct proc *p; 1096176730Sjeff cpuset_t *mask; 1097176730Sjeff int error; 1098177597Sru size_t size; 1099176730Sjeff 1100315555Strasz if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY) 1101176730Sjeff return (ERANGE); 1102319819Sallanjude /* In Capability mode, you can only get your own CPU set. */ 1103319819Sallanjude if (IN_CAPABILITY_MODE(td)) { 1104319819Sallanjude if (level != CPU_LEVEL_WHICH) 1105319819Sallanjude return (ECAPMODE); 1106319819Sallanjude if (which != CPU_WHICH_TID && which != CPU_WHICH_PID) 1107319819Sallanjude return (ECAPMODE); 1108319819Sallanjude if (id != -1) 1109319819Sallanjude return (ECAPMODE); 1110319819Sallanjude } 1111315555Strasz size = cpusetsize; 1112176730Sjeff mask = malloc(size, M_TEMP, M_WAITOK | M_ZERO); 1113315555Strasz error = cpuset_which(which, id, &p, &ttd, &set); 1114176730Sjeff if (error) 1115176730Sjeff goto out; 1116315555Strasz switch (level) { 1117176730Sjeff case CPU_LEVEL_ROOT: 1118176730Sjeff case CPU_LEVEL_CPUSET: 1119315555Strasz switch (which) { 1120176730Sjeff case CPU_WHICH_TID: 1121176730Sjeff case CPU_WHICH_PID: 1122176730Sjeff thread_lock(ttd); 1123176730Sjeff set = cpuset_ref(ttd->td_cpuset); 1124176730Sjeff thread_unlock(ttd); 1125176730Sjeff break; 1126176730Sjeff case CPU_WHICH_CPUSET: 1127185435Sbz case CPU_WHICH_JAIL: 1128176730Sjeff break; 1129178092Sjeff case CPU_WHICH_IRQ: 1130333338Sshurd case CPU_WHICH_INTRHANDLER: 1131333338Sshurd case CPU_WHICH_ITHREAD: 1132276829Sjhb case CPU_WHICH_DOMAIN: 
1133178092Sjeff error = EINVAL; 1134178092Sjeff goto out; 1135176730Sjeff } 1136315555Strasz if (level == CPU_LEVEL_ROOT) 1137177738Sjeff nset = cpuset_refroot(set); 1138176730Sjeff else 1139177738Sjeff nset = cpuset_refbase(set); 1140176730Sjeff CPU_COPY(&nset->cs_mask, mask); 1141176730Sjeff cpuset_rel(nset); 1142176730Sjeff break; 1143176730Sjeff case CPU_LEVEL_WHICH: 1144315555Strasz switch (which) { 1145176730Sjeff case CPU_WHICH_TID: 1146176730Sjeff thread_lock(ttd); 1147176730Sjeff CPU_COPY(&ttd->td_cpuset->cs_mask, mask); 1148176730Sjeff thread_unlock(ttd); 1149176730Sjeff break; 1150176730Sjeff case CPU_WHICH_PID: 1151176730Sjeff FOREACH_THREAD_IN_PROC(p, ttd) { 1152176730Sjeff thread_lock(ttd); 1153176730Sjeff CPU_OR(mask, &ttd->td_cpuset->cs_mask); 1154176730Sjeff thread_unlock(ttd); 1155176730Sjeff } 1156176730Sjeff break; 1157176730Sjeff case CPU_WHICH_CPUSET: 1158185435Sbz case CPU_WHICH_JAIL: 1159176730Sjeff CPU_COPY(&set->cs_mask, mask); 1160176730Sjeff break; 1161178092Sjeff case CPU_WHICH_IRQ: 1162333338Sshurd case CPU_WHICH_INTRHANDLER: 1163333338Sshurd case CPU_WHICH_ITHREAD: 1164333338Sshurd error = intr_getaffinity(id, which, mask); 1165178092Sjeff break; 1166276829Sjhb case CPU_WHICH_DOMAIN: 1167315555Strasz if (id < 0 || id >= MAXMEMDOM) 1168276829Sjhb error = ESRCH; 1169276829Sjhb else 1170315555Strasz CPU_COPY(&cpuset_domain[id], mask); 1171276829Sjhb break; 1172176730Sjeff } 1173176730Sjeff break; 1174176730Sjeff default: 1175176730Sjeff error = EINVAL; 1176176730Sjeff break; 1177176730Sjeff } 1178176730Sjeff if (set) 1179176730Sjeff cpuset_rel(set); 1180176730Sjeff if (p) 1181176730Sjeff PROC_UNLOCK(p); 1182176730Sjeff if (error == 0) 1183315555Strasz error = copyout(mask, maskp, size); 1184176730Sjeffout: 1185176730Sjeff free(mask, M_TEMP); 1186176730Sjeff return (error); 1187176730Sjeff} 1188176730Sjeff 1189176730Sjeff#ifndef _SYS_SYSPROTO_H_ 1190176730Sjeffstruct cpuset_setaffinity_args { 1191176730Sjeff cpulevel_t level; 
1192177597Sru cpuwhich_t which; 1193177597Sru id_t id; 1194177597Sru size_t cpusetsize; 1195177597Sru const cpuset_t *mask; 1196176730Sjeff}; 1197176730Sjeff#endif 1198176730Sjeffint 1199225617Skmacysys_cpuset_setaffinity(struct thread *td, struct cpuset_setaffinity_args *uap) 1200176730Sjeff{ 1201315555Strasz 1202315555Strasz return (kern_cpuset_setaffinity(td, uap->level, uap->which, 1203315555Strasz uap->id, uap->cpusetsize, uap->mask)); 1204315555Strasz} 1205315555Strasz 1206315555Straszint 1207315555Straszkern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, 1208315555Strasz id_t id, size_t cpusetsize, const cpuset_t *maskp) 1209315555Strasz{ 1210176730Sjeff struct cpuset *nset; 1211176730Sjeff struct cpuset *set; 1212176730Sjeff struct thread *ttd; 1213176730Sjeff struct proc *p; 1214176730Sjeff cpuset_t *mask; 1215176730Sjeff int error; 1216176730Sjeff 1217315555Strasz if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY) 1218176730Sjeff return (ERANGE); 1219319819Sallanjude /* In Capability mode, you can only set your own CPU set. */ 1220319819Sallanjude if (IN_CAPABILITY_MODE(td)) { 1221319819Sallanjude if (level != CPU_LEVEL_WHICH) 1222319819Sallanjude return (ECAPMODE); 1223319819Sallanjude if (which != CPU_WHICH_TID && which != CPU_WHICH_PID) 1224319819Sallanjude return (ECAPMODE); 1225319819Sallanjude if (id != -1) 1226319819Sallanjude return (ECAPMODE); 1227319819Sallanjude } 1228315555Strasz mask = malloc(cpusetsize, M_TEMP, M_WAITOK | M_ZERO); 1229315555Strasz error = copyin(maskp, mask, cpusetsize); 1230176730Sjeff if (error) 1231176730Sjeff goto out; 1232176811Sjeff /* 1233176811Sjeff * Verify that no high bits are set. 
1234176811Sjeff */ 1235315555Strasz if (cpusetsize > sizeof(cpuset_t)) { 1236176811Sjeff char *end; 1237176811Sjeff char *cp; 1238176811Sjeff 1239176811Sjeff end = cp = (char *)&mask->__bits; 1240315555Strasz end += cpusetsize; 1241176811Sjeff cp += sizeof(cpuset_t); 1242176811Sjeff while (cp != end) 1243176811Sjeff if (*cp++ != 0) { 1244176811Sjeff error = EINVAL; 1245176811Sjeff goto out; 1246176811Sjeff } 1247176811Sjeff 1248176811Sjeff } 1249315555Strasz switch (level) { 1250176730Sjeff case CPU_LEVEL_ROOT: 1251176730Sjeff case CPU_LEVEL_CPUSET: 1252315555Strasz error = cpuset_which(which, id, &p, &ttd, &set); 1253176730Sjeff if (error) 1254176730Sjeff break; 1255315555Strasz switch (which) { 1256176730Sjeff case CPU_WHICH_TID: 1257176730Sjeff case CPU_WHICH_PID: 1258176730Sjeff thread_lock(ttd); 1259176730Sjeff set = cpuset_ref(ttd->td_cpuset); 1260176730Sjeff thread_unlock(ttd); 1261176880Sjeff PROC_UNLOCK(p); 1262176730Sjeff break; 1263176730Sjeff case CPU_WHICH_CPUSET: 1264185435Sbz case CPU_WHICH_JAIL: 1265176730Sjeff break; 1266178092Sjeff case CPU_WHICH_IRQ: 1267333338Sshurd case CPU_WHICH_INTRHANDLER: 1268333338Sshurd case CPU_WHICH_ITHREAD: 1269276829Sjhb case CPU_WHICH_DOMAIN: 1270178092Sjeff error = EINVAL; 1271178092Sjeff goto out; 1272176730Sjeff } 1273315555Strasz if (level == CPU_LEVEL_ROOT) 1274177738Sjeff nset = cpuset_refroot(set); 1275176730Sjeff else 1276177738Sjeff nset = cpuset_refbase(set); 1277176730Sjeff error = cpuset_modify(nset, mask); 1278176730Sjeff cpuset_rel(nset); 1279176730Sjeff cpuset_rel(set); 1280176730Sjeff break; 1281176730Sjeff case CPU_LEVEL_WHICH: 1282315555Strasz switch (which) { 1283176730Sjeff case CPU_WHICH_TID: 1284315555Strasz error = cpuset_setthread(id, mask); 1285176730Sjeff break; 1286176730Sjeff case CPU_WHICH_PID: 1287315555Strasz error = cpuset_setproc(id, NULL, mask); 1288176730Sjeff break; 1289176730Sjeff case CPU_WHICH_CPUSET: 1290185435Sbz case CPU_WHICH_JAIL: 1291315555Strasz error = 
cpuset_which(which, id, &p, &ttd, &set); 1292176730Sjeff if (error == 0) { 1293176730Sjeff error = cpuset_modify(set, mask); 1294176730Sjeff cpuset_rel(set); 1295176730Sjeff } 1296176730Sjeff break; 1297178092Sjeff case CPU_WHICH_IRQ: 1298333338Sshurd case CPU_WHICH_INTRHANDLER: 1299333338Sshurd case CPU_WHICH_ITHREAD: 1300333338Sshurd error = intr_setaffinity(id, which, mask); 1301178092Sjeff break; 1302176730Sjeff default: 1303176730Sjeff error = EINVAL; 1304176730Sjeff break; 1305176730Sjeff } 1306176730Sjeff break; 1307176730Sjeff default: 1308176730Sjeff error = EINVAL; 1309176730Sjeff break; 1310176730Sjeff } 1311176730Sjeffout: 1312176730Sjeff free(mask, M_TEMP); 1313176730Sjeff return (error); 1314176730Sjeff} 1315180358Sbz 1316180358Sbz#ifdef DDB 1317252209Sjhbvoid 1318252209Sjhbddb_display_cpuset(const cpuset_t *set) 1319252209Sjhb{ 1320252209Sjhb int cpu, once; 1321252209Sjhb 1322252209Sjhb for (once = 0, cpu = 0; cpu < CPU_SETSIZE; cpu++) { 1323252209Sjhb if (CPU_ISSET(cpu, set)) { 1324252209Sjhb if (once == 0) { 1325252209Sjhb db_printf("%d", cpu); 1326252209Sjhb once = 1; 1327252209Sjhb } else 1328252209Sjhb db_printf(",%d", cpu); 1329252209Sjhb } 1330252209Sjhb } 1331252209Sjhb if (once == 0) 1332252209Sjhb db_printf("<none>"); 1333252209Sjhb} 1334252209Sjhb 1335180358SbzDB_SHOW_COMMAND(cpusets, db_show_cpusets) 1336180358Sbz{ 1337180358Sbz struct cpuset *set; 1338180358Sbz 1339180358Sbz LIST_FOREACH(set, &cpuset_ids, cs_link) { 1340180358Sbz db_printf("set=%p id=%-6u ref=%-6d flags=0x%04x parent id=%d\n", 1341180358Sbz set, set->cs_id, set->cs_ref, set->cs_flags, 1342180358Sbz (set->cs_parent != NULL) ? set->cs_parent->cs_id : 0); 1343180358Sbz db_printf(" mask="); 1344252209Sjhb ddb_display_cpuset(&set->cs_mask); 1345180358Sbz db_printf("\n"); 1346180358Sbz if (db_pager_quit) 1347180358Sbz break; 1348180358Sbz } 1349180358Sbz} 1350180358Sbz#endif /* DDB */ 1351