kern_cpuset.c revision 191639
1176730Sjeff/*- 2176730Sjeff * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org> 3176730Sjeff * All rights reserved. 4177904Sjeff * 5177904Sjeff * Copyright (c) 2008 Nokia Corporation 6177904Sjeff * All rights reserved. 7176730Sjeff * 8176730Sjeff * Redistribution and use in source and binary forms, with or without 9176730Sjeff * modification, are permitted provided that the following conditions 10176730Sjeff * are met: 11176730Sjeff * 1. Redistributions of source code must retain the above copyright 12176730Sjeff * notice unmodified, this list of conditions, and the following 13176730Sjeff * disclaimer. 14176730Sjeff * 2. Redistributions in binary form must reproduce the above copyright 15176730Sjeff * notice, this list of conditions and the following disclaimer in the 16176730Sjeff * documentation and/or other materials provided with the distribution. 17176730Sjeff * 18176730Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19176730Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20176730Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21176730Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22176730Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23176730Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24176730Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25176730Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26176730Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27176730Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28176730Sjeff * 29176730Sjeff */ 30176730Sjeff 31176730Sjeff#include <sys/cdefs.h> 32176730Sjeff__FBSDID("$FreeBSD: head/sys/kern/kern_cpuset.c 191639 2009-04-28 21:00:50Z bz $"); 33176730Sjeff 34180358Sbz#include "opt_ddb.h" 35180358Sbz 36176730Sjeff#include <sys/param.h> 37176730Sjeff#include <sys/systm.h> 38176730Sjeff#include <sys/sysproto.h> 39176730Sjeff#include <sys/kernel.h> 40176730Sjeff#include <sys/lock.h> 41176730Sjeff#include <sys/malloc.h> 42176730Sjeff#include <sys/mutex.h> 43176730Sjeff#include <sys/priv.h> 44176730Sjeff#include <sys/proc.h> 45176730Sjeff#include <sys/refcount.h> 46176730Sjeff#include <sys/sched.h> 47176730Sjeff#include <sys/smp.h> 48176730Sjeff#include <sys/syscallsubr.h> 49176730Sjeff#include <sys/cpuset.h> 50176730Sjeff#include <sys/sx.h> 51176730Sjeff#include <sys/refcount.h> 52176730Sjeff#include <sys/queue.h> 53176730Sjeff#include <sys/limits.h> 54177738Sjeff#include <sys/bus.h> 55177738Sjeff#include <sys/interrupt.h> 56185435Sbz#include <sys/jail.h> /* Must come after sys/proc.h */ 57176730Sjeff 58176730Sjeff#include <vm/uma.h> 59176730Sjeff 60180358Sbz#ifdef DDB 61180358Sbz#include <ddb/ddb.h> 62180358Sbz#endif /* DDB */ 63180358Sbz 64176730Sjeff/* 65176730Sjeff * cpusets provide a mechanism for creating and manipulating sets of 66176730Sjeff * processors for the purpose of constraining the scheduling of threads to 67176730Sjeff * specific processors. 68176730Sjeff * 69176730Sjeff * Each process belongs to an identified set, by default this is set 1. Each 70176730Sjeff * thread may further restrict the cpus it may run on to a subset of this 71176730Sjeff * named set. This creates an anonymous set which other threads and processes 72176730Sjeff * may not join by number. 73176730Sjeff * 74176730Sjeff * The named set is referred to herein as the 'base' set to avoid ambiguity. 75176730Sjeff * This set is usually a child of a 'root' set while the anonymous set may 76176730Sjeff * simply be referred to as a mask. 
In the syscall api these are referred to
 * as the ROOT, CPUSET, and MASK levels where CPUSET is called 'base' here.
 *
 * Threads inherit their set from their creator whether it be anonymous or
 * not.  This means that anonymous sets are immutable because they may be
 * shared.  To modify an anonymous set a new set is created with the desired
 * mask and the same parent as the existing anonymous set.  This gives the
 * illusion of each thread having a private mask.
 *
 * Via the syscall apis a user may ask to retrieve or modify the root, base,
 * or mask that is discovered via a pid, tid, or setid.  Modifying a set
 * modifies all numbered and anonymous child sets to comply with the new mask.
 * Modifying a pid or tid's mask applies only to that tid but must still
 * exist within the assigned parent set.
 *
 * A thread may not be assigned to a group separate from other threads in
 * the process.  This is to remove ambiguity when the setid is queried with
 * a pid argument.  There is no other technical limitation.
 *
 * This somewhat complex arrangement is intended to make it easy for
 * applications to query available processors and bind their threads to
 * specific processors while also allowing administrators to dynamically
 * reprovision by changing sets which apply to groups of processes.
 *
 * A simple application should not concern itself with sets at all and
 * rather apply masks to its own threads via CPU_WHICH_TID and a -1 id
 * meaning 'curthread'.  It may query available cpus for that tid with a
 * getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...).
104176730Sjeff */ 105176730Sjeffstatic uma_zone_t cpuset_zone; 106176730Sjeffstatic struct mtx cpuset_lock; 107176730Sjeffstatic struct setlist cpuset_ids; 108176730Sjeffstatic struct unrhdr *cpuset_unr; 109177738Sjeffstatic struct cpuset *cpuset_zero; 110176730Sjeff 111177738Sjeffcpuset_t *cpuset_root; 112177738Sjeff 113176730Sjeff/* 114176730Sjeff * Acquire a reference to a cpuset, all pointers must be tracked with refs. 115176730Sjeff */ 116176730Sjeffstruct cpuset * 117176730Sjeffcpuset_ref(struct cpuset *set) 118176730Sjeff{ 119176730Sjeff 120176730Sjeff refcount_acquire(&set->cs_ref); 121176730Sjeff return (set); 122176730Sjeff} 123176730Sjeff 124176730Sjeff/* 125180356Sbz * Walks up the tree from 'set' to find the root. Returns the root 126180356Sbz * referenced. 127180356Sbz */ 128180356Sbzstatic struct cpuset * 129180356Sbzcpuset_refroot(struct cpuset *set) 130180356Sbz{ 131180356Sbz 132180356Sbz for (; set->cs_parent != NULL; set = set->cs_parent) 133180356Sbz if (set->cs_flags & CPU_SET_ROOT) 134180356Sbz break; 135180356Sbz cpuset_ref(set); 136180356Sbz 137180356Sbz return (set); 138180356Sbz} 139180356Sbz 140180356Sbz/* 141180356Sbz * Find the first non-anonymous set starting from 'set'. Returns this set 142180356Sbz * referenced. May return the passed in set with an extra ref if it is 143180356Sbz * not anonymous. 144180356Sbz */ 145180356Sbzstatic struct cpuset * 146180356Sbzcpuset_refbase(struct cpuset *set) 147180356Sbz{ 148180356Sbz 149180356Sbz if (set->cs_id == CPUSET_INVALID) 150180356Sbz set = set->cs_parent; 151180356Sbz cpuset_ref(set); 152180356Sbz 153180356Sbz return (set); 154180356Sbz} 155180356Sbz 156180356Sbz/* 157176730Sjeff * Release a reference in a context where it is safe to allocte. 
158176730Sjeff */ 159176730Sjeffvoid 160176730Sjeffcpuset_rel(struct cpuset *set) 161176730Sjeff{ 162176730Sjeff cpusetid_t id; 163176730Sjeff 164176730Sjeff if (refcount_release(&set->cs_ref) == 0) 165176730Sjeff return; 166176730Sjeff mtx_lock_spin(&cpuset_lock); 167176730Sjeff LIST_REMOVE(set, cs_siblings); 168176730Sjeff id = set->cs_id; 169176730Sjeff if (id != CPUSET_INVALID) 170176730Sjeff LIST_REMOVE(set, cs_link); 171176730Sjeff mtx_unlock_spin(&cpuset_lock); 172176730Sjeff cpuset_rel(set->cs_parent); 173176730Sjeff uma_zfree(cpuset_zone, set); 174176730Sjeff if (id != CPUSET_INVALID) 175176730Sjeff free_unr(cpuset_unr, id); 176176730Sjeff} 177176730Sjeff 178176730Sjeff/* 179176730Sjeff * Deferred release must be used when in a context that is not safe to 180176730Sjeff * allocate/free. This places any unreferenced sets on the list 'head'. 181176730Sjeff */ 182176730Sjeffstatic void 183176730Sjeffcpuset_rel_defer(struct setlist *head, struct cpuset *set) 184176730Sjeff{ 185176730Sjeff 186176730Sjeff if (refcount_release(&set->cs_ref) == 0) 187176730Sjeff return; 188176730Sjeff mtx_lock_spin(&cpuset_lock); 189176730Sjeff LIST_REMOVE(set, cs_siblings); 190176730Sjeff if (set->cs_id != CPUSET_INVALID) 191176730Sjeff LIST_REMOVE(set, cs_link); 192176730Sjeff LIST_INSERT_HEAD(head, set, cs_link); 193176730Sjeff mtx_unlock_spin(&cpuset_lock); 194176730Sjeff} 195176730Sjeff 196176730Sjeff/* 197176730Sjeff * Complete a deferred release. Removes the set from the list provided to 198176730Sjeff * cpuset_rel_defer. 199176730Sjeff */ 200176730Sjeffstatic void 201176730Sjeffcpuset_rel_complete(struct cpuset *set) 202176730Sjeff{ 203176730Sjeff LIST_REMOVE(set, cs_link); 204176730Sjeff cpuset_rel(set->cs_parent); 205176730Sjeff uma_zfree(cpuset_zone, set); 206176730Sjeff} 207176730Sjeff 208176730Sjeff/* 209176730Sjeff * Find a set based on an id. Returns it with a ref. 
210176730Sjeff */ 211176730Sjeffstatic struct cpuset * 212185435Sbzcpuset_lookup(cpusetid_t setid, struct thread *td) 213176730Sjeff{ 214176730Sjeff struct cpuset *set; 215176730Sjeff 216176730Sjeff if (setid == CPUSET_INVALID) 217176730Sjeff return (NULL); 218176730Sjeff mtx_lock_spin(&cpuset_lock); 219176730Sjeff LIST_FOREACH(set, &cpuset_ids, cs_link) 220176730Sjeff if (set->cs_id == setid) 221176730Sjeff break; 222176730Sjeff if (set) 223176730Sjeff cpuset_ref(set); 224176730Sjeff mtx_unlock_spin(&cpuset_lock); 225185435Sbz 226185435Sbz KASSERT(td != NULL, ("[%s:%d] td is NULL", __func__, __LINE__)); 227185435Sbz if (set != NULL && jailed(td->td_ucred)) { 228185435Sbz struct cpuset *rset, *jset; 229185435Sbz struct prison *pr; 230185435Sbz 231185435Sbz rset = cpuset_refroot(set); 232185435Sbz 233185435Sbz pr = td->td_ucred->cr_prison; 234185435Sbz mtx_lock(&pr->pr_mtx); 235185435Sbz cpuset_ref(pr->pr_cpuset); 236185435Sbz jset = pr->pr_cpuset; 237185435Sbz mtx_unlock(&pr->pr_mtx); 238185435Sbz 239185435Sbz if (jset->cs_id != rset->cs_id) { 240185435Sbz cpuset_rel(set); 241185435Sbz set = NULL; 242185435Sbz } 243185435Sbz cpuset_rel(jset); 244185435Sbz cpuset_rel(rset); 245185435Sbz } 246185435Sbz 247176730Sjeff return (set); 248176730Sjeff} 249176730Sjeff 250176730Sjeff/* 251176730Sjeff * Create a set in the space provided in 'set' with the provided parameters. 252176730Sjeff * The set is returned with a single ref. May return EDEADLK if the set 253176730Sjeff * will have no valid cpu based on restrictions from the parent. 
254176730Sjeff */ 255176730Sjeffstatic int 256176730Sjeff_cpuset_create(struct cpuset *set, struct cpuset *parent, cpuset_t *mask, 257176730Sjeff cpusetid_t id) 258176730Sjeff{ 259176730Sjeff 260176811Sjeff if (!CPU_OVERLAP(&parent->cs_mask, mask)) 261176811Sjeff return (EDEADLK); 262176730Sjeff CPU_COPY(mask, &set->cs_mask); 263176730Sjeff LIST_INIT(&set->cs_children); 264176730Sjeff refcount_init(&set->cs_ref, 1); 265176730Sjeff set->cs_flags = 0; 266176730Sjeff mtx_lock_spin(&cpuset_lock); 267176730Sjeff CPU_AND(mask, &parent->cs_mask); 268176811Sjeff set->cs_id = id; 269176811Sjeff set->cs_parent = cpuset_ref(parent); 270176811Sjeff LIST_INSERT_HEAD(&parent->cs_children, set, cs_siblings); 271176811Sjeff if (set->cs_id != CPUSET_INVALID) 272176811Sjeff LIST_INSERT_HEAD(&cpuset_ids, set, cs_link); 273176730Sjeff mtx_unlock_spin(&cpuset_lock); 274176730Sjeff 275176811Sjeff return (0); 276176730Sjeff} 277176730Sjeff 278176730Sjeff/* 279176730Sjeff * Create a new non-anonymous set with the requested parent and mask. May 280176730Sjeff * return failures if the mask is invalid or a new number can not be 281176730Sjeff * allocated. 
282176730Sjeff */ 283176730Sjeffstatic int 284176730Sjeffcpuset_create(struct cpuset **setp, struct cpuset *parent, cpuset_t *mask) 285176730Sjeff{ 286176730Sjeff struct cpuset *set; 287176730Sjeff cpusetid_t id; 288176730Sjeff int error; 289176730Sjeff 290176730Sjeff id = alloc_unr(cpuset_unr); 291176730Sjeff if (id == -1) 292176730Sjeff return (ENFILE); 293176730Sjeff *setp = set = uma_zalloc(cpuset_zone, M_WAITOK); 294176730Sjeff error = _cpuset_create(set, parent, mask, id); 295176730Sjeff if (error == 0) 296176730Sjeff return (0); 297176730Sjeff free_unr(cpuset_unr, id); 298176730Sjeff uma_zfree(cpuset_zone, set); 299176730Sjeff 300176730Sjeff return (error); 301176730Sjeff} 302176730Sjeff 303176730Sjeff/* 304176730Sjeff * Recursively check for errors that would occur from applying mask to 305176730Sjeff * the tree of sets starting at 'set'. Checks for sets that would become 306176730Sjeff * empty as well as RDONLY flags. 307176730Sjeff */ 308176730Sjeffstatic int 309176730Sjeffcpuset_testupdate(struct cpuset *set, cpuset_t *mask) 310176730Sjeff{ 311176730Sjeff struct cpuset *nset; 312176730Sjeff cpuset_t newmask; 313176730Sjeff int error; 314176730Sjeff 315176730Sjeff mtx_assert(&cpuset_lock, MA_OWNED); 316176730Sjeff if (set->cs_flags & CPU_SET_RDONLY) 317176730Sjeff return (EPERM); 318176811Sjeff if (!CPU_OVERLAP(&set->cs_mask, mask)) 319176811Sjeff return (EDEADLK); 320176730Sjeff CPU_COPY(&set->cs_mask, &newmask); 321176730Sjeff CPU_AND(&newmask, mask); 322176811Sjeff error = 0; 323176730Sjeff LIST_FOREACH(nset, &set->cs_children, cs_siblings) 324176730Sjeff if ((error = cpuset_testupdate(nset, &newmask)) != 0) 325176730Sjeff break; 326176730Sjeff return (error); 327176730Sjeff} 328176730Sjeff 329176730Sjeff/* 330176730Sjeff * Applies the mask 'mask' without checking for empty sets or permissions. 
331176730Sjeff */ 332176730Sjeffstatic void 333176730Sjeffcpuset_update(struct cpuset *set, cpuset_t *mask) 334176730Sjeff{ 335176730Sjeff struct cpuset *nset; 336176730Sjeff 337176730Sjeff mtx_assert(&cpuset_lock, MA_OWNED); 338176730Sjeff CPU_AND(&set->cs_mask, mask); 339176730Sjeff LIST_FOREACH(nset, &set->cs_children, cs_siblings) 340176730Sjeff cpuset_update(nset, &set->cs_mask); 341176730Sjeff 342176730Sjeff return; 343176730Sjeff} 344176730Sjeff 345176730Sjeff/* 346176730Sjeff * Modify the set 'set' to use a copy of the mask provided. Apply this new 347176730Sjeff * mask to restrict all children in the tree. Checks for validity before 348176730Sjeff * applying the changes. 349176730Sjeff */ 350176730Sjeffstatic int 351176730Sjeffcpuset_modify(struct cpuset *set, cpuset_t *mask) 352176730Sjeff{ 353176811Sjeff struct cpuset *root; 354176730Sjeff int error; 355176730Sjeff 356180098Sbz error = priv_check(curthread, PRIV_SCHED_CPUSET); 357176730Sjeff if (error) 358176730Sjeff return (error); 359176811Sjeff /* 360191639Sbz * In case we are called from within the jail 361191639Sbz * we do not allow modifying the dedicated root 362191639Sbz * cpuset of the jail but may still allow to 363191639Sbz * change child sets. 364191639Sbz */ 365191639Sbz if (jailed(curthread->td_ucred) && 366191639Sbz set->cs_flags & CPU_SET_ROOT) 367191639Sbz return (EPERM); 368191639Sbz /* 369176811Sjeff * Verify that we have access to this set of 370176811Sjeff * cpus. 
371176811Sjeff */ 372176811Sjeff root = set->cs_parent; 373176811Sjeff if (root && !CPU_SUBSET(&root->cs_mask, mask)) 374176811Sjeff return (EINVAL); 375176730Sjeff mtx_lock_spin(&cpuset_lock); 376176730Sjeff error = cpuset_testupdate(set, mask); 377176730Sjeff if (error) 378176730Sjeff goto out; 379176730Sjeff cpuset_update(set, mask); 380176730Sjeff CPU_COPY(mask, &set->cs_mask); 381176730Sjeffout: 382176730Sjeff mtx_unlock_spin(&cpuset_lock); 383176730Sjeff 384176730Sjeff return (error); 385176730Sjeff} 386176730Sjeff 387176730Sjeff/* 388176730Sjeff * Resolve the 'which' parameter of several cpuset apis. 389176730Sjeff * 390176730Sjeff * For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid. Also 391176730Sjeff * checks for permission via p_cansched(). 392176730Sjeff * 393176730Sjeff * For WHICH_SET returns a valid set with a new reference. 394176730Sjeff * 395176730Sjeff * -1 may be supplied for any argument to mean the current proc/thread or 396176730Sjeff * the base set of the current thread. May fail with ESRCH/EPERM. 
397176730Sjeff */ 398176730Sjeffstatic int 399176730Sjeffcpuset_which(cpuwhich_t which, id_t id, struct proc **pp, struct thread **tdp, 400176730Sjeff struct cpuset **setp) 401176730Sjeff{ 402176730Sjeff struct cpuset *set; 403176730Sjeff struct thread *td; 404176730Sjeff struct proc *p; 405176730Sjeff int error; 406176730Sjeff 407176730Sjeff *pp = p = NULL; 408176730Sjeff *tdp = td = NULL; 409176730Sjeff *setp = set = NULL; 410176730Sjeff switch (which) { 411176730Sjeff case CPU_WHICH_PID: 412176730Sjeff if (id == -1) { 413176730Sjeff PROC_LOCK(curproc); 414176730Sjeff p = curproc; 415176730Sjeff break; 416176730Sjeff } 417176730Sjeff if ((p = pfind(id)) == NULL) 418176730Sjeff return (ESRCH); 419176730Sjeff break; 420176730Sjeff case CPU_WHICH_TID: 421176730Sjeff if (id == -1) { 422176730Sjeff PROC_LOCK(curproc); 423176730Sjeff p = curproc; 424176730Sjeff td = curthread; 425176730Sjeff break; 426176730Sjeff } 427176730Sjeff sx_slock(&allproc_lock); 428176730Sjeff FOREACH_PROC_IN_SYSTEM(p) { 429176730Sjeff PROC_LOCK(p); 430176730Sjeff FOREACH_THREAD_IN_PROC(p, td) 431176730Sjeff if (td->td_tid == id) 432176730Sjeff break; 433176730Sjeff if (td != NULL) 434176730Sjeff break; 435176730Sjeff PROC_UNLOCK(p); 436176730Sjeff } 437176730Sjeff sx_sunlock(&allproc_lock); 438176730Sjeff if (td == NULL) 439176730Sjeff return (ESRCH); 440176730Sjeff break; 441176730Sjeff case CPU_WHICH_CPUSET: 442176730Sjeff if (id == -1) { 443176730Sjeff thread_lock(curthread); 444177738Sjeff set = cpuset_refbase(curthread->td_cpuset); 445176730Sjeff thread_unlock(curthread); 446176730Sjeff } else 447185435Sbz set = cpuset_lookup(id, curthread); 448176730Sjeff if (set) { 449176730Sjeff *setp = set; 450176730Sjeff return (0); 451176730Sjeff } 452176730Sjeff return (ESRCH); 453185435Sbz case CPU_WHICH_JAIL: 454185435Sbz { 455185435Sbz /* Find `set' for prison with given id. 
*/ 456185435Sbz struct prison *pr; 457185435Sbz 458185435Sbz sx_slock(&allprison_lock); 459185435Sbz pr = prison_find(id); 460185435Sbz sx_sunlock(&allprison_lock); 461185435Sbz if (pr == NULL) 462185435Sbz return (ESRCH); 463185435Sbz if (jailed(curthread->td_ucred)) { 464185435Sbz if (curthread->td_ucred->cr_prison == pr) { 465185435Sbz cpuset_ref(pr->pr_cpuset); 466185435Sbz set = pr->pr_cpuset; 467185435Sbz } 468185435Sbz } else { 469185435Sbz cpuset_ref(pr->pr_cpuset); 470185435Sbz set = pr->pr_cpuset; 471185435Sbz } 472185435Sbz mtx_unlock(&pr->pr_mtx); 473185435Sbz if (set) { 474185435Sbz *setp = set; 475185435Sbz return (0); 476185435Sbz } 477185435Sbz return (ESRCH); 478185435Sbz } 479178092Sjeff case CPU_WHICH_IRQ: 480178092Sjeff return (0); 481176730Sjeff default: 482176730Sjeff return (EINVAL); 483176730Sjeff } 484176730Sjeff error = p_cansched(curthread, p); 485176730Sjeff if (error) { 486176730Sjeff PROC_UNLOCK(p); 487176730Sjeff return (error); 488176730Sjeff } 489176730Sjeff if (td == NULL) 490176730Sjeff td = FIRST_THREAD_IN_PROC(p); 491176730Sjeff *pp = p; 492176730Sjeff *tdp = td; 493176730Sjeff return (0); 494176730Sjeff} 495176730Sjeff 496176730Sjeff/* 497176730Sjeff * Create an anonymous set with the provided mask in the space provided by 498176730Sjeff * 'fset'. If the passed in set is anonymous we use its parent otherwise 499176730Sjeff * the new set is a child of 'set'. 
500176730Sjeff */ 501176730Sjeffstatic int 502176730Sjeffcpuset_shadow(struct cpuset *set, struct cpuset *fset, cpuset_t *mask) 503176730Sjeff{ 504176730Sjeff struct cpuset *parent; 505176730Sjeff 506176730Sjeff if (set->cs_id == CPUSET_INVALID) 507176730Sjeff parent = set->cs_parent; 508176730Sjeff else 509176730Sjeff parent = set; 510176811Sjeff if (!CPU_SUBSET(&parent->cs_mask, mask)) 511177738Sjeff return (EDEADLK); 512176730Sjeff return (_cpuset_create(fset, parent, mask, CPUSET_INVALID)); 513176730Sjeff} 514176730Sjeff 515176730Sjeff/* 516176730Sjeff * Handle two cases for replacing the base set or mask of an entire process. 517176730Sjeff * 518176730Sjeff * 1) Set is non-null and mask is null. This reparents all anonymous sets 519176730Sjeff * to the provided set and replaces all non-anonymous td_cpusets with the 520176730Sjeff * provided set. 521176730Sjeff * 2) Mask is non-null and set is null. This replaces or creates anonymous 522176730Sjeff * sets for every thread with the existing base as a parent. 523176730Sjeff * 524176730Sjeff * This is overly complicated because we can't allocate while holding a 525176730Sjeff * spinlock and spinlocks must be held while changing and examining thread 526176730Sjeff * state. 527176730Sjeff */ 528176730Sjeffstatic int 529176730Sjeffcpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask) 530176730Sjeff{ 531176730Sjeff struct setlist freelist; 532176730Sjeff struct setlist droplist; 533176811Sjeff struct cpuset *tdset; 534176730Sjeff struct cpuset *nset; 535176730Sjeff struct thread *td; 536176730Sjeff struct proc *p; 537176730Sjeff int threads; 538176730Sjeff int nfree; 539176730Sjeff int error; 540176730Sjeff /* 541176730Sjeff * The algorithm requires two passes due to locking considerations. 542176730Sjeff * 543176730Sjeff * 1) Lookup the process and acquire the locks in the required order. 544176730Sjeff * 2) If enough cpusets have not been allocated release the locks and 545176730Sjeff * allocate them. Loop. 
546176730Sjeff */ 547176730Sjeff LIST_INIT(&freelist); 548176730Sjeff LIST_INIT(&droplist); 549176730Sjeff nfree = 0; 550176730Sjeff for (;;) { 551176730Sjeff error = cpuset_which(CPU_WHICH_PID, pid, &p, &td, &nset); 552176730Sjeff if (error) 553176730Sjeff goto out; 554176730Sjeff if (nfree >= p->p_numthreads) 555176730Sjeff break; 556176730Sjeff threads = p->p_numthreads; 557176730Sjeff PROC_UNLOCK(p); 558176730Sjeff for (; nfree < threads; nfree++) { 559176730Sjeff nset = uma_zalloc(cpuset_zone, M_WAITOK); 560176730Sjeff LIST_INSERT_HEAD(&freelist, nset, cs_link); 561176730Sjeff } 562176730Sjeff } 563176730Sjeff PROC_LOCK_ASSERT(p, MA_OWNED); 564176730Sjeff /* 565176730Sjeff * Now that the appropriate locks are held and we have enough cpusets, 566176811Sjeff * make sure the operation will succeed before applying changes. The 567176811Sjeff * proc lock prevents td_cpuset from changing between calls. 568176811Sjeff */ 569176811Sjeff error = 0; 570176811Sjeff FOREACH_THREAD_IN_PROC(p, td) { 571176811Sjeff thread_lock(td); 572176811Sjeff tdset = td->td_cpuset; 573176811Sjeff /* 574176811Sjeff * Verify that a new mask doesn't specify cpus outside of 575176811Sjeff * the set the thread is a member of. 576176811Sjeff */ 577176811Sjeff if (mask) { 578176811Sjeff if (tdset->cs_id == CPUSET_INVALID) 579176811Sjeff tdset = tdset->cs_parent; 580176811Sjeff if (!CPU_SUBSET(&tdset->cs_mask, mask)) 581177738Sjeff error = EDEADLK; 582176811Sjeff /* 583176811Sjeff * Verify that a new set won't leave an existing thread 584176811Sjeff * mask without a cpu to run on. It can, however, restrict 585176811Sjeff * the set. 
586176811Sjeff */ 587176811Sjeff } else if (tdset->cs_id == CPUSET_INVALID) { 588176811Sjeff if (!CPU_OVERLAP(&set->cs_mask, &tdset->cs_mask)) 589177738Sjeff error = EDEADLK; 590176811Sjeff } 591176811Sjeff thread_unlock(td); 592176811Sjeff if (error) 593176811Sjeff goto unlock_out; 594176811Sjeff } 595176811Sjeff /* 596176811Sjeff * Replace each thread's cpuset while using deferred release. We 597177368Sjeff * must do this because the thread lock must be held while operating 598177368Sjeff * on the thread and this limits the type of operations allowed. 599176730Sjeff */ 600176730Sjeff FOREACH_THREAD_IN_PROC(p, td) { 601176730Sjeff thread_lock(td); 602176730Sjeff /* 603176730Sjeff * If we presently have an anonymous set or are applying a 604176730Sjeff * mask we must create an anonymous shadow set. That is 605176730Sjeff * either parented to our existing base or the supplied set. 606176730Sjeff * 607176730Sjeff * If we have a base set with no anonymous shadow we simply 608176730Sjeff * replace it outright. 
609176730Sjeff */ 610176730Sjeff tdset = td->td_cpuset; 611176730Sjeff if (tdset->cs_id == CPUSET_INVALID || mask) { 612176730Sjeff nset = LIST_FIRST(&freelist); 613176730Sjeff LIST_REMOVE(nset, cs_link); 614176730Sjeff if (mask) 615176730Sjeff error = cpuset_shadow(tdset, nset, mask); 616176730Sjeff else 617176730Sjeff error = _cpuset_create(nset, set, 618176730Sjeff &tdset->cs_mask, CPUSET_INVALID); 619176730Sjeff if (error) { 620176730Sjeff LIST_INSERT_HEAD(&freelist, nset, cs_link); 621176730Sjeff thread_unlock(td); 622176730Sjeff break; 623176730Sjeff } 624176730Sjeff } else 625176730Sjeff nset = cpuset_ref(set); 626176730Sjeff cpuset_rel_defer(&droplist, tdset); 627176730Sjeff td->td_cpuset = nset; 628176730Sjeff sched_affinity(td); 629176730Sjeff thread_unlock(td); 630176730Sjeff } 631176811Sjeffunlock_out: 632176730Sjeff PROC_UNLOCK(p); 633176730Sjeffout: 634176730Sjeff while ((nset = LIST_FIRST(&droplist)) != NULL) 635176730Sjeff cpuset_rel_complete(nset); 636176730Sjeff while ((nset = LIST_FIRST(&freelist)) != NULL) { 637176730Sjeff LIST_REMOVE(nset, cs_link); 638176730Sjeff uma_zfree(cpuset_zone, nset); 639176730Sjeff } 640176730Sjeff return (error); 641176730Sjeff} 642176730Sjeff 643176730Sjeff/* 644176730Sjeff * Apply an anonymous mask to a single thread. 
645176730Sjeff */ 646177738Sjeffint 647176730Sjeffcpuset_setthread(lwpid_t id, cpuset_t *mask) 648176730Sjeff{ 649176730Sjeff struct cpuset *nset; 650176730Sjeff struct cpuset *set; 651176730Sjeff struct thread *td; 652176730Sjeff struct proc *p; 653176730Sjeff int error; 654176730Sjeff 655176730Sjeff nset = uma_zalloc(cpuset_zone, M_WAITOK); 656176821Sjeff error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &set); 657176730Sjeff if (error) 658176730Sjeff goto out; 659177738Sjeff set = NULL; 660176730Sjeff thread_lock(td); 661177738Sjeff error = cpuset_shadow(td->td_cpuset, nset, mask); 662176730Sjeff if (error == 0) { 663177738Sjeff set = td->td_cpuset; 664176730Sjeff td->td_cpuset = nset; 665176730Sjeff sched_affinity(td); 666176730Sjeff nset = NULL; 667176730Sjeff } 668176730Sjeff thread_unlock(td); 669176730Sjeff PROC_UNLOCK(p); 670177738Sjeff if (set) 671177738Sjeff cpuset_rel(set); 672176730Sjeffout: 673176730Sjeff if (nset) 674176730Sjeff uma_zfree(cpuset_zone, nset); 675176730Sjeff return (error); 676176730Sjeff} 677176730Sjeff 678176730Sjeff/* 679176730Sjeff * Creates the cpuset for thread0. We make two sets: 680176730Sjeff * 681176730Sjeff * 0 - The root set which should represent all valid processors in the 682176730Sjeff * system. It is initially created with a mask of all processors 683176730Sjeff * because we don't know what processors are valid until cpuset_init() 684176730Sjeff * runs. This set is immutable. 685176730Sjeff * 1 - The default set which all processes are a member of until changed. 686176730Sjeff * This allows an administrator to move all threads off of given cpus to 687176730Sjeff * dedicate them to high priority tasks or save power etc. 
688176730Sjeff */ 689176730Sjeffstruct cpuset * 690176730Sjeffcpuset_thread0(void) 691176730Sjeff{ 692176730Sjeff struct cpuset *set; 693176730Sjeff int error; 694176730Sjeff 695176730Sjeff cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL, 696176730Sjeff NULL, NULL, UMA_ALIGN_PTR, 0); 697176730Sjeff mtx_init(&cpuset_lock, "cpuset", NULL, MTX_SPIN | MTX_RECURSE); 698176730Sjeff /* 699176730Sjeff * Create the root system set for the whole machine. Doesn't use 700176730Sjeff * cpuset_create() due to NULL parent. 701176730Sjeff */ 702176730Sjeff set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); 703176730Sjeff set->cs_mask.__bits[0] = -1; 704176730Sjeff LIST_INIT(&set->cs_children); 705176730Sjeff LIST_INSERT_HEAD(&cpuset_ids, set, cs_link); 706176730Sjeff set->cs_ref = 1; 707176730Sjeff set->cs_flags = CPU_SET_ROOT; 708176730Sjeff cpuset_zero = set; 709177738Sjeff cpuset_root = &set->cs_mask; 710176730Sjeff /* 711176730Sjeff * Now derive a default, modifiable set from that to give out. 712176730Sjeff */ 713176730Sjeff set = uma_zalloc(cpuset_zone, M_WAITOK); 714176730Sjeff error = _cpuset_create(set, cpuset_zero, &cpuset_zero->cs_mask, 1); 715176730Sjeff KASSERT(error == 0, ("Error creating default set: %d\n", error)); 716176730Sjeff /* 717176730Sjeff * Initialize the unit allocator. 0 and 1 are allocated above. 718176730Sjeff */ 719176730Sjeff cpuset_unr = new_unrhdr(2, INT_MAX, NULL); 720176730Sjeff 721176730Sjeff return (set); 722176730Sjeff} 723176730Sjeff 724176730Sjeff/* 725185435Sbz * Create a cpuset, which would be cpuset_create() but 726185435Sbz * mark the new 'set' as root. 727185435Sbz * 728191403Sbz * We are not going to reparent the td to it. Use cpuset_setproc_update_set() 729191403Sbz * for that. 730185435Sbz * 731185435Sbz * In case of no error, returns the set in *setp locked with a reference. 
732185435Sbz */ 733185435Sbzint 734185435Sbzcpuset_create_root(struct thread *td, struct cpuset **setp) 735185435Sbz{ 736185435Sbz struct cpuset *root; 737185435Sbz struct cpuset *set; 738185435Sbz int error; 739185435Sbz 740185435Sbz KASSERT(td != NULL, ("[%s:%d] invalid td", __func__, __LINE__)); 741185435Sbz KASSERT(setp != NULL, ("[%s:%d] invalid setp", __func__, __LINE__)); 742185435Sbz 743185435Sbz thread_lock(td); 744185435Sbz root = cpuset_refroot(td->td_cpuset); 745185435Sbz thread_unlock(td); 746185435Sbz 747185435Sbz error = cpuset_create(setp, td->td_cpuset, &root->cs_mask); 748185435Sbz cpuset_rel(root); 749185435Sbz if (error) 750185435Sbz return (error); 751185435Sbz 752185435Sbz KASSERT(*setp != NULL, ("[%s:%d] cpuset_create returned invalid data", 753185435Sbz __func__, __LINE__)); 754185435Sbz 755185435Sbz /* Mark the set as root. */ 756185435Sbz set = *setp; 757185435Sbz set->cs_flags |= CPU_SET_ROOT; 758185435Sbz 759185435Sbz return (0); 760185435Sbz} 761185435Sbz 762185435Sbzint 763185435Sbzcpuset_setproc_update_set(struct proc *p, struct cpuset *set) 764185435Sbz{ 765185435Sbz int error; 766185435Sbz 767185435Sbz KASSERT(p != NULL, ("[%s:%d] invalid proc", __func__, __LINE__)); 768185435Sbz KASSERT(set != NULL, ("[%s:%d] invalid set", __func__, __LINE__)); 769185435Sbz 770185435Sbz cpuset_ref(set); 771185435Sbz error = cpuset_setproc(p->p_pid, set, NULL); 772185435Sbz if (error) 773185435Sbz return (error); 774185435Sbz cpuset_rel(set); 775185435Sbz return (0); 776185435Sbz} 777185435Sbz 778185435Sbz/* 779176730Sjeff * This is called once the final set of system cpus is known. Modifies 780176730Sjeff * the root set and all children and mark the root readonly. 
781176730Sjeff */ 782176730Sjeffstatic void 783176730Sjeffcpuset_init(void *arg) 784176730Sjeff{ 785176730Sjeff cpuset_t mask; 786176730Sjeff 787176730Sjeff CPU_ZERO(&mask); 788176730Sjeff#ifdef SMP 789176730Sjeff mask.__bits[0] = all_cpus; 790176730Sjeff#else 791176730Sjeff mask.__bits[0] = 1; 792176730Sjeff#endif 793176730Sjeff if (cpuset_modify(cpuset_zero, &mask)) 794176730Sjeff panic("Can't set initial cpuset mask.\n"); 795176730Sjeff cpuset_zero->cs_flags |= CPU_SET_RDONLY; 796176730Sjeff} 797176730SjeffSYSINIT(cpuset, SI_SUB_SMP, SI_ORDER_ANY, cpuset_init, NULL); 798176730Sjeff 799176730Sjeff#ifndef _SYS_SYSPROTO_H_ 800176730Sjeffstruct cpuset_args { 801176730Sjeff cpusetid_t *setid; 802176730Sjeff}; 803176730Sjeff#endif 804176730Sjeffint 805176730Sjeffcpuset(struct thread *td, struct cpuset_args *uap) 806176730Sjeff{ 807176730Sjeff struct cpuset *root; 808176730Sjeff struct cpuset *set; 809176730Sjeff int error; 810176730Sjeff 811176730Sjeff thread_lock(td); 812177738Sjeff root = cpuset_refroot(td->td_cpuset); 813176730Sjeff thread_unlock(td); 814176730Sjeff error = cpuset_create(&set, root, &root->cs_mask); 815176730Sjeff cpuset_rel(root); 816176730Sjeff if (error) 817176730Sjeff return (error); 818177738Sjeff error = copyout(&set->cs_id, uap->setid, sizeof(set->cs_id)); 819176730Sjeff if (error == 0) 820177738Sjeff error = cpuset_setproc(-1, set, NULL); 821176730Sjeff cpuset_rel(set); 822176730Sjeff return (error); 823176730Sjeff} 824176730Sjeff 825176730Sjeff#ifndef _SYS_SYSPROTO_H_ 826176730Sjeffstruct cpuset_setid_args { 827176730Sjeff cpuwhich_t which; 828176730Sjeff id_t id; 829176730Sjeff cpusetid_t setid; 830176730Sjeff}; 831176730Sjeff#endif 832176730Sjeffint 833176730Sjeffcpuset_setid(struct thread *td, struct cpuset_setid_args *uap) 834176730Sjeff{ 835176730Sjeff struct cpuset *set; 836176730Sjeff int error; 837176730Sjeff 838176730Sjeff /* 839176730Sjeff * Presently we only support per-process sets. 
840176730Sjeff */ 841176730Sjeff if (uap->which != CPU_WHICH_PID) 842176730Sjeff return (EINVAL); 843185435Sbz set = cpuset_lookup(uap->setid, td); 844176730Sjeff if (set == NULL) 845176730Sjeff return (ESRCH); 846176730Sjeff error = cpuset_setproc(uap->id, set, NULL); 847176730Sjeff cpuset_rel(set); 848176730Sjeff return (error); 849176730Sjeff} 850176730Sjeff 851176730Sjeff#ifndef _SYS_SYSPROTO_H_ 852176730Sjeffstruct cpuset_getid_args { 853176730Sjeff cpulevel_t level; 854176730Sjeff cpuwhich_t which; 855176730Sjeff id_t id; 856176730Sjeff cpusetid_t *setid; 857176730Sjeff#endif 858176730Sjeffint 859176730Sjeffcpuset_getid(struct thread *td, struct cpuset_getid_args *uap) 860176730Sjeff{ 861176730Sjeff struct cpuset *nset; 862176730Sjeff struct cpuset *set; 863176730Sjeff struct thread *ttd; 864176730Sjeff struct proc *p; 865176730Sjeff cpusetid_t id; 866176730Sjeff int error; 867176730Sjeff 868176730Sjeff if (uap->level == CPU_LEVEL_WHICH && uap->which != CPU_WHICH_CPUSET) 869176730Sjeff return (EINVAL); 870176730Sjeff error = cpuset_which(uap->which, uap->id, &p, &ttd, &set); 871176730Sjeff if (error) 872176730Sjeff return (error); 873176730Sjeff switch (uap->which) { 874176730Sjeff case CPU_WHICH_TID: 875176730Sjeff case CPU_WHICH_PID: 876176730Sjeff thread_lock(ttd); 877177738Sjeff set = cpuset_refbase(ttd->td_cpuset); 878176730Sjeff thread_unlock(ttd); 879176730Sjeff PROC_UNLOCK(p); 880176730Sjeff break; 881176730Sjeff case CPU_WHICH_CPUSET: 882185435Sbz case CPU_WHICH_JAIL: 883176730Sjeff break; 884178092Sjeff case CPU_WHICH_IRQ: 885178092Sjeff return (EINVAL); 886176730Sjeff } 887176730Sjeff switch (uap->level) { 888176730Sjeff case CPU_LEVEL_ROOT: 889177738Sjeff nset = cpuset_refroot(set); 890176730Sjeff cpuset_rel(set); 891176730Sjeff set = nset; 892176730Sjeff break; 893176730Sjeff case CPU_LEVEL_CPUSET: 894176730Sjeff break; 895176730Sjeff case CPU_LEVEL_WHICH: 896176730Sjeff break; 897176730Sjeff } 898176730Sjeff id = set->cs_id; 899176730Sjeff 
	cpuset_rel(set);
	if (error == 0)
		error = copyout(&id, uap->setid, sizeof(id));

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_getaffinity_args {
	cpulevel_t	level;
	cpuwhich_t	which;
	id_t		id;
	size_t		cpusetsize;
	cpuset_t	*mask;
};
#endif
/*
 * cpuset_getaffinity(2): copy the cpu mask for the requested
 * entity (tid/pid/setid/jail/irq) at the requested level out to
 * userland.  cpusetsize must be at least sizeof(cpuset_t) and no
 * larger than the kernel maximum.
 */
int
cpuset_getaffinity(struct thread *td, struct cpuset_getaffinity_args *uap)
{
	struct thread *ttd;
	struct cpuset *nset;
	struct cpuset *set;
	struct proc *p;
	cpuset_t *mask;
	int error;
	size_t size;

	if (uap->cpusetsize < sizeof(cpuset_t) ||
	    uap->cpusetsize > CPU_MAXSIZE / NBBY)
		return (ERANGE);
	size = uap->cpusetsize;
	/* M_ZERO: bytes beyond sizeof(cpuset_t) are returned as zero. */
	mask = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
	error = cpuset_which(uap->which, uap->id, &p, &ttd, &set);
	if (error)
		goto out;
	switch (uap->level) {
	case CPU_LEVEL_ROOT:
	case CPU_LEVEL_CPUSET:
		switch (uap->which) {
		case CPU_WHICH_TID:
		case CPU_WHICH_PID:
			thread_lock(ttd);
			set = cpuset_ref(ttd->td_cpuset);
			thread_unlock(ttd);
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			break;
		case CPU_WHICH_IRQ:
			/*
			 * NOTE(review): goto out skips the set/p cleanup
			 * below; presumably cpuset_which() returns NULL
			 * set/p for IRQ so nothing is leaked — confirm.
			 */
			error = EINVAL;
			goto out;
		}
		if (uap->level == CPU_LEVEL_ROOT)
			nset = cpuset_refroot(set);
		else
			nset = cpuset_refbase(set);
		CPU_COPY(&nset->cs_mask, mask);
		cpuset_rel(nset);
		break;
	case CPU_LEVEL_WHICH:
		switch (uap->which) {
		case CPU_WHICH_TID:
			/* Anonymous per-thread mask. */
			thread_lock(ttd);
			CPU_COPY(&ttd->td_cpuset->cs_mask, mask);
			thread_unlock(ttd);
			break;
		case CPU_WHICH_PID:
			/* Union of every thread's mask in the process. */
			FOREACH_THREAD_IN_PROC(p, ttd) {
				thread_lock(ttd);
				CPU_OR(mask, &ttd->td_cpuset->cs_mask);
				thread_unlock(ttd);
			}
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			CPU_COPY(&set->cs_mask, mask);
			break;
		case CPU_WHICH_IRQ:
			error = intr_getaffinity(uap->id, mask);
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	/* Release whatever cpuset_which()/the cases above left held. */
	if (set)
		cpuset_rel(set);
	if (p)
		PROC_UNLOCK(p);
	if (error == 0)
		error = copyout(mask, uap->mask, size);
out:
	free(mask, M_TEMP);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_setaffinity_args {
	cpulevel_t	level;
	cpuwhich_t	which;
	id_t		id;
	size_t		cpusetsize;
	const cpuset_t	*mask;
};
#endif
/*
 * cpuset_setaffinity(2): install a cpu mask supplied by userland on
 * the requested entity at the requested level.  Oversized masks are
 * accepted only if every byte past sizeof(cpuset_t) is zero.
 */
int
cpuset_setaffinity(struct thread *td, struct cpuset_setaffinity_args *uap)
{
	struct cpuset *nset;
	struct cpuset *set;
	struct thread *ttd;
	struct proc *p;
	cpuset_t *mask;
	int error;

	if (uap->cpusetsize < sizeof(cpuset_t) ||
	    uap->cpusetsize > CPU_MAXSIZE / NBBY)
		return (ERANGE);
	mask = malloc(uap->cpusetsize, M_TEMP, M_WAITOK | M_ZERO);
	error = copyin(uap->mask, mask, uap->cpusetsize);
	if (error)
		goto out;
	/*
	 * Verify that no high bits are set.
	 */
	if (uap->cpusetsize > sizeof(cpuset_t)) {
		char *end;
		char *cp;

		end = cp = (char *)&mask->__bits;
		end += uap->cpusetsize;
		cp += sizeof(cpuset_t);
		while (cp != end)
			if (*cp++ != 0) {
				error = EINVAL;
				goto out;
			}

	}
	switch (uap->level) {
	case CPU_LEVEL_ROOT:
	case CPU_LEVEL_CPUSET:
		error = cpuset_which(uap->which, uap->id, &p, &ttd, &set);
		if (error)
			break;
		switch (uap->which) {
		case CPU_WHICH_TID:
		case CPU_WHICH_PID:
			thread_lock(ttd);
			set = cpuset_ref(ttd->td_cpuset);
			thread_unlock(ttd);
			PROC_UNLOCK(p);
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			break;
		case CPU_WHICH_IRQ:
			/* NOTE(review): assumes no set/p held here — confirm
			 * against cpuset_which()'s IRQ handling. */
			error = EINVAL;
			goto out;
		}
		if (uap->level == CPU_LEVEL_ROOT)
			nset = cpuset_refroot(set);
		else
			nset = cpuset_refbase(set);
		error = cpuset_modify(nset, mask);
		cpuset_rel(nset);
		cpuset_rel(set);
		break;
	case CPU_LEVEL_WHICH:
		switch (uap->which) {
		case CPU_WHICH_TID:
			error = cpuset_setthread(uap->id, mask);
			break;
		case CPU_WHICH_PID:
			error = cpuset_setproc(uap->id, NULL, mask);
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			error = cpuset_which(uap->which, uap->id, &p,
			    &ttd, &set);
			if (error == 0) {
				error = cpuset_modify(set, mask);
				cpuset_rel(set);
			}
			break;
		case CPU_WHICH_IRQ:
			error = intr_setaffinity(uap->id, mask);
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
out:
	free(mask, M_TEMP);
	return (error);
}

#ifdef DDB
/* DDB "show cpusets": dump every set's id, refcount, flags and mask. */
DB_SHOW_COMMAND(cpusets, db_show_cpusets)
{
	struct cpuset *set;
	int cpu, once;

	LIST_FOREACH(set, &cpuset_ids, cs_link) {
		db_printf("set=%p id=%-6u ref=%-6d flags=0x%04x parent id=%d\n",
		    set, set->cs_id, set->cs_ref, set->cs_flags,
		    (set->cs_parent != NULL) ? set->cs_parent->cs_id : 0);
		db_printf("  mask=");
		/* Print the mask as a comma-separated cpu list. */
		for (once = 0, cpu = 0; cpu < CPU_SETSIZE; cpu++) {
			if (CPU_ISSET(cpu, &set->cs_mask)) {
				if (once == 0) {
					db_printf("%d", cpu);
					once = 1;
				} else
					db_printf(",%d", cpu);
			}
		}
		db_printf("\n");
		if (db_pager_quit)
			break;
	}
}
#endif /* DDB */