/* subr_percpu.c revision 1.14 */
1/* $NetBSD: subr_percpu.c,v 1.14 2011/07/27 14:35:34 uebayasi Exp $ */ 2 3/*- 4 * Copyright (c)2007,2008 YAMAMOTO Takashi, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29/* 30 * per-cpu storage. 
31 */ 32 33#include <sys/cdefs.h> 34__KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.14 2011/07/27 14:35:34 uebayasi Exp $"); 35 36#include <sys/param.h> 37#include <sys/cpu.h> 38#include <sys/kmem.h> 39#include <sys/kernel.h> 40#include <sys/mutex.h> 41#include <sys/percpu.h> 42#include <sys/rwlock.h> 43#include <sys/vmem.h> 44#include <sys/xcall.h> 45 46#define PERCPU_QUANTUM_SIZE (ALIGNBYTES + 1) 47#define PERCPU_QCACHE_MAX 0 48#define PERCPU_IMPORT_SIZE 2048 49 50#if defined(DIAGNOSTIC) 51#define MAGIC 0x50435055 /* "PCPU" */ 52#define percpu_encrypt(pc) ((pc) ^ MAGIC) 53#define percpu_decrypt(pc) ((pc) ^ MAGIC) 54#else /* defined(DIAGNOSTIC) */ 55#define percpu_encrypt(pc) (pc) 56#define percpu_decrypt(pc) (pc) 57#endif /* defined(DIAGNOSTIC) */ 58 59static krwlock_t percpu_swap_lock __cacheline_aligned; 60static kmutex_t percpu_allocation_lock __cacheline_aligned; 61static vmem_t * percpu_offset_arena __cacheline_aligned; 62static unsigned int percpu_nextoff __cacheline_aligned; 63 64static percpu_cpu_t * 65cpu_percpu(struct cpu_info *ci) 66{ 67 68 return &ci->ci_data.cpu_percpu; 69} 70 71static unsigned int 72percpu_offset(percpu_t *pc) 73{ 74 const unsigned int off = percpu_decrypt((uintptr_t)pc); 75 76 KASSERT(off < percpu_nextoff); 77 return off; 78} 79 80/* 81 * percpu_cpu_swap: crosscall handler for percpu_cpu_enlarge 82 */ 83 84static void 85percpu_cpu_swap(void *p1, void *p2) 86{ 87 struct cpu_info * const ci = p1; 88 percpu_cpu_t * const newpcc = p2; 89 percpu_cpu_t * const pcc = cpu_percpu(ci); 90 91 KASSERT(ci == curcpu() || !mp_online); 92 93 /* 94 * swap *pcc and *newpcc unless anyone has beaten us. 95 */ 96 rw_enter(&percpu_swap_lock, RW_WRITER); 97 if (newpcc->pcc_size > pcc->pcc_size) { 98 percpu_cpu_t tmp; 99 int s; 100 101 tmp = *pcc; 102 103 /* 104 * block interrupts so that we don't lose their modifications. 105 */ 106 107 s = splhigh(); 108 109 /* 110 * copy data to new storage. 
111 */ 112 113 memcpy(newpcc->pcc_data, pcc->pcc_data, pcc->pcc_size); 114 115 /* 116 * this assignment needs to be atomic for percpu_getptr_remote. 117 */ 118 119 pcc->pcc_data = newpcc->pcc_data; 120 121 splx(s); 122 123 pcc->pcc_size = newpcc->pcc_size; 124 *newpcc = tmp; 125 } 126 rw_exit(&percpu_swap_lock); 127} 128 129/* 130 * percpu_cpu_enlarge: ensure that percpu_cpu_t of each cpus have enough space 131 */ 132 133static void 134percpu_cpu_enlarge(size_t size) 135{ 136 CPU_INFO_ITERATOR cii; 137 struct cpu_info *ci; 138 139 for (CPU_INFO_FOREACH(cii, ci)) { 140 percpu_cpu_t pcc; 141 142 pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */ 143 pcc.pcc_size = size; 144 if (!mp_online) { 145 percpu_cpu_swap(ci, &pcc); 146 } else { 147 uint64_t where; 148 149 where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci); 150 xc_wait(where); 151 } 152 KASSERT(pcc.pcc_size < size); 153 if (pcc.pcc_data != NULL) { 154 kmem_free(pcc.pcc_data, pcc.pcc_size); 155 } 156 } 157} 158 159/* 160 * percpu_backend_alloc: vmem import callback for percpu_offset_arena 161 */ 162 163static vmem_addr_t 164percpu_backend_alloc(vmem_t *dummy, vmem_size_t size, vmem_size_t *resultsize, 165 vm_flag_t vmflags) 166{ 167 unsigned int offset; 168 unsigned int nextoff; 169 170 ASSERT_SLEEPABLE(); 171 KASSERT(dummy == NULL); 172 173 if ((vmflags & VM_NOSLEEP) != 0) 174 return VMEM_ADDR_NULL; 175 176 size = roundup(size, PERCPU_IMPORT_SIZE); 177 mutex_enter(&percpu_allocation_lock); 178 offset = percpu_nextoff; 179 percpu_nextoff = nextoff = percpu_nextoff + size; 180 mutex_exit(&percpu_allocation_lock); 181 182 percpu_cpu_enlarge(nextoff); 183 184 *resultsize = size; 185 return (vmem_addr_t)offset; 186} 187 188static void 189percpu_zero_cb(void *vp, void *vp2, struct cpu_info *ci) 190{ 191 size_t sz = (uintptr_t)vp2; 192 193 memset(vp, 0, sz); 194} 195 196/* 197 * percpu_zero: initialize percpu storage with zero. 
198 */ 199 200static void 201percpu_zero(percpu_t *pc, size_t sz) 202{ 203 204 percpu_foreach(pc, percpu_zero_cb, (void *)(uintptr_t)sz); 205} 206 207/* 208 * percpu_init: subsystem initialization 209 */ 210 211void 212percpu_init(void) 213{ 214 215 ASSERT_SLEEPABLE(); 216 rw_init(&percpu_swap_lock); 217 mutex_init(&percpu_allocation_lock, MUTEX_DEFAULT, IPL_NONE); 218 percpu_nextoff = PERCPU_QUANTUM_SIZE; 219 220 percpu_offset_arena = vmem_create("percpu", 0, 0, PERCPU_QUANTUM_SIZE, 221 percpu_backend_alloc, NULL, NULL, PERCPU_QCACHE_MAX, VM_SLEEP, 222 IPL_NONE); 223} 224 225/* 226 * percpu_init_cpu: cpu initialization 227 * 228 * => should be called before the cpu appears on the list for CPU_INFO_FOREACH. 229 */ 230 231void 232percpu_init_cpu(struct cpu_info *ci) 233{ 234 percpu_cpu_t * const pcc = cpu_percpu(ci); 235 size_t size = percpu_nextoff; /* XXX racy */ 236 237 ASSERT_SLEEPABLE(); 238 pcc->pcc_size = size; 239 if (size) { 240 pcc->pcc_data = kmem_zalloc(pcc->pcc_size, KM_SLEEP); 241 } 242} 243 244/* 245 * percpu_alloc: allocate percpu storage 246 * 247 * => called in thread context. 248 * => considered as an expensive and rare operation. 249 * => allocated storage is initialized with zeros. 250 */ 251 252percpu_t * 253percpu_alloc(size_t size) 254{ 255 unsigned int offset; 256 percpu_t *pc; 257 258 ASSERT_SLEEPABLE(); 259 offset = vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT); 260 pc = (percpu_t *)percpu_encrypt((uintptr_t)offset); 261 percpu_zero(pc, size); 262 return pc; 263} 264 265/* 266 * percpu_free: free percpu storage 267 * 268 * => called in thread context. 269 * => considered as an expensive and rare operation. 
270 */ 271 272void 273percpu_free(percpu_t *pc, size_t size) 274{ 275 276 ASSERT_SLEEPABLE(); 277 vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size); 278} 279 280/* 281 * percpu_getref: 282 * 283 * => safe to be used in either thread or interrupt context 284 * => disables preemption; must be bracketed with a percpu_putref() 285 */ 286 287void * 288percpu_getref(percpu_t *pc) 289{ 290 291 KPREEMPT_DISABLE(curlwp); 292 return percpu_getptr_remote(pc, curcpu()); 293} 294 295/* 296 * percpu_putref: 297 * 298 * => drops the preemption-disabled count after caller is done with per-cpu 299 * data 300 */ 301 302void 303percpu_putref(percpu_t *pc) 304{ 305 306 KPREEMPT_ENABLE(curlwp); 307} 308 309/* 310 * percpu_traverse_enter, percpu_traverse_exit, percpu_getptr_remote: 311 * helpers to access remote cpu's percpu data. 312 * 313 * => called in thread context. 314 * => percpu_traverse_enter can block low-priority xcalls. 315 * => typical usage would be: 316 * 317 * sum = 0; 318 * percpu_traverse_enter(); 319 * for (CPU_INFO_FOREACH(cii, ci)) { 320 * unsigned int *p = percpu_getptr_remote(pc, ci); 321 * sum += *p; 322 * } 323 * percpu_traverse_exit(); 324 */ 325 326void 327percpu_traverse_enter(void) 328{ 329 330 ASSERT_SLEEPABLE(); 331 rw_enter(&percpu_swap_lock, RW_READER); 332} 333 334void 335percpu_traverse_exit(void) 336{ 337 338 rw_exit(&percpu_swap_lock); 339} 340 341void * 342percpu_getptr_remote(percpu_t *pc, struct cpu_info *ci) 343{ 344 345 return &((char *)cpu_percpu(ci)->pcc_data)[percpu_offset(pc)]; 346} 347 348/* 349 * percpu_foreach: call the specified callback function for each cpus. 350 * 351 * => called in thread context. 352 * => caller should not rely on the cpu iteration order. 353 * => the callback function should be minimum because it is executed with 354 * holding a global lock, which can block low-priority xcalls. 355 * eg. it's illegal for a callback function to sleep for memory allocation. 
356 */ 357void 358percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg) 359{ 360 CPU_INFO_ITERATOR cii; 361 struct cpu_info *ci; 362 363 percpu_traverse_enter(); 364 for (CPU_INFO_FOREACH(cii, ci)) { 365 (*cb)(percpu_getptr_remote(pc, ci), arg, ci); 366 } 367 percpu_traverse_exit(); 368} 369