/*
 * Copyright (c) 2006-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Memory allocator with per-CPU caching, derived from the kmem magazine
 * concept and implementation as described in the following paper:
 * http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick.pdf
 * That implementation is Copyright 2006 Sun Microsystems, Inc. All rights
 * reserved. Use is subject to license terms.
 *
 * There are several major differences between this and the original kmem
 * magazine: this derivative implementation allows for multiple objects to
 * be allocated and freed from/to the object cache in one call; in addition,
 * it provides greater flexibility by allowing the user to define their own
 * slab allocator (instead of the default zone allocator).  Finally, no
 * object construction/destruction takes place at the moment, although this
 * could be added in the future to improve efficiency.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/systm.h>

#include <kern/debug.h>
#include <kern/zalloc.h>
#include <kern/cpu_number.h>
#include <kern/locks.h>
#include <kern/thread_call.h>

#include <libkern/libkern.h>
#include <libkern/OSAtomic.h>
#include <libkern/OSDebug.h>

#include <mach/vm_param.h>
#include <machine/limits.h>
#include <machine/machine_routines.h>

#include <string.h>

#include <sys/mcache.h>

#define	MCACHE_SIZE(n) \
	((size_t)(&((mcache_t *)0)->mc_cpu[n]))

/* Allocate extra in case we need to manually align the pointer */
#define	MCACHE_ALLOC_SIZE \
	(sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_LINE_SIZE)

#define	MCACHE_CPU(c) \
	(mcache_cpu_t *)((void *)((char *)(c) + MCACHE_SIZE(cpu_number())))

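/*
 * Illustrative layout note (not normative): the zone allocation backing an
 * mcache_t is MCACHE_ALLOC_SIZE bytes so that the mcache_t itself can be
 * placed on a CPU cache line boundary.  The pointer-sized word immediately
 * preceding the aligned mcache_t stores the original (unaligned) zone
 * address, which is what gets handed back to zfree() at destroy time:
 *
 *	[ pad ][ saved buf pointer ][ mcache_t ... mc_cpu[0..ncpu-1] ]
 *	^buf                        ^cp (CPU cache line aligned)
 *
 * MCACHE_CPU(c) then indexes the running CPU's mcache_cpu_t by adding
 * MCACHE_SIZE(cpu_number()) to the aligned base.
 */
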
/*
 * MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used
 * to serialize accesses to the global list of caches in the system.
 * They also record the thread currently running in the critical
 * section, so that we can avoid recursive requests to reap the
 * caches when memory runs low.
 */
#define	MCACHE_LIST_LOCK() {				\
	lck_mtx_lock(mcache_llock);			\
	mcache_llock_owner = current_thread();		\
}

#define	MCACHE_LIST_UNLOCK() {				\
	mcache_llock_owner = NULL;			\
	lck_mtx_unlock(mcache_llock);			\
}

#define	MCACHE_LOCK(l)		lck_mtx_lock(l)
#define	MCACHE_UNLOCK(l)	lck_mtx_unlock(l)
#define	MCACHE_LOCK_TRY(l)	lck_mtx_try_lock(l)

static int ncpu;
static unsigned int cache_line_size;
static lck_mtx_t *mcache_llock;
static struct thread *mcache_llock_owner;
static lck_attr_t *mcache_llock_attr;
static lck_grp_t *mcache_llock_grp;
static lck_grp_attr_t *mcache_llock_grp_attr;
static struct zone *mcache_zone;
static const uint32_t mcache_reap_interval = 15;
static const uint32_t mcache_reap_interval_leeway = 2;
static UInt32 mcache_reaping;
static int mcache_ready;
static int mcache_updating;

static int mcache_bkt_contention = 3;
#if DEBUG
static unsigned int mcache_flags = MCF_DEBUG;
#else
static unsigned int mcache_flags = 0;
#endif

int mca_trn_max = MCA_TRN_MAX;

#define	DUMP_MCA_BUF_SIZE	512
static char *mca_dump_buf;

static mcache_bkttype_t mcache_bkttype[] = {
	{ 1,	4096,	32768,	NULL },
	{ 3,	2048,	16384,	NULL },
	{ 7,	1024,	12288,	NULL },
	{ 15,	256,	8192,	NULL },
	{ 31,	64,	4096,	NULL },
	{ 47,	0,	2048,	NULL },
	{ 63,	0,	1024,	NULL },
	{ 95,	0,	512,	NULL },
	{ 143,	0,	256,	NULL },
	{ 165,	0,	0,	NULL },
};

static mcache_t *mcache_create_common(const char *, size_t, size_t,
    mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t,
    mcache_notifyfn_t, void *, u_int32_t, int, int);
static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***,
    unsigned int, int);
static void mcache_slab_free(void *, mcache_obj_t *, boolean_t);
static void mcache_slab_audit(void *, mcache_obj_t *, boolean_t);
static void mcache_cpu_refill(mcache_cpu_t *, mcache_bkt_t *, int);
static mcache_bkt_t *mcache_bkt_alloc(mcache_t *, mcache_bktlist_t *,
    mcache_bkttype_t **);
static void mcache_bkt_free(mcache_t *, mcache_bktlist_t *, mcache_bkt_t *);
static void mcache_cache_bkt_enable(mcache_t *);
static void mcache_bkt_purge(mcache_t *);
static void mcache_bkt_destroy(mcache_t *, mcache_bkttype_t *,
    mcache_bkt_t *, int);
static void mcache_bkt_ws_update(mcache_t *);
static void mcache_bkt_ws_reap(mcache_t *);
static void mcache_dispatch(void (*)(void *), void *);
static void mcache_cache_reap(mcache_t *);
static void mcache_cache_update(mcache_t *);
static void mcache_cache_bkt_resize(void *);
static void mcache_cache_enable(void *);
static void mcache_update(thread_call_param_t __unused, thread_call_param_t __unused);
static void mcache_update_timeout(void *);
static void mcache_applyall(void (*)(mcache_t *));
static void mcache_reap_start(void *);
static void mcache_reap_done(void *);
static void mcache_reap_timeout(thread_call_param_t __unused, thread_call_param_t);
static void mcache_notify(mcache_t *, u_int32_t);
static void mcache_purge(void *);

static LIST_HEAD(, mcache) mcache_head;
mcache_t *mcache_audit_cache;

static thread_call_t mcache_reap_tcall;
static thread_call_t mcache_update_tcall;

/*
 * Initialize the framework; this is currently called as part of BSD init.
 */
__private_extern__ void
mcache_init(void)
{
	mcache_bkttype_t *btp;
	unsigned int i;
	char name[32];

	VERIFY(mca_trn_max >= 2);

	ncpu = ml_get_max_cpus();
	(void) mcache_cache_line_size();	/* prime it */

	mcache_llock_grp_attr = lck_grp_attr_alloc_init();
	mcache_llock_grp = lck_grp_alloc_init("mcache.list",
	    mcache_llock_grp_attr);
	mcache_llock_attr = lck_attr_alloc_init();
	mcache_llock = lck_mtx_alloc_init(mcache_llock_grp, mcache_llock_attr);

	mcache_reap_tcall = thread_call_allocate(mcache_reap_timeout, NULL);
	mcache_update_tcall = thread_call_allocate(mcache_update, NULL);
	if (mcache_reap_tcall == NULL || mcache_update_tcall == NULL)
		panic("mcache_init: thread_call_allocate failed");

	mcache_zone = zinit(MCACHE_ALLOC_SIZE, 256 * MCACHE_ALLOC_SIZE,
	    PAGE_SIZE, "mcache");
	if (mcache_zone == NULL)
		panic("mcache_init: failed to allocate mcache zone\n");
	zone_change(mcache_zone, Z_CALLERACCT, FALSE);

	LIST_INIT(&mcache_head);

	for (i = 0; i < sizeof (mcache_bkttype) / sizeof (*btp); i++) {
		btp = &mcache_bkttype[i];
		(void) snprintf(name, sizeof (name), "bkt_%d",
		    btp->bt_bktsize);
		btp->bt_cache = mcache_create(name,
		    (btp->bt_bktsize + 1) * sizeof (void *), 0, 0, MCR_SLEEP);
	}

	PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof(mcache_flags));
	mcache_flags &= MCF_FLAGS_MASK;

	mcache_audit_cache = mcache_create("audit", sizeof (mcache_audit_t),
	    0, 0, MCR_SLEEP);

	mcache_applyall(mcache_cache_bkt_enable);
	mcache_ready = 1;

	printf("mcache: %d CPU(s), %d bytes CPU cache line size\n",
	    ncpu, CPU_CACHE_LINE_SIZE);
}

/*
 * Return the global mcache flags.
 */
__private_extern__ unsigned int
mcache_getflags(void)
{
	return (mcache_flags);
}

/*
 * Return the CPU cache line size.
 */
__private_extern__ unsigned int
mcache_cache_line_size(void)
{
	if (cache_line_size == 0) {
		ml_cpu_info_t cpu_info;
		ml_cpu_get_info(&cpu_info);
		cache_line_size = cpu_info.cache_line_size;
	}
	return (cache_line_size);
}

/*
 * Create a cache using the zone allocator as the backend slab allocator.
 * The caller may specify any alignment for the object; if it specifies 0
 * the default alignment (MCACHE_ALIGN) will be used.
 */
__private_extern__ mcache_t *
mcache_create(const char *name, size_t bufsize, size_t align,
    u_int32_t flags, int wait)
{
	return (mcache_create_common(name, bufsize, align, mcache_slab_alloc,
	    mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1,
	    wait));
}

/*
 * Create a cache using a custom backend slab allocator.  Since the caller
 * is responsible for allocation, no alignment guarantee will be provided
 * by this framework.
 */
__private_extern__ mcache_t *
mcache_create_ext(const char *name, size_t bufsize,
    mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
    mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
    u_int32_t flags, int wait)
{
	return (mcache_create_common(name, bufsize, 0, allocfn,
	    freefn, auditfn, logfn, notifyfn, arg, flags, 0, wait));
}

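/*
 * Usage sketch (illustrative only; "my_cache" and "struct my_obj" are
 * hypothetical): a typical client creates a zone-backed cache once and
 * then allocates/frees objects from it, e.g.
 *
 *	mcache_t *my_cache;
 *
 *	my_cache = mcache_create("my_cache", sizeof (struct my_obj),
 *	    0, 0, MCR_SLEEP);
 *	...
 *	struct my_obj *p = mcache_alloc(my_cache, MCR_SLEEP);
 *	...
 *	mcache_free(my_cache, p);
 *
 * Clients that manage their own backing store would instead use
 * mcache_create_ext() and supply the alloc/free (and optionally the
 * audit/log/notify) callbacks.
 */
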
/*
 * Common cache creation routine.
 */
static mcache_t *
mcache_create_common(const char *name, size_t bufsize, size_t align,
    mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
    mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
    u_int32_t flags, int need_zone, int wait)
{
	mcache_bkttype_t *btp;
	mcache_t *cp = NULL;
	size_t chunksize;
	void *buf, **pbuf;
	int c;
	char lck_name[64];

	/* If auditing is on and print buffer is NULL, allocate it now */
	if ((flags & MCF_DEBUG) && mca_dump_buf == NULL) {
		int malloc_wait = (wait & MCR_NOSLEEP) ? M_NOWAIT : M_WAITOK;
		MALLOC(mca_dump_buf, char *, DUMP_MCA_BUF_SIZE, M_TEMP,
		    malloc_wait | M_ZERO);
		if (mca_dump_buf == NULL)
			return (NULL);
	}

	if (!(wait & MCR_NOSLEEP))
		buf = zalloc(mcache_zone);
	else
		buf = zalloc_noblock(mcache_zone);

	if (buf == NULL)
		goto fail;

	bzero(buf, MCACHE_ALLOC_SIZE);

	/*
	 * In case we didn't get cache-aligned memory, round it up
	 * accordingly.  This is needed in order to get the rest of
	 * the structure members aligned properly.  It also means that
	 * the memory span gets shifted due to the round up, but it
	 * is okay since we've allocated extra space for this.
	 */
	cp = (mcache_t *)
	    P2ROUNDUP((intptr_t)buf + sizeof (void *), CPU_CACHE_LINE_SIZE);
	pbuf = (void **)((intptr_t)cp - sizeof (void *));
	*pbuf = buf;

	/*
	 * Guaranteed alignment is valid only when we use the internal
	 * slab allocator (currently set to use the zone allocator).
	 */
	if (!need_zone)
		align = 1;
	else if (align == 0)
		align = MCACHE_ALIGN;

	if ((align & (align - 1)) != 0)
		panic("mcache_create: bad alignment %lu", align);

	cp->mc_align = align;
	cp->mc_slab_alloc = allocfn;
	cp->mc_slab_free = freefn;
	cp->mc_slab_audit = auditfn;
	cp->mc_slab_log = logfn;
	cp->mc_slab_notify = notifyfn;
	cp->mc_private = need_zone ? cp : arg;
	cp->mc_bufsize = bufsize;
	cp->mc_flags = (flags & MCF_FLAGS_MASK) | mcache_flags;

	(void) snprintf(cp->mc_name, sizeof (cp->mc_name), "mcache.%s", name);

	(void) snprintf(lck_name, sizeof (lck_name), "%s.cpu", cp->mc_name);
	cp->mc_cpu_lock_grp_attr = lck_grp_attr_alloc_init();
	cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name,
	    cp->mc_cpu_lock_grp_attr);
	cp->mc_cpu_lock_attr = lck_attr_alloc_init();

	/*
	 * Allocation chunk size is the object's size plus any extra size
	 * needed to satisfy the object's alignment.  It is enforced to be
	 * at least the size of an LP64 pointer to simplify auditing and to
	 * handle multiple-element allocation requests, where the elements
	 * returned are linked together in a list.
	 */
	chunksize = MAX(bufsize, sizeof (u_int64_t));
	if (need_zone) {
		/* Enforce 64-bit minimum alignment for zone-based buffers */
		align = MAX(align, sizeof (u_int64_t));
		chunksize += sizeof (void *) + align;
		chunksize = P2ROUNDUP(chunksize, align);
		if ((cp->mc_slab_zone = zinit(chunksize, 64 * 1024 * ncpu,
		    PAGE_SIZE, cp->mc_name)) == NULL)
			goto fail;
		zone_change(cp->mc_slab_zone, Z_EXPAND, TRUE);
	}
	cp->mc_chunksize = chunksize;

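	/*
	 * Worked example (illustrative, LP64): for a zone-backed cache
	 * created with bufsize 40 and an explicit align of 8, chunksize
	 * starts at MAX(40, 8) = 40, grows by sizeof (void *) + 8 to 56,
	 * and P2ROUNDUP(56, 8) leaves it at 56.  The pointer-sized slack
	 * is what mcache_slab_alloc() uses to stash the original zone
	 * address just below the 64-bit aligned base of each object.
	 */
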
	/*
	 * Initialize the bucket layer.
	 */
	(void) snprintf(lck_name, sizeof (lck_name), "%s.bkt", cp->mc_name);
	cp->mc_bkt_lock_grp_attr = lck_grp_attr_alloc_init();
	cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name,
	    cp->mc_bkt_lock_grp_attr);
	cp->mc_bkt_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp,
	    cp->mc_bkt_lock_attr);

	(void) snprintf(lck_name, sizeof (lck_name), "%s.sync", cp->mc_name);
	cp->mc_sync_lock_grp_attr = lck_grp_attr_alloc_init();
	cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name,
	    cp->mc_sync_lock_grp_attr);
	cp->mc_sync_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp,
	    cp->mc_sync_lock_attr);

	for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++)
		continue;

	cp->cache_bkttype = btp;

	/*
	 * Initialize the CPU layer.  Each per-CPU structure is aligned
	 * on the CPU cache line boundary to prevent false sharing.
	 */
	for (c = 0; c < ncpu; c++) {
		mcache_cpu_t *ccp = &cp->mc_cpu[c];

		VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_LINE_SIZE));
		lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp,
		    cp->mc_cpu_lock_attr);
		ccp->cc_objs = -1;
		ccp->cc_pobjs = -1;
	}

	if (mcache_ready)
		mcache_cache_bkt_enable(cp);

	/* TODO: dynamically create sysctl for stats */

	MCACHE_LIST_LOCK();
	LIST_INSERT_HEAD(&mcache_head, cp, mc_list);
	MCACHE_LIST_UNLOCK();

	/*
	 * If cache buckets are enabled and this is the first cache
	 * created, start the periodic cache update.
	 */
	if (!(mcache_flags & MCF_NOCPUCACHE) && !mcache_updating) {
		mcache_updating = 1;
		mcache_update_timeout(NULL);
	}
	if (cp->mc_flags & MCF_DEBUG) {
		printf("mcache_create: %s (%s) arg %p bufsize %lu align %lu "
		    "chunksize %lu bktsize %d\n", name, need_zone ? "i" : "e",
		    arg, bufsize, cp->mc_align, chunksize, btp->bt_bktsize);
	}
	return (cp);

fail:
	if (buf != NULL)
		zfree(mcache_zone, buf);
	return (NULL);
}

/*
 * Allocate one or more objects from a cache.
 */
__private_extern__ unsigned int
mcache_alloc_ext(mcache_t *cp, mcache_obj_t **list, unsigned int num, int wait)
{
	mcache_cpu_t *ccp;
	mcache_obj_t **top = &(*list);
	mcache_bkt_t *bkt;
	unsigned int need = num;
	boolean_t nwretry = FALSE;

	/* MCR_NOSLEEP and MCR_FAILOK are mutually exclusive */
	VERIFY((wait & (MCR_NOSLEEP|MCR_FAILOK)) != (MCR_NOSLEEP|MCR_FAILOK));

	ASSERT(list != NULL);
	*list = NULL;

	if (num == 0)
		return (0);

retry_alloc:
	/* We may not always be running in the same CPU in case of retries */
	ccp = MCACHE_CPU(cp);

	MCACHE_LOCK(&ccp->cc_lock);
	for (;;) {
		/*
		 * If we have an object in the current CPU's filled bucket,
		 * chain the object to any previous objects and return if
		 * we've satisfied the number of requested objects.
		 */
		if (ccp->cc_objs > 0) {
			mcache_obj_t *tail;
			int objs;

			/*
			 * Objects in the bucket are already linked together
			 * with the most recently freed object at the head of
			 * the list; grab as many objects as we can.
			 */
			objs = MIN((unsigned int)ccp->cc_objs, need);
			*list = ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
			ccp->cc_objs -= objs;
			ccp->cc_alloc += objs;

			tail = ccp->cc_filled->bkt_obj[ccp->cc_objs];
			list = &tail->obj_next;
			*list = NULL;

			/* If we got them all, return to caller */
			if ((need -= objs) == 0) {
				MCACHE_UNLOCK(&ccp->cc_lock);

				if (!(cp->mc_flags & MCF_NOLEAKLOG) &&
				    cp->mc_slab_log != NULL)
					(*cp->mc_slab_log)(num, *top, TRUE);

				if (cp->mc_flags & MCF_DEBUG)
					goto debug_alloc;

				return (num);
			}
		}

		/*
		 * The CPU's filled bucket is empty.  If the previous filled
		 * bucket was full, exchange and try again.
		 */
		if (ccp->cc_pobjs > 0) {
			mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
			continue;
		}

		/*
		 * If the bucket layer is disabled, allocate from slab.  This
		 * can happen either because MCF_NOCPUCACHE is set, or because
		 * the bucket layer is currently being resized.
		 */
		if (ccp->cc_bktsize == 0)
			break;

		/*
		 * Both of the CPU's buckets are empty; try to get a full
		 * bucket from the bucket layer.  Upon success, refill this
		 * CPU and place any empty bucket into the empty list.
		 */
		bkt = mcache_bkt_alloc(cp, &cp->mc_full, NULL);
		if (bkt != NULL) {
			if (ccp->cc_pfilled != NULL)
				mcache_bkt_free(cp, &cp->mc_empty,
				    ccp->cc_pfilled);
			mcache_cpu_refill(ccp, bkt, ccp->cc_bktsize);
			continue;
		}

		/*
		 * The bucket layer has no full buckets; allocate the
		 * object(s) directly from the slab layer.
		 */
		break;
	}
	MCACHE_UNLOCK(&ccp->cc_lock);

	need -= (*cp->mc_slab_alloc)(cp->mc_private, &list, need, wait);

	/*
	 * If this is a blocking allocation, or if it is non-blocking and
	 * the cache's full bucket is non-empty, then retry the allocation.
	 */
	if (need > 0) {
		if (!(wait & MCR_NONBLOCKING)) {
			atomic_add_32(&cp->mc_wretry_cnt, 1);
			goto retry_alloc;
		} else if ((wait & (MCR_NOSLEEP | MCR_TRYHARD)) &&
		    !mcache_bkt_isempty(cp)) {
			if (!nwretry)
				nwretry = TRUE;
			atomic_add_32(&cp->mc_nwretry_cnt, 1);
			goto retry_alloc;
		} else if (nwretry) {
			atomic_add_32(&cp->mc_nwfail_cnt, 1);
		}
	}

	if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL)
		(*cp->mc_slab_log)((num - need), *top, TRUE);

	if (!(cp->mc_flags & MCF_DEBUG))
		return (num - need);

debug_alloc:
	if (cp->mc_flags & MCF_DEBUG) {
		mcache_obj_t **o = top;
		unsigned int n;

		n = 0;
		/*
		 * Verify that the chain of objects has the same count as
		 * what we are about to report to the caller.  Any mismatch
		 * here means that the object list is insanely broken and
		 * therefore we must panic.
		 */
		while (*o != NULL) {
			o = &(*o)->obj_next;
			++n;
		}
		if (n != (num - need)) {
			panic("mcache_alloc_ext: %s cp %p corrupted list "
			    "(got %d actual %d)\n", cp->mc_name,
			    (void *)cp, num - need, n);
		}
	}

	/* Invoke the slab layer audit callback if auditing is enabled */
	if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL)
		(*cp->mc_slab_audit)(cp->mc_private, *top, TRUE);

	return (num - need);
}

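/*
 * Usage sketch (illustrative; "my_cache" is hypothetical): a batch of
 * objects can be allocated, walked, and later freed with one call each.
 * The returned objects form a NULL-terminated list linked via obj_next:
 *
 *	mcache_obj_t *list, *o;
 *	unsigned int got;
 *
 *	got = mcache_alloc_ext(my_cache, &list, 16, MCR_SLEEP);
 *	for (o = list; o != NULL; o = o->obj_next)
 *		;	// use each object
 *	mcache_free_ext(my_cache, list);
 *
 * With MCR_SLEEP the request keeps retrying until all 16 are satisfied;
 * with MCR_NOSLEEP or MCR_TRYHARD the caller must be prepared for "got"
 * to be less than the number requested.
 */
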
/*
 * Allocate a single object from a cache.
 */
__private_extern__ void *
mcache_alloc(mcache_t *cp, int wait)
{
	mcache_obj_t *buf;

	(void) mcache_alloc_ext(cp, &buf, 1, wait);
	return (buf);
}

__private_extern__ void
mcache_waiter_inc(mcache_t *cp)
{
	atomic_add_32(&cp->mc_waiter_cnt, 1);
}

__private_extern__ void
mcache_waiter_dec(mcache_t *cp)
{
	atomic_add_32(&cp->mc_waiter_cnt, -1);
}

__private_extern__ boolean_t
mcache_bkt_isempty(mcache_t *cp)
{
	/*
	 * This isn't meant to accurately tell whether there are
	 * any full buckets in the cache; it is simply a way to
	 * obtain "hints" about the state of the cache.
	 */
	return (cp->mc_full.bl_total == 0);
}

/*
 * Notify the slab layer about an event.
 */
static void
mcache_notify(mcache_t *cp, u_int32_t event)
{
	if (cp->mc_slab_notify != NULL)
		(*cp->mc_slab_notify)(cp->mc_private, event);
}

/*
 * Purge the cache and disable its buckets.
 */
static void
mcache_purge(void *arg)
{
	mcache_t *cp = arg;

	mcache_bkt_purge(cp);
	/*
	 * We cannot simply call mcache_cache_bkt_enable() from here as
	 * a bucket resize may be in flight and we would cause the CPU
	 * layers of the cache to point to different sizes.  Therefore,
	 * we simply increment the enable count so that during the next
	 * periodic cache update the buckets can be reenabled.
	 */
	lck_mtx_lock_spin(&cp->mc_sync_lock);
	cp->mc_enable_cnt++;
	lck_mtx_unlock(&cp->mc_sync_lock);
}

__private_extern__ boolean_t
mcache_purge_cache(mcache_t *cp, boolean_t async)
{
	/*
	 * Purging a cache that has no per-CPU caches or is already
	 * in the process of being purged is rather pointless.
	 */
	if (cp->mc_flags & MCF_NOCPUCACHE)
		return (FALSE);

	lck_mtx_lock_spin(&cp->mc_sync_lock);
	if (cp->mc_purge_cnt > 0) {
		lck_mtx_unlock(&cp->mc_sync_lock);
		return (FALSE);
	}
	cp->mc_purge_cnt++;
	lck_mtx_unlock(&cp->mc_sync_lock);

	if (async)
		mcache_dispatch(mcache_purge, cp);
	else
		mcache_purge(cp);

	return (TRUE);
}

/*
 * Free a single object to a cache.
 */
__private_extern__ void
mcache_free(mcache_t *cp, void *buf)
{
	((mcache_obj_t *)buf)->obj_next = NULL;
	mcache_free_ext(cp, (mcache_obj_t *)buf);
}

/*
 * Free one or more objects to a cache.
 */
__private_extern__ void
mcache_free_ext(mcache_t *cp, mcache_obj_t *list)
{
	mcache_cpu_t *ccp = MCACHE_CPU(cp);
	mcache_bkttype_t *btp;
	mcache_obj_t *nlist;
	mcache_bkt_t *bkt;

	if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL)
		(*cp->mc_slab_log)(0, list, FALSE);

	/* Invoke the slab layer audit callback if auditing is enabled */
	if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL)
		(*cp->mc_slab_audit)(cp->mc_private, list, FALSE);

	MCACHE_LOCK(&ccp->cc_lock);
	for (;;) {
		/*
		 * If there is space in the current CPU's filled bucket, put
		 * the object there and return once all objects are freed.
		 * Note the cast to unsigned integer takes care of the case
		 * where the bucket layer is disabled (when cc_objs is -1).
		 */
		if ((unsigned int)ccp->cc_objs <
		    (unsigned int)ccp->cc_bktsize) {
			/*
			 * Reverse the list while we place the object into the
			 * bucket; this effectively causes the most recently
			 * freed object(s) to be reused during allocation.
			 */
			nlist = list->obj_next;
			list->obj_next = (ccp->cc_objs == 0) ? NULL :
			    ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
			ccp->cc_filled->bkt_obj[ccp->cc_objs++] = list;
			ccp->cc_free++;

			if ((list = nlist) != NULL)
				continue;

			/* We are done; return to caller */
			MCACHE_UNLOCK(&ccp->cc_lock);

			/* If there is a waiter below, notify it */
			if (cp->mc_waiter_cnt > 0)
				mcache_notify(cp, MCN_RETRYALLOC);
			return;
		}

		/*
		 * The CPU's filled bucket is full.  If the previous filled
		 * bucket was empty, exchange and try again.
		 */
		if (ccp->cc_pobjs == 0) {
			mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
			continue;
		}

		/*
		 * If the bucket layer is disabled, free to slab.  This can
		 * happen either because MCF_NOCPUCACHE is set, or because
		 * the bucket layer is currently being resized.
		 */
		if (ccp->cc_bktsize == 0)
			break;

		/*
		 * Both of the CPU's buckets are full; try to get an empty
		 * bucket from the bucket layer.  Upon success, empty this
		 * CPU and place any full bucket into the full list.
		 */
		bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp);
		if (bkt != NULL) {
			if (ccp->cc_pfilled != NULL)
				mcache_bkt_free(cp, &cp->mc_full,
				    ccp->cc_pfilled);
			mcache_cpu_refill(ccp, bkt, 0);
			continue;
		}

		/*
		 * We need an empty bucket to put our freed objects into
		 * but couldn't get an empty bucket from the bucket layer;
		 * attempt to allocate one.  We do not want to block for
		 * allocation here, and if the bucket allocation fails
		 * we will simply fall through to the slab layer.
		 */
		MCACHE_UNLOCK(&ccp->cc_lock);
		bkt = mcache_alloc(btp->bt_cache, MCR_NOSLEEP);
		MCACHE_LOCK(&ccp->cc_lock);

		if (bkt != NULL) {
			/*
			 * We have an empty bucket, but since we drop the
			 * CPU lock above, the cache's bucket size may have
			 * changed.  If so, free the bucket and try again.
			 */
			if (ccp->cc_bktsize != btp->bt_bktsize) {
				MCACHE_UNLOCK(&ccp->cc_lock);
				mcache_free(btp->bt_cache, bkt);
				MCACHE_LOCK(&ccp->cc_lock);
				continue;
			}

			/*
			 * We have an empty bucket of the right size;
			 * add it to the bucket layer and try again.
			 */
			mcache_bkt_free(cp, &cp->mc_empty, bkt);
			continue;
		}

		/*
		 * The bucket layer has no empty buckets; free the
		 * object(s) directly to the slab layer.
		 */
		break;
	}
	MCACHE_UNLOCK(&ccp->cc_lock);

	/* If there is a waiter below, notify it */
	if (cp->mc_waiter_cnt > 0)
		mcache_notify(cp, MCN_RETRYALLOC);

	/* Advise the slab layer to purge the object(s) */
	(*cp->mc_slab_free)(cp->mc_private, list,
	    (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
}

/*
 * Cache destruction routine.
 */
__private_extern__ void
mcache_destroy(mcache_t *cp)
{
	void **pbuf;

	MCACHE_LIST_LOCK();
	LIST_REMOVE(cp, mc_list);
	MCACHE_LIST_UNLOCK();

	mcache_bkt_purge(cp);

	/*
	 * This cache is dead; there should be no further transactions.
	 * If it's still invoked, make sure that it induces a fault.
	 */
	cp->mc_slab_alloc = NULL;
	cp->mc_slab_free = NULL;
	cp->mc_slab_audit = NULL;

	lck_attr_free(cp->mc_bkt_lock_attr);
	lck_grp_free(cp->mc_bkt_lock_grp);
	lck_grp_attr_free(cp->mc_bkt_lock_grp_attr);

	lck_attr_free(cp->mc_cpu_lock_attr);
	lck_grp_free(cp->mc_cpu_lock_grp);
	lck_grp_attr_free(cp->mc_cpu_lock_grp_attr);

	lck_attr_free(cp->mc_sync_lock_attr);
	lck_grp_free(cp->mc_sync_lock_grp);
	lck_grp_attr_free(cp->mc_sync_lock_grp_attr);

	/*
	 * TODO: We need to destroy the zone here, but cannot do it
	 * because there is no way to achieve that yet.  Until then
	 * the memory allocated for the zone structure is leaked.
	 * Once it is achievable, uncomment these lines:
	 *
	 *	if (cp->mc_slab_zone != NULL) {
	 *		zdestroy(cp->mc_slab_zone);
	 *		cp->mc_slab_zone = NULL;
	 *	}
	 */

	/* Get the original address since we're about to free it */
	pbuf = (void **)((intptr_t)cp - sizeof (void *));

	zfree(mcache_zone, *pbuf);
}

/*
 * Internal slab allocator used as a backend for simple caches.  The current
 * implementation uses the zone allocator for simplicity reasons.
 */
static unsigned int
mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait)
{
	mcache_t *cp = arg;
	unsigned int need = num;
	size_t offset = 0;
	size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
	u_int32_t flags = cp->mc_flags;
	void *buf, *base, **pbuf;
	mcache_obj_t **list = *plist;

	*list = NULL;

	/*
	 * The address of the object returned to the caller is an
	 * offset from the 64-bit aligned base address only if the
	 * cache's alignment requirement is neither 1 nor 8 bytes.
	 */
	if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
		offset = cp->mc_align;

	for (;;) {
		if (!(wait & MCR_NOSLEEP))
			buf = zalloc(cp->mc_slab_zone);
		else
			buf = zalloc_noblock(cp->mc_slab_zone);

		if (buf == NULL)
			break;

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;

		/*
		 * If auditing is enabled, patternize the contents of
		 * the buffer starting from the 64-bit aligned base to
		 * the end of the buffer; the length is rounded up to
		 * the nearest 64-bit multiple; this is because we use
		 * 64-bit memory access to set/check the pattern.
		 */
		if (flags & MCF_DEBUG) {
			VERIFY(((intptr_t)base + rsize) <=
			    ((intptr_t)buf + cp->mc_chunksize));
			mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
		}

		/*
		 * Fix up the object's address to fulfill the cache's
		 * alignment requirement (if needed) and return this
		 * to the caller.
		 */
		VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
		    ((intptr_t)buf + cp->mc_chunksize));
		*list = (mcache_obj_t *)((intptr_t)base + offset);

		(*list)->obj_next = NULL;
		list = *plist = &(*list)->obj_next;

		/* If we got them all, return to mcache */
		if (--need == 0)
			break;
	}

	return (num - need);
}

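/*
 * Illustrative layout of a slab (zone) chunk as produced above; the object
 * handed back to the mcache layer sits "offset" bytes past a 64-bit aligned
 * base, and the word just below that base remembers the original zone
 * address for the eventual zfree():
 *
 *	[ buf .. pad ][ saved buf pointer ][ base .. object at base + offset ]
 *
 * With the common alignments of 1 or 8 the offset is 0 and the object
 * starts right at the aligned base.
 */
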
/*
 * Internal slab deallocator used as a backend for simple caches.
 */
static void
mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged)
{
	mcache_t *cp = arg;
	mcache_obj_t *nlist;
	size_t offset = 0;
	size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
	u_int32_t flags = cp->mc_flags;
	void *base;
	void **pbuf;

	/*
	 * The address of the object is an offset from a 64-bit
	 * aligned base address only if the cache's alignment
	 * requirement is neither 1 nor 8 bytes.
	 */
	if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
		offset = cp->mc_align;

	for (;;) {
		nlist = list->obj_next;
		list->obj_next = NULL;

		/* Get the 64-bit aligned base address of this object */
		base = (void *)((intptr_t)list - offset);
		VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));

		/* Get the original address since we're about to free it */
		pbuf = (void **)((intptr_t)base - sizeof (void *));

		if (flags & MCF_DEBUG) {
			VERIFY(((intptr_t)base + rsize) <=
			    ((intptr_t)*pbuf + cp->mc_chunksize));
			mcache_audit_free_verify(NULL, base, offset, rsize);
		}

		/* Free it to zone */
		VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
		    ((intptr_t)*pbuf + cp->mc_chunksize));
		zfree(cp->mc_slab_zone, *pbuf);

		/* No more objects to free; return to mcache */
		if ((list = nlist) == NULL)
			break;
	}
}

/*
 * Internal slab auditor for simple caches.
 */
static void
mcache_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
{
	mcache_t *cp = arg;
	size_t offset = 0;
	size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
	void *base, **pbuf;

	/*
	 * The address of the object returned to the caller is an
	 * offset from the 64-bit aligned base address only if the
	 * cache's alignment requirement is neither 1 nor 8 bytes.
	 */
	if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
		offset = cp->mc_align;

	while (list != NULL) {
		mcache_obj_t *next = list->obj_next;

		/* Get the 64-bit aligned base address of this object */
		base = (void *)((intptr_t)list - offset);
		VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));

		/* Get the original address */
		pbuf = (void **)((intptr_t)base - sizeof (void *));

		VERIFY(((intptr_t)base + rsize) <=
		    ((intptr_t)*pbuf + cp->mc_chunksize));

		if (!alloc)
			mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
		else
			mcache_audit_free_verify_set(NULL, base, offset, rsize);

		list = list->obj_next = next;
	}
}

/*
 * Refill the CPU's filled bucket with bkt and save the previous one.
 */
static void
mcache_cpu_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs)
{
	ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) ||
	    (ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize));
	ASSERT(ccp->cc_bktsize > 0);

	ccp->cc_pfilled = ccp->cc_filled;
	ccp->cc_pobjs = ccp->cc_objs;
	ccp->cc_filled = bkt;
	ccp->cc_objs = objs;
}

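/*
 * Note on the per-CPU bucket pair: each CPU keeps a "filled" bucket and the
 * previously filled one (cc_pfilled/cc_pobjs).  When the current bucket runs
 * empty on allocation (or full on free) and the previous one can satisfy the
 * request, the alloc/free paths exchange the two via mcache_cpu_refill(), so
 * short bursts of allocations followed by frees (or vice versa) can often be
 * satisfied entirely at the CPU layer without touching the global bucket
 * lists.  This mirrors the "previous magazine" optimization from the Bonwick
 * paper referenced at the top of this file.
 */
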
/*
 * Allocate a bucket from the bucket layer.
 */
static mcache_bkt_t *
mcache_bkt_alloc(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkttype_t **btp)
{
	mcache_bkt_t *bkt;

	if (!MCACHE_LOCK_TRY(&cp->mc_bkt_lock)) {
		/*
		 * The bucket layer lock is held by another CPU; increase
		 * the contention count so that we can later resize the
		 * bucket size accordingly.
		 */
		MCACHE_LOCK(&cp->mc_bkt_lock);
		cp->mc_bkt_contention++;
	}

	if ((bkt = blp->bl_list) != NULL) {
		blp->bl_list = bkt->bkt_next;
		if (--blp->bl_total < blp->bl_min)
			blp->bl_min = blp->bl_total;
		blp->bl_alloc++;
	}

	if (btp != NULL)
		*btp = cp->cache_bkttype;

	MCACHE_UNLOCK(&cp->mc_bkt_lock);

	return (bkt);
}

/*
 * Free a bucket to the bucket layer.
 */
static void
mcache_bkt_free(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t *bkt)
{
	MCACHE_LOCK(&cp->mc_bkt_lock);

	bkt->bkt_next = blp->bl_list;
	blp->bl_list = bkt;
	blp->bl_total++;

	MCACHE_UNLOCK(&cp->mc_bkt_lock);
}

/*
 * Enable the bucket layer of a cache.
 */
static void
mcache_cache_bkt_enable(mcache_t *cp)
{
	mcache_cpu_t *ccp;
	int cpu;

	if (cp->mc_flags & MCF_NOCPUCACHE)
		return;

	for (cpu = 0; cpu < ncpu; cpu++) {
		ccp = &cp->mc_cpu[cpu];
		MCACHE_LOCK(&ccp->cc_lock);
		ccp->cc_bktsize = cp->cache_bkttype->bt_bktsize;
		MCACHE_UNLOCK(&ccp->cc_lock);
	}
}

/*
 * Purge all buckets from a cache and disable its bucket layer.
 */
static void
mcache_bkt_purge(mcache_t *cp)
{
	mcache_cpu_t *ccp;
	mcache_bkt_t *bp, *pbp;
	mcache_bkttype_t *btp;
	int cpu, objs, pobjs;

	for (cpu = 0; cpu < ncpu; cpu++) {
		ccp = &cp->mc_cpu[cpu];

		MCACHE_LOCK(&ccp->cc_lock);

		btp = cp->cache_bkttype;
		bp = ccp->cc_filled;
		pbp = ccp->cc_pfilled;
		objs = ccp->cc_objs;
		pobjs = ccp->cc_pobjs;
		ccp->cc_filled = NULL;
		ccp->cc_pfilled = NULL;
		ccp->cc_objs = -1;
		ccp->cc_pobjs = -1;
		ccp->cc_bktsize = 0;

		MCACHE_UNLOCK(&ccp->cc_lock);

		if (bp != NULL)
			mcache_bkt_destroy(cp, btp, bp, objs);
		if (pbp != NULL)
			mcache_bkt_destroy(cp, btp, pbp, pobjs);
	}

	/*
	 * Updating the working set back to back essentially sets
	 * the working set size to zero, so everything is reapable.
	 */
	mcache_bkt_ws_update(cp);
	mcache_bkt_ws_update(cp);

	mcache_bkt_ws_reap(cp);
}

/*
 * Free one or more objects in the bucket to the slab layer,
 * and also free the bucket itself.
 */
static void
mcache_bkt_destroy(mcache_t *cp, mcache_bkttype_t *btp, mcache_bkt_t *bkt,
    int nobjs)
{
	if (nobjs > 0) {
		mcache_obj_t *top = bkt->bkt_obj[nobjs - 1];

		if (cp->mc_flags & MCF_DEBUG) {
			mcache_obj_t *o = top;
			int cnt = 0;

			/*
			 * Verify that the chain of objects in the bucket is
			 * valid.  Any mismatch here means a mistake when the
			 * object(s) were freed to the CPU layer, so we panic.
			 */
			while (o != NULL) {
				o = o->obj_next;
				++cnt;
			}
			if (cnt != nobjs) {
				panic("mcache_bkt_destroy: %s cp %p corrupted "
				    "list in bkt %p (nobjs %d actual %d)\n",
				    cp->mc_name, (void *)cp, (void *)bkt,
				    nobjs, cnt);
			}
		}

		/* Advise the slab layer to purge the object(s) */
		(*cp->mc_slab_free)(cp->mc_private, top,
		    (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
	}
	mcache_free(btp->bt_cache, bkt);
}

/*
 * Update the bucket layer working set statistics.
 */
static void
mcache_bkt_ws_update(mcache_t *cp)
{
	MCACHE_LOCK(&cp->mc_bkt_lock);

	cp->mc_full.bl_reaplimit = cp->mc_full.bl_min;
	cp->mc_full.bl_min = cp->mc_full.bl_total;
	cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_min;
	cp->mc_empty.bl_min = cp->mc_empty.bl_total;

	MCACHE_UNLOCK(&cp->mc_bkt_lock);
}

/*
 * Reap all buckets that are beyond the working set.
 */
static void
mcache_bkt_ws_reap(mcache_t *cp)
{
	long reap;
	mcache_bkt_t *bkt;
	mcache_bkttype_t *btp;

	reap = MIN(cp->mc_full.bl_reaplimit, cp->mc_full.bl_min);
	while (reap-- &&
	    (bkt = mcache_bkt_alloc(cp, &cp->mc_full, &btp)) != NULL)
		mcache_bkt_destroy(cp, btp, bkt, btp->bt_bktsize);

	reap = MIN(cp->mc_empty.bl_reaplimit, cp->mc_empty.bl_min);
	while (reap-- &&
	    (bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp)) != NULL)
		mcache_bkt_destroy(cp, btp, bkt, 0);
}

static void
mcache_reap_timeout(thread_call_param_t dummy __unused,
    thread_call_param_t arg)
{
	volatile UInt32 *flag = arg;

	ASSERT(flag == &mcache_reaping);

	*flag = 0;
}

static void
mcache_reap_done(void *flag)
{
	uint64_t deadline, leeway;

	clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC,
	    &deadline);
	clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway,
	    NSEC_PER_SEC, &leeway);
	thread_call_enter_delayed_with_leeway(mcache_reap_tcall, flag,
	    deadline, leeway, THREAD_CALL_DELAY_LEEWAY);
}

static void
mcache_reap_start(void *arg)
{
	UInt32 *flag = arg;

	ASSERT(flag == &mcache_reaping);

	mcache_applyall(mcache_cache_reap);
	mcache_dispatch(mcache_reap_done, flag);
}

__private_extern__ void
mcache_reap(void)
{
	UInt32 *flag = &mcache_reaping;

	if (mcache_llock_owner == current_thread() ||
	    !OSCompareAndSwap(0, 1, flag))
		return;

	mcache_dispatch(mcache_reap_start, flag);
}

static void
mcache_cache_reap(mcache_t *cp)
{
	mcache_bkt_ws_reap(cp);
}

/*
 * Performs periodic maintenance on a cache.
 */
static void
mcache_cache_update(mcache_t *cp)
{
	int need_bkt_resize = 0;
	int need_bkt_reenable = 0;

	lck_mtx_assert(mcache_llock, LCK_MTX_ASSERT_OWNED);

	mcache_bkt_ws_update(cp);

	/*
	 * Cache resize and post-purge reenable are mutually exclusive.
	 * If the cache was previously purged, there is no point in
	 * increasing the bucket size as there was an indication of
	 * memory pressure on the system.
	 */
	lck_mtx_lock_spin(&cp->mc_sync_lock);
	if (!(cp->mc_flags & MCF_NOCPUCACHE) && cp->mc_enable_cnt)
		need_bkt_reenable = 1;
	lck_mtx_unlock(&cp->mc_sync_lock);

	MCACHE_LOCK(&cp->mc_bkt_lock);
	/*
	 * If the contention count is greater than the threshold, and if
	 * we are not already at the maximum bucket size, increase it.
	 * Otherwise, if this cache was previously purged by the user
	 * then we simply reenable it.
	 */
	if ((unsigned int)cp->mc_chunksize < cp->cache_bkttype->bt_maxbuf &&
	    (int)(cp->mc_bkt_contention - cp->mc_bkt_contention_prev) >
	    mcache_bkt_contention && !need_bkt_reenable)
		need_bkt_resize = 1;

	cp->mc_bkt_contention_prev = cp->mc_bkt_contention;
	MCACHE_UNLOCK(&cp->mc_bkt_lock);

	if (need_bkt_resize)
		mcache_dispatch(mcache_cache_bkt_resize, cp);
	else if (need_bkt_reenable)
		mcache_dispatch(mcache_cache_enable, cp);
}

/*
 * Recompute a cache's bucket size.  This is an expensive operation
 * and should not be done frequently; larger buckets provide for a
 * higher transfer rate with the bucket while smaller buckets reduce
 * the memory consumption.
 */
static void
mcache_cache_bkt_resize(void *arg)
{
	mcache_t *cp = arg;
	mcache_bkttype_t *btp = cp->cache_bkttype;

	if ((unsigned int)cp->mc_chunksize < btp->bt_maxbuf) {
		mcache_bkt_purge(cp);

		/*
		 * Upgrade to the next bucket type with larger bucket size;
		 * temporarily set the previous contention snapshot to a
		 * negative number to prevent unnecessary resize requests.
		 */
		MCACHE_LOCK(&cp->mc_bkt_lock);
		cp->cache_bkttype = ++btp;
		cp->mc_bkt_contention_prev = cp->mc_bkt_contention + INT_MAX;
		MCACHE_UNLOCK(&cp->mc_bkt_lock);

		mcache_cache_enable(cp);
	}
}

/*
 * Reenable a cache that was previously disabled due to a purge.
 */
static void
mcache_cache_enable(void *arg)
{
	mcache_t *cp = arg;

	lck_mtx_lock_spin(&cp->mc_sync_lock);
	cp->mc_purge_cnt = 0;
	cp->mc_enable_cnt = 0;
	lck_mtx_unlock(&cp->mc_sync_lock);

	mcache_cache_bkt_enable(cp);
}

static void
mcache_update_timeout(__unused void *arg)
{
	uint64_t deadline, leeway;

	clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC,
	    &deadline);
	clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway,
	    NSEC_PER_SEC, &leeway);
	thread_call_enter_delayed_with_leeway(mcache_update_tcall, NULL,
	    deadline, leeway, THREAD_CALL_DELAY_LEEWAY);
}

static void
mcache_update(thread_call_param_t arg __unused,
    thread_call_param_t dummy __unused)
{
	mcache_applyall(mcache_cache_update);
	mcache_update_timeout(NULL);
}

static void
mcache_applyall(void (*func)(mcache_t *))
{
	mcache_t *cp;

	MCACHE_LIST_LOCK();
	LIST_FOREACH(cp, &mcache_head, mc_list) {
		func(cp);
	}
	MCACHE_LIST_UNLOCK();
}

static void
mcache_dispatch(void (*func)(void *), void *arg)
{
	ASSERT(func != NULL);
	timeout(func, arg, hz/1000);
}

__private_extern__ void
mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp,
    struct timeval *base_ts)
{
	struct timeval now, base = { 0, 0 };
	void *stack[MCACHE_STACK_DEPTH + 1];
	struct mca_trn *transaction;

	transaction = &mca->mca_trns[mca->mca_next_trn];

	mca->mca_addr = addr;
	mca->mca_cache = cp;

	transaction->mca_thread = current_thread();

	bzero(stack, sizeof (stack));
	transaction->mca_depth = OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1;
	bcopy(&stack[1], transaction->mca_stack,
	    sizeof (transaction->mca_stack));

	microuptime(&now);
	if (base_ts != NULL)
		base = *base_ts;
	/* tstamp is in ms relative to base_ts */
	transaction->mca_tstamp = ((now.tv_usec - base.tv_usec) / 1000);
	if ((now.tv_sec - base.tv_sec) > 0)
		transaction->mca_tstamp += ((now.tv_sec - base.tv_sec) * 1000);

	mca->mca_next_trn =
	    (mca->mca_next_trn + 1) % mca_trn_max;
}

__private_extern__ void
mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size)
{
	u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
	u_int64_t *buf = (u_int64_t *)buf_arg;

	VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
	VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));

	while (buf < buf_end)
		*buf++ = pattern;
}

__private_extern__ void *
mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size)
{
	u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
	u_int64_t *buf;

	VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
	VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));

	for (buf = buf_arg; buf < buf_end; buf++) {
		if (*buf != pattern)
			return (buf);
	}
	return (NULL);
}

__private_extern__ void *
mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg,
    size_t size)
{
	u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
	u_int64_t *buf;

	VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
	VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));

	for (buf = buf_arg; buf < buf_end; buf++) {
		if (*buf != old) {
			mcache_set_pattern(old, buf_arg,
			    (uintptr_t)buf - (uintptr_t)buf_arg);
			return (buf);
		}
		*buf = new;
	}
	return (NULL);
}

__private_extern__ void
mcache_audit_free_verify(mcache_audit_t *mca, void *base, size_t offset,
    size_t size)
{
	void *addr;
	u_int64_t *oaddr64;
	mcache_obj_t *next;

	addr = (void *)((uintptr_t)base + offset);
	next = ((mcache_obj_t *)addr)->obj_next;

	/* For the "obj_next" pointer in the buffer */
	oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
	*oaddr64 = MCACHE_FREE_PATTERN;

	if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN,
	    (caddr_t)base, size)) != NULL) {
		mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
		    (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
		/* NOTREACHED */
	}
	((mcache_obj_t *)addr)->obj_next = next;
}

__private_extern__ void
mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset,
    size_t size)
{
	void *addr;
	u_int64_t *oaddr64;
	mcache_obj_t *next;

	addr = (void *)((uintptr_t)base + offset);
	next = ((mcache_obj_t *)addr)->obj_next;

	/* For the "obj_next" pointer in the buffer */
	oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
	*oaddr64 = MCACHE_FREE_PATTERN;

	if ((oaddr64 = mcache_verify_set_pattern(MCACHE_FREE_PATTERN,
	    MCACHE_UNINITIALIZED_PATTERN, (caddr_t)base, size)) != NULL) {
		mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
		    (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
		/* NOTREACHED */
	}
	((mcache_obj_t *)addr)->obj_next = next;
}

#undef panic

#define	DUMP_TRN_FMT() \
	    "%s transaction thread %p saved PC stack (%d deep):\n" \
	    "\t%p, %p, %p, %p, %p, %p, %p, %p\n" \
	    "\t%p, %p, %p, %p, %p, %p, %p, %p\n"

#define	DUMP_TRN_FIELDS(s, x) \
	    s, \
	    mca->mca_trns[x].mca_thread, mca->mca_trns[x].mca_depth, \
	    mca->mca_trns[x].mca_stack[0], mca->mca_trns[x].mca_stack[1], \
	    mca->mca_trns[x].mca_stack[2], mca->mca_trns[x].mca_stack[3], \
	    mca->mca_trns[x].mca_stack[4], mca->mca_trns[x].mca_stack[5], \
	    mca->mca_trns[x].mca_stack[6], mca->mca_trns[x].mca_stack[7], \
	    mca->mca_trns[x].mca_stack[8], mca->mca_trns[x].mca_stack[9], \
	    mca->mca_trns[x].mca_stack[10], mca->mca_trns[x].mca_stack[11], \
	    mca->mca_trns[x].mca_stack[12], mca->mca_trns[x].mca_stack[13], \
	    mca->mca_trns[x].mca_stack[14], mca->mca_trns[x].mca_stack[15]

#define	MCA_TRN_LAST ((mca->mca_next_trn + mca_trn_max) % mca_trn_max)
#define	MCA_TRN_PREV ((mca->mca_next_trn + mca_trn_max - 1) % mca_trn_max)

__private_extern__ char *
mcache_dump_mca(mcache_audit_t *mca)
{
	if (mca_dump_buf == NULL)
		return (NULL);

	snprintf(mca_dump_buf, DUMP_MCA_BUF_SIZE,
	    "mca %p: addr %p, cache %p (%s) nxttrn %d\n"
	    DUMP_TRN_FMT()
	    DUMP_TRN_FMT(),

	    mca, mca->mca_addr, mca->mca_cache,
	    mca->mca_cache ? mca->mca_cache->mc_name : "?",
	    mca->mca_next_trn,

	    DUMP_TRN_FIELDS("last", MCA_TRN_LAST),
	    DUMP_TRN_FIELDS("previous", MCA_TRN_PREV));

	return (mca_dump_buf);
}

__private_extern__ void
mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset,
    int64_t expected, int64_t got)
{
	if (mca == NULL) {
		panic("mcache_audit: buffer %p modified after free at "
		    "offset 0x%lx (0x%llx instead of 0x%llx)\n", addr,
		    offset, got, expected);
		/* NOTREACHED */
	}

	panic("mcache_audit: buffer %p modified after free at offset 0x%lx "
	    "(0x%llx instead of 0x%llx)\n%s\n",
	    addr, offset, got, expected, mcache_dump_mca(mca));
	/* NOTREACHED */
}

__private_extern__ int
assfail(const char *a, const char *f, int l)
{
	panic("assertion failed: %s, file: %s, line: %d", a, f, l);
	return (0);
}