/*
 * Copyright (c) 2006-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Memory allocator with per-CPU caching, derived from the kmem magazine
 * concept and implementation as described in the following paper:
 * http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick.pdf
 * That implementation is Copyright 2006 Sun Microsystems, Inc. All rights
 * reserved. Use is subject to license terms.
 *
 * There are several major differences between this and the original kmem
 * magazine: this derivative implementation allows multiple objects to be
 * allocated and freed from/to the object cache in one call; in addition,
 * it provides greater flexibility by allowing the user to define its own
 * slab allocator (instead of the default zone allocator).  Finally, no
 * object construction/destruction takes place at the moment, although
 * this could be added in the future to improve efficiency.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/systm.h>

#include <kern/debug.h>
#include <kern/zalloc.h>
#include <kern/cpu_number.h>
#include <kern/locks.h>

#include <libkern/libkern.h>
#include <libkern/OSAtomic.h>
#include <libkern/OSDebug.h>

#include <mach/vm_param.h>
#include <machine/limits.h>
#include <machine/machine_routines.h>

#include <string.h>

#include <sys/mcache.h>

#define	MCACHE_SIZE(n) \
	((size_t)(&((mcache_t *)0)->mc_cpu[n]))

/* Allocate extra in case we need to manually align the pointer */
#define	MCACHE_ALLOC_SIZE \
	(sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_SIZE)

#define	MCACHE_CPU(c) \
	(mcache_cpu_t *)((char *)(c) + MCACHE_SIZE(cpu_number()))

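/*
 * Illustration (a sketch, not used by the code): MCACHE_SIZE(n) is the
 * offset of mc_cpu[n] from the start of mcache_t, so MCACHE_CPU(c)
 * resolves to the slot of the CPU we are currently running on:
 *
 *	mcache_cpu_t *ccp =
 *	    (mcache_cpu_t *)((char *)cp + MCACHE_SIZE(cpu_number()));
 *
 * Because each per-CPU structure is aligned on a CPU cache line (see
 * the VERIFY in mcache_create_common below), slots belonging to
 * different CPUs never share a cache line.
 */
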
/*
 * MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used
 * to serialize accesses to the global list of caches in the system.
 * They also record the thread currently running in the critical
 * section, so that we can avoid recursive requests to reap the
 * caches when memory runs low.
 */
#define	MCACHE_LIST_LOCK() {				\
	lck_mtx_lock(mcache_llock);			\
	mcache_llock_owner = current_thread();		\
}

#define	MCACHE_LIST_UNLOCK() {				\
	mcache_llock_owner = NULL;			\
	lck_mtx_unlock(mcache_llock);			\
}

#define	MCACHE_LOCK(l)		lck_mtx_lock(l)
#define	MCACHE_UNLOCK(l)	lck_mtx_unlock(l)
#define	MCACHE_LOCK_TRY(l)	lck_mtx_try_lock(l)

/* This should be in a header file */
#define	atomic_add_32(a, n)	((void) OSAddAtomic(n, (volatile SInt32 *)a))

static int ncpu;
static lck_mtx_t *mcache_llock;
static struct thread *mcache_llock_owner;
static lck_attr_t *mcache_llock_attr;
static lck_grp_t *mcache_llock_grp;
static lck_grp_attr_t *mcache_llock_grp_attr;
static struct zone *mcache_zone;
static unsigned int mcache_reap_interval;
static UInt32 mcache_reaping;
static int mcache_ready;
static int mcache_updating;

static int mcache_bkt_contention = 3;
#if DEBUG
static unsigned int mcache_flags = MCF_DEBUG;
#else
static unsigned int mcache_flags = 0;
#endif

#define	DUMP_MCA_BUF_SIZE	512
static char *mca_dump_buf;

/*
 * Bucket types, ordered from largest to smallest bucket: bt_bktsize is
 * the object capacity of a bucket, bt_minbuf and bt_maxbuf bound the
 * chunk sizes the type is used for (see the selection loop in
 * mcache_create_common and mcache_cache_bkt_resize), and bt_cache is
 * the cache from which the buckets themselves are allocated.
 */
static mcache_bkttype_t mcache_bkttype[] = {
	{ 1,	4096,	32768,	NULL },
	{ 3,	2048,	16384,	NULL },
	{ 7,	1024,	12288,	NULL },
	{ 15,	256,	8192,	NULL },
	{ 31,	64,	4096,	NULL },
	{ 47,	0,	2048,	NULL },
	{ 63,	0,	1024,	NULL },
	{ 95,	0,	512,	NULL },
	{ 143,	0,	256,	NULL },
	{ 165,	0,	0,	NULL },
};

static mcache_t *mcache_create_common(const char *, size_t, size_t,
    mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_notifyfn_t,
    void *, u_int32_t, int, int);
static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***,
    unsigned int, int);
static void mcache_slab_free(void *, mcache_obj_t *, boolean_t);
static void mcache_slab_audit(void *, mcache_obj_t *, boolean_t);
static void mcache_cpu_refill(mcache_cpu_t *, mcache_bkt_t *, int);
static mcache_bkt_t *mcache_bkt_alloc(mcache_t *, mcache_bktlist_t *,
    mcache_bkttype_t **);
static void mcache_bkt_free(mcache_t *, mcache_bktlist_t *, mcache_bkt_t *);
static void mcache_cache_bkt_enable(mcache_t *);
static void mcache_bkt_purge(mcache_t *);
static void mcache_bkt_destroy(mcache_t *, mcache_bkttype_t *,
    mcache_bkt_t *, int);
static void mcache_bkt_ws_update(mcache_t *);
static void mcache_bkt_ws_reap(mcache_t *);
static void mcache_dispatch(void (*)(void *), void *);
static void mcache_cache_reap(mcache_t *);
static void mcache_cache_update(mcache_t *);
static void mcache_cache_bkt_resize(void *);
static void mcache_cache_enable(void *);
static void mcache_update(void *);
static void mcache_update_timeout(void *);
static void mcache_applyall(void (*)(mcache_t *));
static void mcache_reap_start(void *);
static void mcache_reap_done(void *);
static void mcache_reap_timeout(void *);
static void mcache_notify(mcache_t *, u_int32_t);
static void mcache_purge(void *);

static LIST_HEAD(, mcache) mcache_head;
mcache_t *mcache_audit_cache;

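/*
 * Each cache is organized in three layers: the per-CPU layer (a filled
 * and a previously-filled bucket per CPU, guarded by cc_lock), the
 * bucket layer (the mc_full and mc_empty lists, guarded by mc_bkt_lock),
 * and the slab layer reached through the mc_slab_alloc/mc_slab_free
 * callbacks.  Allocations drain these layers top to bottom, and frees
 * refill them in the same order, falling through to the slab layer only
 * when the upper layers cannot satisfy the request.
 */
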
/*
 * Initialize the framework; this is currently called as part of BSD init.
 */
__private_extern__ void
mcache_init(void)
{
	mcache_bkttype_t *btp;
	unsigned int i;
	char name[32];

	ncpu = ml_get_max_cpus();

	mcache_llock_grp_attr = lck_grp_attr_alloc_init();
	mcache_llock_grp = lck_grp_alloc_init("mcache.list",
	    mcache_llock_grp_attr);
	mcache_llock_attr = lck_attr_alloc_init();
	mcache_llock = lck_mtx_alloc_init(mcache_llock_grp, mcache_llock_attr);

	mcache_zone = zinit(MCACHE_ALLOC_SIZE, 256 * MCACHE_ALLOC_SIZE,
	    PAGE_SIZE, "mcache");
	if (mcache_zone == NULL)
		panic("mcache_init: failed to allocate mcache zone\n");

	LIST_INIT(&mcache_head);

	for (i = 0; i < sizeof (mcache_bkttype) / sizeof (*btp); i++) {
		btp = &mcache_bkttype[i];
		(void) snprintf(name, sizeof (name), "bkt_%d",
		    btp->bt_bktsize);
		btp->bt_cache = mcache_create(name,
		    (btp->bt_bktsize + 1) * sizeof (void *), 0, 0, MCR_SLEEP);
	}

	PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof (mcache_flags));
	mcache_flags &= MCF_FLAGS_MASK;

	mcache_audit_cache = mcache_create("audit", sizeof (mcache_audit_t),
	    0, 0, MCR_SLEEP);

	mcache_reap_interval = 15 * hz;
	mcache_applyall(mcache_cache_bkt_enable);
	mcache_ready = 1;
}

/*
 * Return the global mcache flags.
 */
__private_extern__ unsigned int
mcache_getflags(void)
{
	return (mcache_flags);
}

/*
 * Create a cache using the zone allocator as the backend slab allocator.
 * The caller may specify any alignment for the object; if it specifies 0
 * the default alignment (MCACHE_ALIGN) will be used.
 */
__private_extern__ mcache_t *
mcache_create(const char *name, size_t bufsize, size_t align,
    u_int32_t flags, int wait)
{
	return (mcache_create_common(name, bufsize, align, mcache_slab_alloc,
	    mcache_slab_free, mcache_slab_audit, NULL, NULL, flags, 1, wait));
}

/*
 * Create a cache using a custom backend slab allocator.  Since the caller
 * is responsible for allocation, no alignment guarantee will be provided
 * by this framework.
 */
__private_extern__ mcache_t *
mcache_create_ext(const char *name, size_t bufsize,
    mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
    mcache_notifyfn_t notifyfn, void *arg, u_int32_t flags, int wait)
{
	return (mcache_create_common(name, bufsize, 0, allocfn,
	    freefn, auditfn, notifyfn, arg, flags, 0, wait));
}

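/*
 * Typical usage of the two constructors (a sketch only; the "foo" names
 * are hypothetical and not part of this file).  A simple client lets
 * the zone-backed slab allocator do the work:
 *
 *	static mcache_t *foo_cache;
 *
 *	foo_cache = mcache_create("foo", sizeof (struct foo), 0, 0,
 *	    MCR_SLEEP);
 *	struct foo *fp = mcache_alloc(foo_cache, MCR_SLEEP);
 *	...
 *	mcache_free(foo_cache, fp);
 *
 * A client with its own backing store (e.g. the mbuf allocator) would
 * instead pass its alloc/free/audit/notify callbacks and a private arg
 * to mcache_create_ext().  Note that a freed object's first pointer is
 * reused as the obj_next linkage while it sits in the cache.
 */
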
/*
 * Common cache creation routine.
 */
static mcache_t *
mcache_create_common(const char *name, size_t bufsize, size_t align,
    mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
    mcache_notifyfn_t notifyfn, void *arg, u_int32_t flags, int need_zone,
    int wait)
{
	mcache_bkttype_t *btp;
	mcache_t *cp = NULL;
	size_t chunksize;
	void *buf, **pbuf;
	int c;
	char lck_name[64];

	/* If auditing is on and print buffer is NULL, allocate it now */
	if ((flags & MCF_AUDIT) && mca_dump_buf == NULL) {
		int malloc_wait = (wait & MCR_NOSLEEP) ? M_NOWAIT : M_WAITOK;
		MALLOC(mca_dump_buf, char *, DUMP_MCA_BUF_SIZE, M_TEMP,
		    malloc_wait | M_ZERO);
		if (mca_dump_buf == NULL)
			return (NULL);
	}

	if (!(wait & MCR_NOSLEEP))
		buf = zalloc(mcache_zone);
	else
		buf = zalloc_noblock(mcache_zone);

	if (buf == NULL)
		goto fail;

	bzero(buf, MCACHE_ALLOC_SIZE);

	/*
	 * In case we didn't get cache-aligned memory, round it up
	 * accordingly.  This is needed in order to get the rest of the
	 * structure members aligned properly.  It also means that the
	 * memory span gets shifted due to the round up, but that is
	 * okay since we've allocated extra space for this.
	 */
	cp = (mcache_t *)
	    P2ROUNDUP((intptr_t)buf + sizeof (void *), CPU_CACHE_SIZE);
	pbuf = (void **)((intptr_t)cp - sizeof (void *));
	*pbuf = buf;

	/*
	 * Guaranteed alignment is valid only when we use the internal
	 * slab allocator (currently set to use the zone allocator).
	 */
	if (!need_zone)
		align = 1;
	else if (align == 0)
		align = MCACHE_ALIGN;

	if ((align & (align - 1)) != 0)
		panic("mcache_create: bad alignment %lu", align);

	cp->mc_align = align;
	cp->mc_slab_alloc = allocfn;
	cp->mc_slab_free = freefn;
	cp->mc_slab_audit = auditfn;
	cp->mc_slab_notify = notifyfn;
	cp->mc_private = need_zone ? cp : arg;
	cp->mc_bufsize = bufsize;
	cp->mc_flags = (flags & MCF_FLAGS_MASK) | mcache_flags;

	(void) snprintf(cp->mc_name, sizeof (cp->mc_name), "mcache.%s", name);

	(void) snprintf(lck_name, sizeof (lck_name), "%s.cpu", cp->mc_name);
	cp->mc_cpu_lock_grp_attr = lck_grp_attr_alloc_init();
	cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name,
	    cp->mc_cpu_lock_grp_attr);
	cp->mc_cpu_lock_attr = lck_attr_alloc_init();

	/*
	 * Allocation chunk size is the object's size plus any extra size
	 * needed to satisfy the object's alignment.  It is enforced to be
	 * at least the size of an LP64 pointer to simplify auditing and to
	 * handle multiple-element allocation requests, where the elements
	 * returned are linked together in a list.
	 */
	chunksize = MAX(bufsize, sizeof (u_int64_t));
	if (need_zone) {
		/* Enforce 64-bit minimum alignment for zone-based buffers */
		align = MAX(align, sizeof (u_int64_t));
		chunksize += sizeof (void *) + align;
		chunksize = P2ROUNDUP(chunksize, align);
		if ((cp->mc_slab_zone = zinit(chunksize, 64 * 1024 * ncpu,
		    PAGE_SIZE, cp->mc_name)) == NULL)
			goto fail;
		zone_change(cp->mc_slab_zone, Z_EXPAND, TRUE);
	}
	cp->mc_chunksize = chunksize;

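	/*
	 * Worked example (hypothetical numbers): for a zone-backed cache
	 * with bufsize 256 and 8-byte alignment, chunksize starts at
	 * MAX(256, 8) = 256, grows by sizeof (void *) + align = 16 on an
	 * LP64 system to leave room for the backing-pointer slot plus
	 * alignment slack, and P2ROUNDUP(272, 8) leaves 272 bytes per
	 * chunk.
	 */
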
	/*
	 * Initialize the bucket layer.
	 */
	(void) snprintf(lck_name, sizeof (lck_name), "%s.bkt", cp->mc_name);
	cp->mc_bkt_lock_grp_attr = lck_grp_attr_alloc_init();
	cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name,
	    cp->mc_bkt_lock_grp_attr);
	cp->mc_bkt_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp,
	    cp->mc_bkt_lock_attr);

	(void) snprintf(lck_name, sizeof (lck_name), "%s.sync", cp->mc_name);
	cp->mc_sync_lock_grp_attr = lck_grp_attr_alloc_init();
	cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name,
	    cp->mc_sync_lock_grp_attr);
	cp->mc_sync_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp,
	    cp->mc_sync_lock_attr);

	/* Pick the smallest bucket type that fits this chunk size */
	for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++)
		continue;

	cp->cache_bkttype = btp;

	/*
	 * Initialize the CPU layer.  Each per-CPU structure is aligned
	 * on the CPU cache line boundary to prevent false sharing.
	 */
	for (c = 0; c < ncpu; c++) {
		mcache_cpu_t *ccp = &cp->mc_cpu[c];

		VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_SIZE));
		lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp,
		    cp->mc_cpu_lock_attr);
		ccp->cc_objs = -1;
		ccp->cc_pobjs = -1;
	}

	if (mcache_ready)
		mcache_cache_bkt_enable(cp);

	/* TODO: dynamically create sysctl for stats */

	MCACHE_LIST_LOCK();
	LIST_INSERT_HEAD(&mcache_head, cp, mc_list);
	MCACHE_LIST_UNLOCK();

	/*
	 * If cache buckets are enabled and this is the first cache
	 * created, start the periodic cache update.
	 */
	if (!(mcache_flags & MCF_NOCPUCACHE) && !mcache_updating) {
		mcache_updating = 1;
		mcache_update_timeout(NULL);
	}
	if (cp->mc_flags & MCF_DEBUG) {
		printf("mcache_create: %s (%s) arg %p bufsize %lu align %lu "
		    "chunksize %lu bktsize %d\n", name, need_zone ? "i" : "e",
		    arg, bufsize, cp->mc_align, chunksize, btp->bt_bktsize);
	}
	return (cp);

fail:
	if (buf != NULL)
		zfree(mcache_zone, buf);
	return (NULL);
}

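/*
 * Continuing the worked example from above (hypothetical numbers): a
 * 272-byte chunk walks past the 4096-, 2048- and 1024-byte bt_minbuf
 * entries of mcache_bkttype[] and settles on { 15, 256, 8192 }, so the
 * cache starts out with buckets holding up to 15 objects.  Larger
 * chunks map to smaller buckets, bounding per-bucket memory use.
 */
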
/*
 * Allocate one or more objects from a cache.
 */
__private_extern__ unsigned int
mcache_alloc_ext(mcache_t *cp, mcache_obj_t **list, unsigned int num, int wait)
{
	mcache_cpu_t *ccp;
	mcache_obj_t **top = &(*list);
	mcache_bkt_t *bkt;
	unsigned int need = num;
	boolean_t nwretry = FALSE;

	/* MCR_NOSLEEP and MCR_FAILOK are mutually exclusive */
	VERIFY((wait & (MCR_NOSLEEP|MCR_FAILOK)) != (MCR_NOSLEEP|MCR_FAILOK));

	ASSERT(list != NULL);
	*list = NULL;

	if (num == 0)
		return (0);

retry_alloc:
	/* We may not always be running on the same CPU in case of retries */
	ccp = MCACHE_CPU(cp);

	MCACHE_LOCK(&ccp->cc_lock);
	for (;;) {
		/*
		 * If we have an object in the current CPU's filled bucket,
		 * chain the object to any previous objects and return if
		 * we've satisfied the number of requested objects.
		 */
		if (ccp->cc_objs > 0) {
			mcache_obj_t *tail;
			int objs;

			/*
			 * Objects in the bucket are already linked together
			 * with the most recently freed object at the head of
			 * the list; grab as many objects as we can.
			 */
			objs = MIN((unsigned int)ccp->cc_objs, need);
			*list = ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
			ccp->cc_objs -= objs;
			ccp->cc_alloc += objs;

			tail = ccp->cc_filled->bkt_obj[ccp->cc_objs];
			list = &tail->obj_next;
			*list = NULL;

			/* If we got them all, return to caller */
			if ((need -= objs) == 0) {
				MCACHE_UNLOCK(&ccp->cc_lock);
				if (cp->mc_flags & MCF_DEBUG)
					goto debug_alloc;

				return (num);
			}
		}

		/*
		 * The CPU's filled bucket is empty.  If the previous filled
		 * bucket was full, exchange and try again.
		 */
		if (ccp->cc_pobjs > 0) {
			mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
			continue;
		}

		/*
		 * If the bucket layer is disabled, allocate from slab.  This
		 * can happen either because MCF_NOCPUCACHE is set, or because
		 * the bucket layer is currently being resized.
		 */
		if (ccp->cc_bktsize == 0)
			break;

		/*
		 * Both of the CPU's buckets are empty; try to get a full
		 * bucket from the bucket layer.  Upon success, refill this
		 * CPU and place any empty bucket into the empty list.
		 */
		bkt = mcache_bkt_alloc(cp, &cp->mc_full, NULL);
		if (bkt != NULL) {
			if (ccp->cc_pfilled != NULL)
				mcache_bkt_free(cp, &cp->mc_empty,
				    ccp->cc_pfilled);
			mcache_cpu_refill(ccp, bkt, ccp->cc_bktsize);
			continue;
		}

		/*
		 * The bucket layer has no full buckets; allocate the
		 * object(s) directly from the slab layer.
		 */
		break;
	}
	MCACHE_UNLOCK(&ccp->cc_lock);

	need -= (*cp->mc_slab_alloc)(cp->mc_private, &list, need, wait);

	/*
	 * If this is a blocking allocation, or if it is non-blocking and
	 * the cache's full bucket is non-empty, then retry the allocation.
	 */
	if (need > 0) {
		if (!(wait & MCR_NONBLOCKING)) {
			atomic_add_32(&cp->mc_wretry_cnt, 1);
			goto retry_alloc;
		} else if ((wait & (MCR_NOSLEEP | MCR_TRYHARD)) &&
		    !mcache_bkt_isempty(cp)) {
			if (!nwretry)
				nwretry = TRUE;
			atomic_add_32(&cp->mc_nwretry_cnt, 1);
			goto retry_alloc;
		} else if (nwretry) {
			atomic_add_32(&cp->mc_nwfail_cnt, 1);
		}
	}

	if (!(cp->mc_flags & MCF_DEBUG))
		return (num - need);

debug_alloc:
	if (cp->mc_flags & MCF_VERIFY) {
		mcache_obj_t **o = top;
		unsigned int n;

		n = 0;
		/*
		 * Verify that the chain of objects has the same count as
		 * what we are about to report to the caller.  Any mismatch
		 * here means that the object list is badly broken and
		 * therefore we must panic.
		 */
		while (*o != NULL) {
			o = &(*o)->obj_next;
			++n;
		}
		if (n != (num - need)) {
			panic("mcache_alloc_ext: %s cp %p corrupted list "
			    "(got %d actual %d)\n", cp->mc_name,
			    (void *)cp, num - need, n);
		}
	}

	/* Invoke the slab layer audit callback if auditing is enabled */
	if ((cp->mc_flags & MCF_AUDIT) && cp->mc_slab_audit != NULL)
		(*cp->mc_slab_audit)(cp->mc_private, *top, TRUE);

	return (num - need);
}

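/*
 * Batch transaction sketch (foo_cache is hypothetical).  Because a
 * non-blocking request may be satisfied only partially, the caller
 * must trust the return value rather than the requested count:
 *
 *	mcache_obj_t *list;
 *	unsigned int got;
 *
 *	got = mcache_alloc_ext(foo_cache, &list, 16, MCR_NOSLEEP);
 *	if (got == 0)
 *		return;			(nothing could be allocated)
 *	...use the got objects chained through obj_next...
 *	mcache_free_ext(foo_cache, list);
 */
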
/*
 * Allocate a single object from a cache.
 */
__private_extern__ void *
mcache_alloc(mcache_t *cp, int wait)
{
	mcache_obj_t *buf;

	(void) mcache_alloc_ext(cp, &buf, 1, wait);
	return (buf);
}

__private_extern__ void
mcache_waiter_inc(mcache_t *cp)
{
	atomic_add_32(&cp->mc_waiter_cnt, 1);
}

__private_extern__ void
mcache_waiter_dec(mcache_t *cp)
{
	atomic_add_32(&cp->mc_waiter_cnt, -1);
}

__private_extern__ boolean_t
mcache_bkt_isempty(mcache_t *cp)
{
	/*
	 * This isn't meant to accurately tell whether there are
	 * any full buckets in the cache; it is simply a way to
	 * obtain "hints" about the state of the cache.
	 */
	return (cp->mc_full.bl_total == 0);
}

/*
 * Notify the slab layer about an event.
 */
static void
mcache_notify(mcache_t *cp, u_int32_t event)
{
	if (cp->mc_slab_notify != NULL)
		(*cp->mc_slab_notify)(cp->mc_private, event);
}

/*
 * Purge the cache and disable its buckets.
 */
static void
mcache_purge(void *arg)
{
	mcache_t *cp = arg;

	mcache_bkt_purge(cp);
	/*
	 * We cannot simply call mcache_cache_bkt_enable() from here as
	 * a bucket resize may be in flight and we would cause the CPU
	 * layers of the cache to point to different sizes.  Therefore,
	 * we simply increment the enable count so that during the next
	 * periodic cache update the buckets can be reenabled.
	 */
	lck_mtx_lock_spin(&cp->mc_sync_lock);
	cp->mc_enable_cnt++;
	lck_mtx_unlock(&cp->mc_sync_lock);
}

__private_extern__ boolean_t
mcache_purge_cache(mcache_t *cp)
{
	/*
	 * Purging a cache that has no per-CPU caches or is already
	 * in the process of being purged is rather pointless.
	 */
	if (cp->mc_flags & MCF_NOCPUCACHE)
		return (FALSE);

	lck_mtx_lock_spin(&cp->mc_sync_lock);
	if (cp->mc_purge_cnt > 0) {
		lck_mtx_unlock(&cp->mc_sync_lock);
		return (FALSE);
	}
	cp->mc_purge_cnt++;
	lck_mtx_unlock(&cp->mc_sync_lock);

	mcache_dispatch(mcache_purge, cp);

	return (TRUE);
}

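/*
 * Sketch of the retry protocol from a client's perspective (the "foo"
 * names are hypothetical).  A blocking client passes a notify callback
 * to mcache_create_ext(), brackets its waits with mcache_waiter_inc()
 * and mcache_waiter_dec(), and is woken when a free posts
 * MCN_RETRYALLOC (see mcache_free_ext() below):
 *
 *	static void
 *	foo_notify(void *arg, u_int32_t event)
 *	{
 *		if (event & MCN_RETRYALLOC)
 *			wakeup(arg);
 *	}
 */
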
/*
 * Free a single object to a cache.
 */
__private_extern__ void
mcache_free(mcache_t *cp, void *buf)
{
	((mcache_obj_t *)buf)->obj_next = NULL;
	mcache_free_ext(cp, (mcache_obj_t *)buf);
}

/*
 * Free one or more objects to a cache.
 */
__private_extern__ void
mcache_free_ext(mcache_t *cp, mcache_obj_t *list)
{
	mcache_cpu_t *ccp = MCACHE_CPU(cp);
	mcache_bkttype_t *btp;
	mcache_obj_t *nlist;
	mcache_bkt_t *bkt;

	/* Invoke the slab layer audit callback if auditing is enabled */
	if ((cp->mc_flags & MCF_AUDIT) && cp->mc_slab_audit != NULL)
		(*cp->mc_slab_audit)(cp->mc_private, list, FALSE);

	MCACHE_LOCK(&ccp->cc_lock);
	for (;;) {
		/*
		 * If there is space in the current CPU's filled bucket, put
		 * the object there and return once all objects are freed.
		 * Note the cast to unsigned integer takes care of the case
		 * where the bucket layer is disabled (when cc_objs is -1).
		 */
		if ((unsigned int)ccp->cc_objs <
		    (unsigned int)ccp->cc_bktsize) {
			/*
			 * Reverse the list while we place the object into the
			 * bucket; this effectively causes the most recently
			 * freed object(s) to be reused during allocation.
			 */
			nlist = list->obj_next;
			list->obj_next = (ccp->cc_objs == 0) ? NULL :
			    ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
			ccp->cc_filled->bkt_obj[ccp->cc_objs++] = list;
			ccp->cc_free++;

			if ((list = nlist) != NULL)
				continue;

			/* We are done; return to caller */
			MCACHE_UNLOCK(&ccp->cc_lock);

			/* If there is a waiter below, notify it */
			if (cp->mc_waiter_cnt > 0)
				mcache_notify(cp, MCN_RETRYALLOC);
			return;
		}

		/*
		 * The CPU's filled bucket is full.  If the previous filled
		 * bucket was empty, exchange and try again.
		 */
		if (ccp->cc_pobjs == 0) {
			mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
			continue;
		}

		/*
		 * If the bucket layer is disabled, free to slab.  This can
		 * happen either because MCF_NOCPUCACHE is set, or because
		 * the bucket layer is currently being resized.
		 */
		if (ccp->cc_bktsize == 0)
			break;

		/*
		 * Both of the CPU's buckets are full; try to get an empty
		 * bucket from the bucket layer.  Upon success, empty this
		 * CPU and place any full bucket into the full list.
		 */
		bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp);
		if (bkt != NULL) {
			if (ccp->cc_pfilled != NULL)
				mcache_bkt_free(cp, &cp->mc_full,
				    ccp->cc_pfilled);
			mcache_cpu_refill(ccp, bkt, 0);
			continue;
		}

		/*
		 * We need an empty bucket to put our freed objects into
		 * but couldn't get an empty bucket from the bucket layer;
		 * attempt to allocate one.  We do not want to block for
		 * allocation here, and if the bucket allocation fails
		 * we will simply fall through to the slab layer.
		 */
		MCACHE_UNLOCK(&ccp->cc_lock);
		bkt = mcache_alloc(btp->bt_cache, MCR_NOSLEEP);
		MCACHE_LOCK(&ccp->cc_lock);

		if (bkt != NULL) {
			/*
			 * We have an empty bucket, but since we dropped the
			 * CPU lock above, the cache's bucket size may have
			 * changed.  If so, free the bucket and try again.
			 */
			if (ccp->cc_bktsize != btp->bt_bktsize) {
				MCACHE_UNLOCK(&ccp->cc_lock);
				mcache_free(btp->bt_cache, bkt);
				MCACHE_LOCK(&ccp->cc_lock);
				continue;
			}

			/*
			 * We have an empty bucket of the right size;
			 * add it to the bucket layer and try again.
			 */
			mcache_bkt_free(cp, &cp->mc_empty, bkt);
			continue;
		}

		/*
		 * The bucket layer has no empty buckets; free the
		 * object(s) directly to the slab layer.
		 */
		break;
	}
	MCACHE_UNLOCK(&ccp->cc_lock);

	/* If there is a waiter below, notify it */
	if (cp->mc_waiter_cnt > 0)
		mcache_notify(cp, MCN_RETRYALLOC);

	/* Advise the slab layer to purge the object(s) */
	(*cp->mc_slab_free)(cp->mc_private, list,
	    (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
}

/*
 * Cache destruction routine.
 */
__private_extern__ void
mcache_destroy(mcache_t *cp)
{
	void **pbuf;

	MCACHE_LIST_LOCK();
	LIST_REMOVE(cp, mc_list);
	MCACHE_LIST_UNLOCK();

	mcache_bkt_purge(cp);

	/*
	 * This cache is dead; there should be no further transactions.
	 * If one is attempted anyway, make sure that it induces a fault.
	 */
	cp->mc_slab_alloc = NULL;
	cp->mc_slab_free = NULL;
	cp->mc_slab_audit = NULL;

	lck_attr_free(cp->mc_bkt_lock_attr);
	lck_grp_free(cp->mc_bkt_lock_grp);
	lck_grp_attr_free(cp->mc_bkt_lock_grp_attr);

	lck_attr_free(cp->mc_cpu_lock_attr);
	lck_grp_free(cp->mc_cpu_lock_grp);
	lck_grp_attr_free(cp->mc_cpu_lock_grp_attr);

	lck_attr_free(cp->mc_sync_lock_attr);
	lck_grp_free(cp->mc_sync_lock_grp);
	lck_grp_attr_free(cp->mc_sync_lock_grp_attr);

	/*
	 * TODO: We need to destroy the zone here, but cannot do it
	 * because there is currently no way to achieve that.  Until
	 * then the memory allocated for the zone structure is leaked.
	 * Once it becomes achievable, uncomment these lines:
	 *
	 *	if (cp->mc_slab_zone != NULL) {
	 *		zdestroy(cp->mc_slab_zone);
	 *		cp->mc_slab_zone = NULL;
	 *	}
	 */

	/* Get the original address since we're about to free it */
	pbuf = (void **)((intptr_t)cp - sizeof (void *));

	zfree(mcache_zone, *pbuf);
}

/*
 * Internal slab allocator used as a backend for simple caches.  The
 * current implementation uses the zone allocator for simplicity reasons.
 */
static unsigned int
mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait)
{
	mcache_t *cp = arg;
	unsigned int need = num;
	size_t offset = 0;
	size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
	u_int32_t flags = cp->mc_flags;
	void *buf, *base, **pbuf;
	mcache_obj_t **list = *plist;

	*list = NULL;

	/*
	 * The address of the object returned to the caller is an
	 * offset from the 64-bit aligned base address only if the
	 * cache's alignment requirement is neither 1 nor 8 bytes.
	 */
	if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
		offset = cp->mc_align;

	for (;;) {
		if (!(wait & MCR_NOSLEEP))
			buf = zalloc(cp->mc_slab_zone);
		else
			buf = zalloc_noblock(cp->mc_slab_zone);

		if (buf == NULL)
			break;

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;

		/*
		 * If auditing is enabled, patternize the contents of
		 * the buffer starting from the 64-bit aligned base to
		 * the end of the buffer; the length is rounded up to
		 * the nearest 64-bit multiple, as we use 64-bit memory
		 * accesses to set/check the pattern.
		 */
		if (flags & MCF_AUDIT) {
			VERIFY(((intptr_t)base + rsize) <=
			    ((intptr_t)buf + cp->mc_chunksize));
			mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
		}

		/*
		 * Fix up the object's address to fulfill the cache's
		 * alignment requirement (if needed) and return this
		 * to the caller.
		 */
		VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
		    ((intptr_t)buf + cp->mc_chunksize));
		*list = (mcache_obj_t *)((intptr_t)base + offset);

		(*list)->obj_next = NULL;
		list = *plist = &(*list)->obj_next;

		/* If we got them all, return to mcache */
		if (--need == 0)
			break;
	}

	return (num - need);
}

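/*
 * Resulting chunk layout (widths not to scale): buf is the raw zone
 * address, base the first 64-bit aligned address past a pointer slot,
 * and the caller's object begins at base + offset:
 *
 *	+-------+-----------+---------+--------------------------+
 *	| slack | saved buf | padding | object (bufsize bytes)   |
 *	+-------+-----------+---------+--------------------------+
 *	^       ^           ^         ^
 *	buf     pbuf        base      base + offset (returned)
 *
 * The padding exists only when the cache's alignment is neither 1 nor
 * 8 bytes, in which case offset equals mc_align; otherwise the object
 * starts at base itself.  mcache_slab_free() below reverses the
 * arithmetic: it winds back from the object to base, then reads the
 * original zone address from *pbuf.
 */
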
/*
 * Internal slab deallocator used as a backend for simple caches.
 */
static void
mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged)
{
	mcache_t *cp = arg;
	mcache_obj_t *nlist;
	size_t offset = 0;
	size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
	u_int32_t flags = cp->mc_flags;
	void *base;
	void **pbuf;

	/*
	 * The address of the object is an offset from a 64-bit
	 * aligned base address only if the cache's alignment
	 * requirement is neither 1 nor 8 bytes.
	 */
	if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
		offset = cp->mc_align;

	for (;;) {
		nlist = list->obj_next;
		list->obj_next = NULL;

		/* Get the 64-bit aligned base address of this object */
		base = (void *)((intptr_t)list - offset);
		VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));

		/* Get the original address since we're about to free it */
		pbuf = (void **)((intptr_t)base - sizeof (void *));

		if (flags & MCF_AUDIT) {
			VERIFY(((intptr_t)base + rsize) <=
			    ((intptr_t)*pbuf + cp->mc_chunksize));
			mcache_audit_free_verify(NULL, base, offset, rsize);
		}

		/* Free it to zone */
		VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
		    ((intptr_t)*pbuf + cp->mc_chunksize));
		zfree(cp->mc_slab_zone, *pbuf);

		/* No more objects to free; return to mcache */
		if ((list = nlist) == NULL)
			break;
	}
}

/*
 * Internal slab auditor for simple caches.
 */
static void
mcache_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
{
	mcache_t *cp = arg;
	size_t offset = 0;
	size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
	void *base, **pbuf;

	/*
	 * The address of the object returned to the caller is an
	 * offset from the 64-bit aligned base address only if the
	 * cache's alignment requirement is neither 1 nor 8 bytes.
	 */
	if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
		offset = cp->mc_align;

	while (list != NULL) {
		mcache_obj_t *next = list->obj_next;

		/* Get the 64-bit aligned base address of this object */
		base = (void *)((intptr_t)list - offset);
		VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));

		/* Get the original address */
		pbuf = (void **)((intptr_t)base - sizeof (void *));

		VERIFY(((intptr_t)base + rsize) <=
		    ((intptr_t)*pbuf + cp->mc_chunksize));

		if (!alloc)
			mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
		else
			mcache_audit_free_verify_set(NULL, base, offset, rsize);

		list = list->obj_next = next;
	}
}

/*
 * Refill the CPU's filled bucket with bkt and save the previous one.
 */
static void
mcache_cpu_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs)
{
	ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) ||
	    (ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize));
	ASSERT(ccp->cc_bktsize > 0);

	ccp->cc_pfilled = ccp->cc_filled;
	ccp->cc_pobjs = ccp->cc_objs;
	ccp->cc_filled = bkt;
	ccp->cc_objs = objs;
}

/*
 * Allocate a bucket from the bucket layer.
 */
static mcache_bkt_t *
mcache_bkt_alloc(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkttype_t **btp)
{
	mcache_bkt_t *bkt;

	if (!MCACHE_LOCK_TRY(&cp->mc_bkt_lock)) {
		/*
		 * The bucket layer lock is held by another CPU; increase
		 * the contention count so that we can later resize the
		 * bucket size accordingly.
		 */
		MCACHE_LOCK(&cp->mc_bkt_lock);
		cp->mc_bkt_contention++;
	}

	if ((bkt = blp->bl_list) != NULL) {
		blp->bl_list = bkt->bkt_next;
		if (--blp->bl_total < blp->bl_min)
			blp->bl_min = blp->bl_total;
		blp->bl_alloc++;
	}

	if (btp != NULL)
		*btp = cp->cache_bkttype;

	MCACHE_UNLOCK(&cp->mc_bkt_lock);

	return (bkt);
}

/*
 * Free a bucket to the bucket layer.
 */
static void
mcache_bkt_free(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t *bkt)
{
	MCACHE_LOCK(&cp->mc_bkt_lock);

	bkt->bkt_next = blp->bl_list;
	blp->bl_list = bkt;
	blp->bl_total++;

	MCACHE_UNLOCK(&cp->mc_bkt_lock);
}

/*
 * Enable the bucket layer of a cache.
 */
static void
mcache_cache_bkt_enable(mcache_t *cp)
{
	mcache_cpu_t *ccp;
	int cpu;

	if (cp->mc_flags & MCF_NOCPUCACHE)
		return;

	for (cpu = 0; cpu < ncpu; cpu++) {
		ccp = &cp->mc_cpu[cpu];
		MCACHE_LOCK(&ccp->cc_lock);
		ccp->cc_bktsize = cp->cache_bkttype->bt_bktsize;
		MCACHE_UNLOCK(&ccp->cc_lock);
	}
}

/*
 * Purge all buckets from a cache and disable its bucket layer.
 */
static void
mcache_bkt_purge(mcache_t *cp)
{
	mcache_cpu_t *ccp;
	mcache_bkt_t *bp, *pbp;
	mcache_bkttype_t *btp;
	int cpu, objs, pobjs;

	for (cpu = 0; cpu < ncpu; cpu++) {
		ccp = &cp->mc_cpu[cpu];

		MCACHE_LOCK(&ccp->cc_lock);

		btp = cp->cache_bkttype;
		bp = ccp->cc_filled;
		pbp = ccp->cc_pfilled;
		objs = ccp->cc_objs;
		pobjs = ccp->cc_pobjs;
		ccp->cc_filled = NULL;
		ccp->cc_pfilled = NULL;
		ccp->cc_objs = -1;
		ccp->cc_pobjs = -1;
		ccp->cc_bktsize = 0;

		MCACHE_UNLOCK(&ccp->cc_lock);

		if (bp != NULL)
			mcache_bkt_destroy(cp, btp, bp, objs);
		if (pbp != NULL)
			mcache_bkt_destroy(cp, btp, pbp, pobjs);
	}

	/*
	 * Updating the working set back to back essentially sets
	 * the working set size to zero, so everything is reapable.
	 */
	mcache_bkt_ws_update(cp);
	mcache_bkt_ws_update(cp);

	mcache_bkt_ws_reap(cp);
}

/*
 * Free one or more objects in the bucket to the slab layer,
 * and also free the bucket itself.
 */
static void
mcache_bkt_destroy(mcache_t *cp, mcache_bkttype_t *btp, mcache_bkt_t *bkt,
    int nobjs)
{
	if (nobjs > 0) {
		mcache_obj_t *top = bkt->bkt_obj[nobjs - 1];

		if (cp->mc_flags & MCF_VERIFY) {
			mcache_obj_t *o = top;
			int cnt = 0;

			/*
			 * Verify that the chain of objects in the bucket is
			 * valid.  Any mismatch here means a mistake when the
			 * object(s) were freed to the CPU layer, so we panic.
			 */
			while (o != NULL) {
				o = o->obj_next;
				++cnt;
			}
			if (cnt != nobjs) {
				panic("mcache_bkt_destroy: %s cp %p corrupted "
				    "list in bkt %p (nobjs %d actual %d)\n",
				    cp->mc_name, (void *)cp, (void *)bkt,
				    nobjs, cnt);
			}
		}

		/* Advise the slab layer to purge the object(s) */
		(*cp->mc_slab_free)(cp->mc_private, top,
		    (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
	}
	mcache_free(btp->bt_cache, bkt);
}

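/*
 * Worked example for the working-set logic below (hypothetical
 * numbers): suppose the full list ends an interval with bl_total = 10
 * and its low-water mark for that interval was bl_min = 4.  The update
 * moves 4 into bl_reaplimit and resets bl_min to 10; since those 4
 * buckets went untouched for an entire interval they are outside the
 * working set, and the next reap may hand them back to the slab layer
 * without hurting the cache's hit rate.
 */
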
/*
 * Update the bucket layer working set statistics.
 */
static void
mcache_bkt_ws_update(mcache_t *cp)
{
	MCACHE_LOCK(&cp->mc_bkt_lock);

	cp->mc_full.bl_reaplimit = cp->mc_full.bl_min;
	cp->mc_full.bl_min = cp->mc_full.bl_total;
	cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_min;
	cp->mc_empty.bl_min = cp->mc_empty.bl_total;

	MCACHE_UNLOCK(&cp->mc_bkt_lock);
}

/*
 * Reap all buckets that are beyond the working set.
 */
static void
mcache_bkt_ws_reap(mcache_t *cp)
{
	long reap;
	mcache_bkt_t *bkt;
	mcache_bkttype_t *btp;

	reap = MIN(cp->mc_full.bl_reaplimit, cp->mc_full.bl_min);
	while (reap-- &&
	    (bkt = mcache_bkt_alloc(cp, &cp->mc_full, &btp)) != NULL)
		mcache_bkt_destroy(cp, btp, bkt, btp->bt_bktsize);

	reap = MIN(cp->mc_empty.bl_reaplimit, cp->mc_empty.bl_min);
	while (reap-- &&
	    (bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp)) != NULL)
		mcache_bkt_destroy(cp, btp, bkt, 0);
}

static void
mcache_reap_timeout(void *arg)
{
	volatile UInt32 *flag = arg;

	ASSERT(flag == &mcache_reaping);

	*flag = 0;
}

static void
mcache_reap_done(void *flag)
{
	timeout(mcache_reap_timeout, flag, mcache_reap_interval);
}

static void
mcache_reap_start(void *arg)
{
	UInt32 *flag = arg;

	ASSERT(flag == &mcache_reaping);

	mcache_applyall(mcache_cache_reap);
	mcache_dispatch(mcache_reap_done, flag);
}

__private_extern__ void
mcache_reap(void)
{
	UInt32 *flag = &mcache_reaping;

	if (mcache_llock_owner == current_thread() ||
	    !OSCompareAndSwap(0, 1, flag))
		return;

	mcache_dispatch(mcache_reap_start, flag);
}

static void
mcache_cache_reap(mcache_t *cp)
{
	mcache_bkt_ws_reap(cp);
}

/*
 * Performs periodic maintenance on a cache.
 */
static void
mcache_cache_update(mcache_t *cp)
{
	int need_bkt_resize = 0;
	int need_bkt_reenable = 0;

	lck_mtx_assert(mcache_llock, LCK_MTX_ASSERT_OWNED);

	mcache_bkt_ws_update(cp);

	/*
	 * Cache resize and post-purge reenable are mutually exclusive.
	 * If the cache was previously purged, there is no point in
	 * increasing the bucket size, as the purge indicated memory
	 * pressure on the system.
	 */
	lck_mtx_lock_spin(&cp->mc_sync_lock);
	if (!(cp->mc_flags & MCF_NOCPUCACHE) && cp->mc_enable_cnt)
		need_bkt_reenable = 1;
	lck_mtx_unlock(&cp->mc_sync_lock);

	MCACHE_LOCK(&cp->mc_bkt_lock);
	/*
	 * If the contention count is greater than the threshold, and if
	 * we are not already at the maximum bucket size, increase it.
	 * Otherwise, if this cache was previously purged by the user
	 * then we simply reenable it.
	 */
	if ((unsigned int)cp->mc_chunksize < cp->cache_bkttype->bt_maxbuf &&
	    (int)(cp->mc_bkt_contention - cp->mc_bkt_contention_prev) >
	    mcache_bkt_contention && !need_bkt_reenable)
		need_bkt_resize = 1;

	cp->mc_bkt_contention_prev = cp->mc_bkt_contention;
	MCACHE_UNLOCK(&cp->mc_bkt_lock);

	if (need_bkt_resize)
		mcache_dispatch(mcache_cache_bkt_resize, cp);
	else if (need_bkt_reenable)
		mcache_dispatch(mcache_cache_enable, cp);
}

/*
 * Recompute a cache's bucket size.  This is an expensive operation
 * and should not be done frequently; larger buckets provide a higher
 * transfer rate with the bucket layer, while smaller buckets reduce
 * memory consumption.
 */
static void
mcache_cache_bkt_resize(void *arg)
{
	mcache_t *cp = arg;
	mcache_bkttype_t *btp = cp->cache_bkttype;

	if ((unsigned int)cp->mc_chunksize < btp->bt_maxbuf) {
		mcache_bkt_purge(cp);

		/*
		 * Upgrade to the next bucket type with a larger bucket
		 * size; temporarily set the previous contention snapshot
		 * to a negative number to prevent an unnecessary resize
		 * request.
		 */
		MCACHE_LOCK(&cp->mc_bkt_lock);
		cp->cache_bkttype = ++btp;
		cp->mc_bkt_contention_prev = cp->mc_bkt_contention + INT_MAX;
		MCACHE_UNLOCK(&cp->mc_bkt_lock);

		mcache_cache_enable(cp);
	}
}

/*
 * Reenable a cache previously disabled by a purge.
 */
static void
mcache_cache_enable(void *arg)
{
	mcache_t *cp = arg;

	lck_mtx_lock_spin(&cp->mc_sync_lock);
	cp->mc_purge_cnt = 0;
	cp->mc_enable_cnt = 0;
	lck_mtx_unlock(&cp->mc_sync_lock);

	mcache_cache_bkt_enable(cp);
}

static void
mcache_update_timeout(__unused void *arg)
{
	timeout(mcache_update, NULL, mcache_reap_interval);
}

static void
mcache_update(__unused void *arg)
{
	mcache_applyall(mcache_cache_update);
	mcache_dispatch(mcache_update_timeout, NULL);
}

static void
mcache_applyall(void (*func)(mcache_t *))
{
	mcache_t *cp;

	MCACHE_LIST_LOCK();
	LIST_FOREACH(cp, &mcache_head, mc_list) {
		func(cp);
	}
	MCACHE_LIST_UNLOCK();
}

static void
mcache_dispatch(void (*func)(void *), void *arg)
{
	ASSERT(func != NULL);
	timeout(func, arg, hz / 1000);
}

__private_extern__ void
mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp)
{
	mca->mca_addr = addr;
	mca->mca_cache = cp;
	mca->mca_pthread = mca->mca_thread;
	mca->mca_thread = current_thread();
	bcopy(mca->mca_stack, mca->mca_pstack, sizeof (mca->mca_pstack));
	mca->mca_pdepth = mca->mca_depth;
	bzero(mca->mca_stack, sizeof (mca->mca_stack));
	mca->mca_depth = OSBacktrace(mca->mca_stack, MCACHE_STACK_DEPTH);
}

__private_extern__ void
mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size)
{
	u_int64_t *buf_end = (u_int64_t *)((char *)buf_arg + size);
	u_int64_t *buf = (u_int64_t *)buf_arg;

	VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
	VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));

	while (buf < buf_end)
		*buf++ = pattern;
}

__private_extern__ void *
mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size)
{
	u_int64_t *buf_end = (u_int64_t *)((char *)buf_arg + size);
	u_int64_t *buf;

	VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
	VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));

	for (buf = buf_arg; buf < buf_end; buf++) {
		if (*buf != pattern)
			return (buf);
	}
	return (NULL);
}

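/*
 * Usage sketch for the pattern helpers (buf and size are hypothetical,
 * both 64-bit aligned as the helpers demand): a debugging client can
 * paint a buffer when it goes quiescent and verify it later:
 *
 *	mcache_set_pattern(MCACHE_FREE_PATTERN, buf, size);
 *	...buffer sits unused...
 *	if (mcache_verify_pattern(MCACHE_FREE_PATTERN, buf, size) != NULL)
 *		panic("buffer modified while free");
 *
 * mcache_verify_set_pattern() below combines the check with switching
 * to a new pattern, returning the address of the first mismatched
 * 64-bit word (or NULL if the buffer was intact).
 */
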
__private_extern__ void *
mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg,
    size_t size)
{
	u_int64_t *buf_end = (u_int64_t *)((char *)buf_arg + size);
	u_int64_t *buf;

	VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
	VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));

	for (buf = buf_arg; buf < buf_end; buf++) {
		if (*buf != old) {
			mcache_set_pattern(old, buf_arg,
			    (uintptr_t)buf - (uintptr_t)buf_arg);
			return (buf);
		}
		*buf = new;
	}
	return (NULL);
}

__private_extern__ void
mcache_audit_free_verify(mcache_audit_t *mca, void *base, size_t offset,
    size_t size)
{
	void *addr;
	u_int64_t *oaddr64;
	mcache_obj_t *next;

	addr = (void *)((uintptr_t)base + offset);
	next = ((mcache_obj_t *)addr)->obj_next;

	/* For the "obj_next" pointer in the buffer */
	oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
	*oaddr64 = MCACHE_FREE_PATTERN;

	if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN,
	    (caddr_t)base, size)) != NULL) {
		mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
		    (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
		/* NOTREACHED */
	}
	((mcache_obj_t *)addr)->obj_next = next;
}

__private_extern__ void
mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset,
    size_t size)
{
	void *addr;
	u_int64_t *oaddr64;
	mcache_obj_t *next;

	addr = (void *)((uintptr_t)base + offset);
	next = ((mcache_obj_t *)addr)->obj_next;

	/* For the "obj_next" pointer in the buffer */
	oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
	*oaddr64 = MCACHE_FREE_PATTERN;

	if ((oaddr64 = mcache_verify_set_pattern(MCACHE_FREE_PATTERN,
	    MCACHE_UNINITIALIZED_PATTERN, (caddr_t)base, size)) != NULL) {
		mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
		    (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
		/* NOTREACHED */
	}
	((mcache_obj_t *)addr)->obj_next = next;
}

#undef panic

__private_extern__ char *
mcache_dump_mca(mcache_audit_t *mca)
{
	if (mca_dump_buf == NULL)
		return (NULL);

	snprintf(mca_dump_buf, DUMP_MCA_BUF_SIZE,
	    "mca %p: addr %p, cache %p (%s)\n"
	    "last transaction; thread %p, saved PC stack (%d deep):\n"
	    "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
	    "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
	    "previous transaction; thread %p, saved PC stack (%d deep):\n"
	    "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
	    "\t%p, %p, %p, %p, %p, %p, %p, %p\n",
	    mca, mca->mca_addr, mca->mca_cache,
	    mca->mca_cache ? mca->mca_cache->mc_name : "?",
	    mca->mca_thread, mca->mca_depth,
	    mca->mca_stack[0], mca->mca_stack[1], mca->mca_stack[2],
	    mca->mca_stack[3], mca->mca_stack[4], mca->mca_stack[5],
	    mca->mca_stack[6], mca->mca_stack[7], mca->mca_stack[8],
	    mca->mca_stack[9], mca->mca_stack[10], mca->mca_stack[11],
	    mca->mca_stack[12], mca->mca_stack[13], mca->mca_stack[14],
	    mca->mca_stack[15],
	    mca->mca_pthread, mca->mca_pdepth,
	    mca->mca_pstack[0], mca->mca_pstack[1], mca->mca_pstack[2],
	    mca->mca_pstack[3], mca->mca_pstack[4], mca->mca_pstack[5],
	    mca->mca_pstack[6], mca->mca_pstack[7], mca->mca_pstack[8],
	    mca->mca_pstack[9], mca->mca_pstack[10], mca->mca_pstack[11],
	    mca->mca_pstack[12], mca->mca_pstack[13], mca->mca_pstack[14],
	    mca->mca_pstack[15]);

	return (mca_dump_buf);
}

__private_extern__ void
mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset,
    int64_t expected, int64_t got)
{
	if (mca == NULL) {
		panic("mcache_audit: buffer %p modified after free at "
		    "offset 0x%lx (0x%llx instead of 0x%llx)\n", addr,
		    offset, got, expected);
		/* NOTREACHED */
	}

	panic("mcache_audit: buffer %p modified after free at offset 0x%lx "
	    "(0x%llx instead of 0x%llx)\n%s\n",
	    addr, offset, got, expected, mcache_dump_mca(mca));
	/* NOTREACHED */
}

__private_extern__ int
assfail(const char *a, const char *f, int l)
{
	panic("assertion failed: %s, file: %s, line: %d", a, f, l);
	return (0);
}