/*
 * Copyright (c) 2006-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Memory allocator with per-CPU caching, derived from the kmem magazine
 * concept and implementation as described in the following paper:
 * http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick.pdf
 * That implementation is Copyright 2006 Sun Microsystems, Inc. All rights
 * reserved. Use is subject to license terms.
 *
 * There are several major differences between this and the original kmem
 * magazine: this derivative implementation allows for multiple objects to
 * be allocated and freed from/to the object cache in one call; in addition,
 * it provides for better flexibility where the user is allowed to define
 * its own slab allocator (instead of the default zone allocator). Finally,
 * no object construction/destruction takes place at the moment, although
 * this could be added in future to improve efficiency.
 */
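
/*
 * Illustrative usage sketch (the cache name "example" and struct foo are
 * placeholder names; see the routines below for the authoritative
 * interfaces):
 *
 *	mcache_t *mp;
 *	struct foo *f;
 *
 *	mp = mcache_create("example", sizeof (struct foo), 0, 0, MCR_SLEEP);
 *	f = mcache_alloc(mp, MCR_SLEEP);
 *	...
 *	mcache_free(mp, f);
 *	mcache_destroy(mp);
 */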

#include <sys/param.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/systm.h>

#include <kern/debug.h>
#include <kern/zalloc.h>
#include <kern/cpu_number.h>
#include <kern/locks.h>

#include <libkern/libkern.h>
#include <libkern/OSAtomic.h>
#include <libkern/OSDebug.h>

#include <mach/vm_param.h>
#include <machine/limits.h>
#include <machine/machine_routines.h>

#include <string.h>

#include <sys/mcache.h>

#define	MCACHE_SIZE(n) \
	((size_t)(&((mcache_t *)0)->mc_cpu[n]))

/* Allocate extra in case we need to manually align the pointer */
#define	MCACHE_ALLOC_SIZE \
	(sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_LINE_SIZE)

#define	MCACHE_CPU(c) \
	(mcache_cpu_t *)((void *)((char *)(c) + MCACHE_SIZE(cpu_number())))

/*
 * MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used
 * to serialize accesses to the global list of caches in the system.
 * They also record the thread currently running in the critical
 * section, so that we can avoid recursive requests to reap the
 * caches when memory runs low.
 */
#define	MCACHE_LIST_LOCK() {				\
	lck_mtx_lock(mcache_llock);			\
	mcache_llock_owner = current_thread();		\
}

#define	MCACHE_LIST_UNLOCK() {				\
	mcache_llock_owner = NULL;			\
	lck_mtx_unlock(mcache_llock);			\
}

#define	MCACHE_LOCK(l)		lck_mtx_lock(l)
#define	MCACHE_UNLOCK(l)	lck_mtx_unlock(l)
#define	MCACHE_LOCK_TRY(l)	lck_mtx_try_lock(l)

static int ncpu;
static unsigned int cache_line_size;
static lck_mtx_t *mcache_llock;
static struct thread *mcache_llock_owner;
static lck_attr_t *mcache_llock_attr;
static lck_grp_t *mcache_llock_grp;
static lck_grp_attr_t *mcache_llock_grp_attr;
static struct zone *mcache_zone;
static unsigned int mcache_reap_interval;
static UInt32 mcache_reaping;
static int mcache_ready;
static int mcache_updating;

static int mcache_bkt_contention = 3;
#if DEBUG
static unsigned int mcache_flags = MCF_DEBUG;
#else
static unsigned int mcache_flags = 0;
#endif

#define	DUMP_MCA_BUF_SIZE	512
static char *mca_dump_buf;

static mcache_bkttype_t mcache_bkttype[] = {
	{ 1,	4096,	32768,	NULL },
	{ 3,	2048,	16384,	NULL },
	{ 7,	1024,	12288,	NULL },
	{ 15,	256,	8192,	NULL },
	{ 31,	64,	4096,	NULL },
	{ 47,	0,	2048,	NULL },
	{ 63,	0,	1024,	NULL },
	{ 95,	0,	512,	NULL },
	{ 143,	0,	256,	NULL },
	{ 165,	0,	0,	NULL },
};
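
/*
 * Illustrative reading of the table above (the selection loop lives in
 * mcache_create_common() below): a cache's bucket type is the first entry
 * whose bt_minbuf is strictly smaller than the cache's chunk size.  For
 * example, a 512-byte chunk skips the 4096/2048/1024 bt_minbuf entries and
 * lands on { 15, 256, 8192 }, i.e. 15 objects per bucket; larger objects
 * get smaller buckets.  The bt_maxbuf column caps how large a chunk may be
 * for mcache_cache_bkt_resize() to upgrade the cache to the next (larger)
 * bucket type, and bt_cache holds the cache used to allocate buckets of
 * that size.
 */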

static mcache_t *mcache_create_common(const char *, size_t, size_t,
    mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t,
    mcache_notifyfn_t, void *, u_int32_t, int, int);
static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***,
    unsigned int, int);
static void mcache_slab_free(void *, mcache_obj_t *, boolean_t);
static void mcache_slab_audit(void *, mcache_obj_t *, boolean_t);
static void mcache_cpu_refill(mcache_cpu_t *, mcache_bkt_t *, int);
static mcache_bkt_t *mcache_bkt_alloc(mcache_t *, mcache_bktlist_t *,
    mcache_bkttype_t **);
static void mcache_bkt_free(mcache_t *, mcache_bktlist_t *, mcache_bkt_t *);
static void mcache_cache_bkt_enable(mcache_t *);
static void mcache_bkt_purge(mcache_t *);
static void mcache_bkt_destroy(mcache_t *, mcache_bkttype_t *,
    mcache_bkt_t *, int);
static void mcache_bkt_ws_update(mcache_t *);
static void mcache_bkt_ws_reap(mcache_t *);
static void mcache_dispatch(void (*)(void *), void *);
static void mcache_cache_reap(mcache_t *);
static void mcache_cache_update(mcache_t *);
static void mcache_cache_bkt_resize(void *);
static void mcache_cache_enable(void *);
static void mcache_update(void *);
static void mcache_update_timeout(void *);
static void mcache_applyall(void (*)(mcache_t *));
static void mcache_reap_start(void *);
static void mcache_reap_done(void *);
static void mcache_reap_timeout(void *);
static void mcache_notify(mcache_t *, u_int32_t);
static void mcache_purge(void *);

static LIST_HEAD(, mcache) mcache_head;
mcache_t *mcache_audit_cache;

/*
 * Initialize the framework; this is currently called as part of BSD init.
 */
__private_extern__ void
mcache_init(void)
{
	mcache_bkttype_t *btp;
	unsigned int i;
	char name[32];

	ncpu = ml_get_max_cpus();
	(void) mcache_cache_line_size();	/* prime it */

	mcache_llock_grp_attr = lck_grp_attr_alloc_init();
	mcache_llock_grp = lck_grp_alloc_init("mcache.list",
	    mcache_llock_grp_attr);
	mcache_llock_attr = lck_attr_alloc_init();
	mcache_llock = lck_mtx_alloc_init(mcache_llock_grp, mcache_llock_attr);

	mcache_zone = zinit(MCACHE_ALLOC_SIZE, 256 * MCACHE_ALLOC_SIZE,
	    PAGE_SIZE, "mcache");
	if (mcache_zone == NULL)
		panic("mcache_init: failed to allocate mcache zone\n");
	zone_change(mcache_zone, Z_CALLERACCT, FALSE);

	LIST_INIT(&mcache_head);

	for (i = 0; i < sizeof (mcache_bkttype) / sizeof (*btp); i++) {
		btp = &mcache_bkttype[i];
		(void) snprintf(name, sizeof (name), "bkt_%d",
		    btp->bt_bktsize);
		btp->bt_cache = mcache_create(name,
		    (btp->bt_bktsize + 1) * sizeof (void *), 0, 0, MCR_SLEEP);
	}

	PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof (mcache_flags));
	mcache_flags &= MCF_FLAGS_MASK;

	mcache_audit_cache = mcache_create("audit", sizeof (mcache_audit_t),
	    0, 0, MCR_SLEEP);

	mcache_reap_interval = 15 * hz;
	mcache_applyall(mcache_cache_bkt_enable);
	mcache_ready = 1;

	printf("mcache: %d CPU(s), %d bytes CPU cache line size\n",
	    ncpu, CPU_CACHE_LINE_SIZE);
}

/*
 * Return the global mcache flags.
 */
__private_extern__ unsigned int
mcache_getflags(void)
{
	return (mcache_flags);
}

/*
 * Return the CPU cache line size.
 */
__private_extern__ unsigned int
mcache_cache_line_size(void)
{
	if (cache_line_size == 0) {
		ml_cpu_info_t cpu_info;
		ml_cpu_get_info(&cpu_info);
		cache_line_size = cpu_info.cache_line_size;
	}
	return (cache_line_size);
}

/*
 * Create a cache using the zone allocator as the backend slab allocator.
 * The caller may specify any alignment for the object; if it specifies 0
 * the default alignment (MCACHE_ALIGN) will be used.
 */
__private_extern__ mcache_t *
mcache_create(const char *name, size_t bufsize, size_t align,
    u_int32_t flags, int wait)
{
	return (mcache_create_common(name, bufsize, align, mcache_slab_alloc,
	    mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1,
	    wait));
}

/*
 * Create a cache using a custom backend slab allocator. Since the caller
 * is responsible for allocation, no alignment guarantee will be provided
 * by this framework.
 */
__private_extern__ mcache_t *
mcache_create_ext(const char *name, size_t bufsize,
    mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
    mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
    u_int32_t flags, int wait)
{
	return (mcache_create_common(name, bufsize, 0, allocfn,
	    freefn, auditfn, logfn, notifyfn, arg, flags, 0, wait));
}

/*
 * Common cache creation routine.
 */
static mcache_t *
mcache_create_common(const char *name, size_t bufsize, size_t align,
    mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
    mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
    u_int32_t flags, int need_zone, int wait)
{
	mcache_bkttype_t *btp;
	mcache_t *cp = NULL;
	size_t chunksize;
	void *buf, **pbuf;
	int c;
	char lck_name[64];

	/* If auditing is on and print buffer is NULL, allocate it now */
	if ((flags & MCF_DEBUG) && mca_dump_buf == NULL) {
		int malloc_wait = (wait & MCR_NOSLEEP) ? M_NOWAIT : M_WAITOK;
		MALLOC(mca_dump_buf, char *, DUMP_MCA_BUF_SIZE, M_TEMP,
		    malloc_wait | M_ZERO);
		if (mca_dump_buf == NULL)
			return (NULL);
	}

	if (!(wait & MCR_NOSLEEP))
		buf = zalloc(mcache_zone);
	else
		buf = zalloc_noblock(mcache_zone);

	if (buf == NULL)
		goto fail;

	bzero(buf, MCACHE_ALLOC_SIZE);

	/*
	 * In case we didn't get cache-aligned memory, round the address
	 * up accordingly. This is needed in order to get the rest of
	 * the structure members aligned properly. It also means that
	 * the memory span gets shifted due to the round up, but that
	 * is okay since we've allocated extra space for this.
	 */
	cp = (mcache_t *)
	    P2ROUNDUP((intptr_t)buf + sizeof (void *), CPU_CACHE_LINE_SIZE);
	pbuf = (void **)((intptr_t)cp - sizeof (void *));
	*pbuf = buf;

	/*
	 * Guaranteed alignment is valid only when we use the internal
	 * slab allocator (currently set to use the zone allocator).
	 */
	if (!need_zone)
		align = 1;
	else if (align == 0)
		align = MCACHE_ALIGN;

	if ((align & (align - 1)) != 0)
		panic("mcache_create: bad alignment %lu", align);

	cp->mc_align = align;
	cp->mc_slab_alloc = allocfn;
	cp->mc_slab_free = freefn;
	cp->mc_slab_audit = auditfn;
	cp->mc_slab_log = logfn;
	cp->mc_slab_notify = notifyfn;
	cp->mc_private = need_zone ? cp : arg;
	cp->mc_bufsize = bufsize;
	cp->mc_flags = (flags & MCF_FLAGS_MASK) | mcache_flags;

	(void) snprintf(cp->mc_name, sizeof (cp->mc_name), "mcache.%s", name);

	(void) snprintf(lck_name, sizeof (lck_name), "%s.cpu", cp->mc_name);
	cp->mc_cpu_lock_grp_attr = lck_grp_attr_alloc_init();
	cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name,
	    cp->mc_cpu_lock_grp_attr);
	cp->mc_cpu_lock_attr = lck_attr_alloc_init();

	/*
	 * Allocation chunk size is the object's size plus any extra size
	 * needed to satisfy the object's alignment. It is enforced to be
	 * at least the size of an LP64 pointer to simplify auditing and to
	 * handle multiple-element allocation requests, where the elements
	 * returned are linked together in a list.
	 */
	chunksize = MAX(bufsize, sizeof (u_int64_t));
	if (need_zone) {
		/* Enforce 64-bit minimum alignment for zone-based buffers */
		align = MAX(align, sizeof (u_int64_t));
		chunksize += sizeof (void *) + align;
		chunksize = P2ROUNDUP(chunksize, align);
		if ((cp->mc_slab_zone = zinit(chunksize, 64 * 1024 * ncpu,
		    PAGE_SIZE, cp->mc_name)) == NULL)
			goto fail;
		zone_change(cp->mc_slab_zone, Z_EXPAND, TRUE);
	}
	cp->mc_chunksize = chunksize;

	/*
	 * Initialize the bucket layer.
	 */
	(void) snprintf(lck_name, sizeof (lck_name), "%s.bkt", cp->mc_name);
	cp->mc_bkt_lock_grp_attr = lck_grp_attr_alloc_init();
	cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name,
	    cp->mc_bkt_lock_grp_attr);
	cp->mc_bkt_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp,
	    cp->mc_bkt_lock_attr);

	(void) snprintf(lck_name, sizeof (lck_name), "%s.sync", cp->mc_name);
	cp->mc_sync_lock_grp_attr = lck_grp_attr_alloc_init();
	cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name,
	    cp->mc_sync_lock_grp_attr);
	cp->mc_sync_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp,
	    cp->mc_sync_lock_attr);

	for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++)
		continue;

	cp->cache_bkttype = btp;

	/*
	 * Initialize the CPU layer. Each per-CPU structure is aligned
	 * on the CPU cache line boundary to prevent false sharing.
	 */
	for (c = 0; c < ncpu; c++) {
		mcache_cpu_t *ccp = &cp->mc_cpu[c];

		VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_LINE_SIZE));
		lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp,
		    cp->mc_cpu_lock_attr);
		ccp->cc_objs = -1;
		ccp->cc_pobjs = -1;
	}

	if (mcache_ready)
		mcache_cache_bkt_enable(cp);

	/* TODO: dynamically create sysctl for stats */

	MCACHE_LIST_LOCK();
	LIST_INSERT_HEAD(&mcache_head, cp, mc_list);
	MCACHE_LIST_UNLOCK();

	/*
	 * If cache buckets are enabled and this is the first cache
	 * created, start the periodic cache update.
	 */
	if (!(mcache_flags & MCF_NOCPUCACHE) && !mcache_updating) {
		mcache_updating = 1;
		mcache_update_timeout(NULL);
	}
	if (cp->mc_flags & MCF_DEBUG) {
		printf("mcache_create: %s (%s) arg %p bufsize %lu align %lu "
		    "chunksize %lu bktsize %d\n", name, need_zone ? "i" : "e",
		    arg, bufsize, cp->mc_align, chunksize, btp->bt_bktsize);
	}
	return (cp);

fail:
	if (buf != NULL)
		zfree(mcache_zone, buf);
	return (NULL);
}

/*
 * Allocate one or more objects from a cache.
 */
__private_extern__ unsigned int
mcache_alloc_ext(mcache_t *cp, mcache_obj_t **list, unsigned int num, int wait)
{
	mcache_cpu_t *ccp;
	mcache_obj_t **top = &(*list);
	mcache_bkt_t *bkt;
	unsigned int need = num;
	boolean_t nwretry = FALSE;

	/* MCR_NOSLEEP and MCR_FAILOK are mutually exclusive */
	VERIFY((wait & (MCR_NOSLEEP|MCR_FAILOK)) != (MCR_NOSLEEP|MCR_FAILOK));

	ASSERT(list != NULL);
	*list = NULL;

	if (num == 0)
		return (0);

retry_alloc:
	/* We may not always be running in the same CPU in case of retries */
	ccp = MCACHE_CPU(cp);

	MCACHE_LOCK(&ccp->cc_lock);
	for (;;) {
		/*
		 * If we have an object in the current CPU's filled bucket,
		 * chain the object to any previous objects and return if
		 * we've satisfied the number of requested objects.
		 */
		if (ccp->cc_objs > 0) {
			mcache_obj_t *tail;
			int objs;

			/*
			 * Objects in the bucket are already linked together
			 * with the most recently freed object at the head of
			 * the list; grab as many objects as we can.
			 */
			objs = MIN((unsigned int)ccp->cc_objs, need);
			*list = ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
			ccp->cc_objs -= objs;
			ccp->cc_alloc += objs;

			tail = ccp->cc_filled->bkt_obj[ccp->cc_objs];
			list = &tail->obj_next;
			*list = NULL;

			/* If we got them all, return to caller */
			if ((need -= objs) == 0) {
				MCACHE_UNLOCK(&ccp->cc_lock);

				if (!(cp->mc_flags & MCF_NOLEAKLOG) &&
				    cp->mc_slab_log != NULL)
					(*cp->mc_slab_log)(num, *top, TRUE);

				if (cp->mc_flags & MCF_DEBUG)
					goto debug_alloc;

				return (num);
			}
		}

		/*
		 * The CPU's filled bucket is empty. If the previous filled
		 * bucket was full, exchange and try again.
		 */
		if (ccp->cc_pobjs > 0) {
			mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
			continue;
		}

		/*
		 * If the bucket layer is disabled, allocate from slab. This
		 * can happen either because MCF_NOCPUCACHE is set, or because
		 * the bucket layer is currently being resized.
		 */
		if (ccp->cc_bktsize == 0)
			break;

		/*
		 * Both of the CPU's buckets are empty; try to get a full
		 * bucket from the bucket layer. Upon success, refill this
		 * CPU and place any empty bucket into the empty list.
		 */
		bkt = mcache_bkt_alloc(cp, &cp->mc_full, NULL);
		if (bkt != NULL) {
			if (ccp->cc_pfilled != NULL)
				mcache_bkt_free(cp, &cp->mc_empty,
				    ccp->cc_pfilled);
			mcache_cpu_refill(ccp, bkt, ccp->cc_bktsize);
			continue;
		}

		/*
		 * The bucket layer has no full buckets; allocate the
		 * object(s) directly from the slab layer.
		 */
		break;
	}
	MCACHE_UNLOCK(&ccp->cc_lock);

	need -= (*cp->mc_slab_alloc)(cp->mc_private, &list, need, wait);

	/*
	 * If this is a blocking allocation, or if it is non-blocking and
	 * the cache's full bucket is non-empty, then retry the allocation.
	 */
	if (need > 0) {
		if (!(wait & MCR_NONBLOCKING)) {
			atomic_add_32(&cp->mc_wretry_cnt, 1);
			goto retry_alloc;
		} else if ((wait & (MCR_NOSLEEP | MCR_TRYHARD)) &&
		    !mcache_bkt_isempty(cp)) {
			if (!nwretry)
				nwretry = TRUE;
			atomic_add_32(&cp->mc_nwretry_cnt, 1);
			goto retry_alloc;
		} else if (nwretry) {
			atomic_add_32(&cp->mc_nwfail_cnt, 1);
		}
	}

	if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL)
		(*cp->mc_slab_log)((num - need), *top, TRUE);

	if (!(cp->mc_flags & MCF_DEBUG))
		return (num - need);

debug_alloc:
	if (cp->mc_flags & MCF_DEBUG) {
		mcache_obj_t **o = top;
		unsigned int n;

		n = 0;
		/*
		 * Verify that the chain of objects has the same count as
		 * what we are about to report to the caller. Any mismatch
		 * here means that the object list is badly broken and
		 * therefore we must panic.
		 */
		while (*o != NULL) {
			o = &(*o)->obj_next;
			++n;
		}
		if (n != (num - need)) {
			panic("mcache_alloc_ext: %s cp %p corrupted list "
			    "(got %d actual %d)\n", cp->mc_name,
			    (void *)cp, num - need, n);
		}
	}

	/* Invoke the slab layer audit callback if auditing is enabled */
	if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL)
		(*cp->mc_slab_audit)(cp->mc_private, *top, TRUE);

	return (num - need);
}

/*
 * Allocate a single object from a cache.
 */
__private_extern__ void *
mcache_alloc(mcache_t *cp, int wait)
{
	mcache_obj_t *buf;

	(void) mcache_alloc_ext(cp, &buf, 1, wait);
	return (buf);
}

__private_extern__ void
mcache_waiter_inc(mcache_t *cp)
{
	atomic_add_32(&cp->mc_waiter_cnt, 1);
}

__private_extern__ void
mcache_waiter_dec(mcache_t *cp)
{
	atomic_add_32(&cp->mc_waiter_cnt, -1);
}

__private_extern__ boolean_t
mcache_bkt_isempty(mcache_t *cp)
{
	/*
	 * This isn't meant to accurately tell whether there are
	 * any full buckets in the cache; it is simply a way to
	 * obtain "hints" about the state of the cache.
	 */
	return (cp->mc_full.bl_total == 0);
}

/*
 * Notify the slab layer about an event.
 */
static void
mcache_notify(mcache_t *cp, u_int32_t event)
{
	if (cp->mc_slab_notify != NULL)
		(*cp->mc_slab_notify)(cp->mc_private, event);
}

/*
 * Purge the cache and disable its buckets.
 */
static void
mcache_purge(void *arg)
{
	mcache_t *cp = arg;

	mcache_bkt_purge(cp);
	/*
	 * We cannot simply call mcache_cache_bkt_enable() from here as
	 * a bucket resize may be in flight and we would cause the CPU
	 * layers of the cache to point to different sizes. Therefore,
	 * we simply increment the enable count so that during the next
	 * periodic cache update the buckets can be reenabled.
	 */
	lck_mtx_lock_spin(&cp->mc_sync_lock);
	cp->mc_enable_cnt++;
	lck_mtx_unlock(&cp->mc_sync_lock);
}

__private_extern__ boolean_t
mcache_purge_cache(mcache_t *cp)
{
	/*
	 * Purging a cache that has no per-CPU caches or is already
	 * in the process of being purged is rather pointless.
	 */
	if (cp->mc_flags & MCF_NOCPUCACHE)
		return (FALSE);

	lck_mtx_lock_spin(&cp->mc_sync_lock);
	if (cp->mc_purge_cnt > 0) {
		lck_mtx_unlock(&cp->mc_sync_lock);
		return (FALSE);
	}
	cp->mc_purge_cnt++;
	lck_mtx_unlock(&cp->mc_sync_lock);

	mcache_dispatch(mcache_purge, cp);

	return (TRUE);
}

/*
 * Free a single object to a cache.
 */
__private_extern__ void
mcache_free(mcache_t *cp, void *buf)
{
	((mcache_obj_t *)buf)->obj_next = NULL;
	mcache_free_ext(cp, (mcache_obj_t *)buf);
}

/*
 * Free one or more objects to a cache.
 */
__private_extern__ void
mcache_free_ext(mcache_t *cp, mcache_obj_t *list)
{
	mcache_cpu_t *ccp = MCACHE_CPU(cp);
	mcache_bkttype_t *btp;
	mcache_obj_t *nlist;
	mcache_bkt_t *bkt;

	if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL)
		(*cp->mc_slab_log)(0, list, FALSE);

	/* Invoke the slab layer audit callback if auditing is enabled */
	if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL)
		(*cp->mc_slab_audit)(cp->mc_private, list, FALSE);

	MCACHE_LOCK(&ccp->cc_lock);
	for (;;) {
		/*
		 * If there is space in the current CPU's filled bucket, put
		 * the object there and return once all objects are freed.
		 * Note the cast to unsigned integer takes care of the case
		 * where the bucket layer is disabled (when cc_objs is -1).
		 */
		if ((unsigned int)ccp->cc_objs <
		    (unsigned int)ccp->cc_bktsize) {
			/*
			 * Reverse the list while we place the object into the
			 * bucket; this effectively causes the most recently
			 * freed object(s) to be reused during allocation.
			 */
			nlist = list->obj_next;
			list->obj_next = (ccp->cc_objs == 0) ?
			    NULL : ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
			ccp->cc_filled->bkt_obj[ccp->cc_objs++] = list;
			ccp->cc_free++;

			if ((list = nlist) != NULL)
				continue;

			/* We are done; return to caller */
			MCACHE_UNLOCK(&ccp->cc_lock);

			/* If there is a waiter below, notify it */
			if (cp->mc_waiter_cnt > 0)
				mcache_notify(cp, MCN_RETRYALLOC);
			return;
		}

		/*
		 * The CPU's filled bucket is full. If the previous filled
		 * bucket was empty, exchange and try again.
		 */
		if (ccp->cc_pobjs == 0) {
			mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
			continue;
		}

		/*
		 * If the bucket layer is disabled, free to slab. This can
		 * happen either because MCF_NOCPUCACHE is set, or because
		 * the bucket layer is currently being resized.
		 */
		if (ccp->cc_bktsize == 0)
			break;

		/*
		 * Both of the CPU's buckets are full; try to get an empty
		 * bucket from the bucket layer. Upon success, empty this
		 * CPU and place any full bucket into the full list.
		 */
		bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp);
		if (bkt != NULL) {
			if (ccp->cc_pfilled != NULL)
				mcache_bkt_free(cp, &cp->mc_full,
				    ccp->cc_pfilled);
			mcache_cpu_refill(ccp, bkt, 0);
			continue;
		}

		/*
		 * We need an empty bucket to put our freed objects into
		 * but couldn't get an empty bucket from the bucket layer;
		 * attempt to allocate one. We do not want to block for
		 * allocation here, and if the bucket allocation fails
		 * we will simply fall through to the slab layer.
		 */
		MCACHE_UNLOCK(&ccp->cc_lock);
		bkt = mcache_alloc(btp->bt_cache, MCR_NOSLEEP);
		MCACHE_LOCK(&ccp->cc_lock);

		if (bkt != NULL) {
			/*
			 * We have an empty bucket, but since we dropped the
			 * CPU lock above, the cache's bucket size may have
			 * changed. If so, free the bucket and try again.
			 */
			if (ccp->cc_bktsize != btp->bt_bktsize) {
				MCACHE_UNLOCK(&ccp->cc_lock);
				mcache_free(btp->bt_cache, bkt);
				MCACHE_LOCK(&ccp->cc_lock);
				continue;
			}

			/*
			 * We have an empty bucket of the right size;
			 * add it to the bucket layer and try again.
			 */
			mcache_bkt_free(cp, &cp->mc_empty, bkt);
			continue;
		}

		/*
		 * The bucket layer has no empty buckets; free the
		 * object(s) directly to the slab layer.
		 */
		break;
	}
	MCACHE_UNLOCK(&ccp->cc_lock);

	/* If there is a waiter below, notify it */
	if (cp->mc_waiter_cnt > 0)
		mcache_notify(cp, MCN_RETRYALLOC);

	/* Advise the slab layer to purge the object(s) */
	(*cp->mc_slab_free)(cp->mc_private, list,
	    (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
}

/*
 * Cache destruction routine.
 */
__private_extern__ void
mcache_destroy(mcache_t *cp)
{
	void **pbuf;

	MCACHE_LIST_LOCK();
	LIST_REMOVE(cp, mc_list);
	MCACHE_LIST_UNLOCK();

	mcache_bkt_purge(cp);

	/*
	 * This cache is dead; there should be no further transactions.
	 * If it's still invoked, make sure that it induces a fault.
	 */
	cp->mc_slab_alloc = NULL;
	cp->mc_slab_free = NULL;
	cp->mc_slab_audit = NULL;

	lck_attr_free(cp->mc_bkt_lock_attr);
	lck_grp_free(cp->mc_bkt_lock_grp);
	lck_grp_attr_free(cp->mc_bkt_lock_grp_attr);

	lck_attr_free(cp->mc_cpu_lock_attr);
	lck_grp_free(cp->mc_cpu_lock_grp);
	lck_grp_attr_free(cp->mc_cpu_lock_grp_attr);

	lck_attr_free(cp->mc_sync_lock_attr);
	lck_grp_free(cp->mc_sync_lock_grp);
	lck_grp_attr_free(cp->mc_sync_lock_grp_attr);

	/*
	 * TODO: We need to destroy the zone here, but cannot do it
	 * because there is currently no way to do so. Until then
	 * the memory allocated for the zone structure is leaked.
	 * Once it is achievable, uncomment these lines:
	 *
	 * if (cp->mc_slab_zone != NULL) {
	 *	zdestroy(cp->mc_slab_zone);
	 *	cp->mc_slab_zone = NULL;
	 * }
	 */

	/* Get the original address since we're about to free it */
	pbuf = (void **)((intptr_t)cp - sizeof (void *));

	zfree(mcache_zone, *pbuf);
}

/*
 * Internal slab allocator used as a backend for simple caches. The current
 * implementation uses the zone allocator for simplicity.
 */
static unsigned int
mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait)
{
	mcache_t *cp = arg;
	unsigned int need = num;
	size_t offset = 0;
	size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
	u_int32_t flags = cp->mc_flags;
	void *buf, *base, **pbuf;
	mcache_obj_t **list = *plist;

	*list = NULL;

	/*
	 * The address of the object returned to the caller is an
	 * offset from the 64-bit aligned base address only if the
	 * cache's alignment requirement is neither 1 nor 8 bytes.
	 */
	if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
		offset = cp->mc_align;

	for (;;) {
		if (!(wait & MCR_NOSLEEP))
			buf = zalloc(cp->mc_slab_zone);
		else
			buf = zalloc_noblock(cp->mc_slab_zone);

		if (buf == NULL)
			break;

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;

		/*
		 * If auditing is enabled, patternize the contents of
		 * the buffer starting from the 64-bit aligned base to
		 * the end of the buffer; the length is rounded up to
		 * the nearest 64-bit multiple; this is because we use
		 * 64-bit memory access to set/check the pattern.
		 */
		if (flags & MCF_DEBUG) {
			VERIFY(((intptr_t)base + rsize) <=
			    ((intptr_t)buf + cp->mc_chunksize));
			mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
		}

		/*
		 * Fix up the object's address to fulfill the cache's
		 * alignment requirement (if needed) and return this
		 * to the caller.
		 */
		VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
		    ((intptr_t)buf + cp->mc_chunksize));
		*list = (mcache_obj_t *)((intptr_t)base + offset);

		(*list)->obj_next = NULL;
		list = *plist = &(*list)->obj_next;

		/* If we got them all, return to mcache */
		if (--need == 0)
			break;
	}

	return (num - need);
}
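
/*
 * Illustrative layout of a chunk handed out by mcache_slab_alloc() above
 * (and unwound by mcache_slab_free() below), for the typical case where
 * the chunk address returned by the zone allocator is already 64-bit
 * aligned; the "offset" region exists only when the cache's alignment is
 * neither 1 nor 8 bytes:
 *
 *	buf                 base = buf + 8              buf + mc_chunksize
 *	|                   |                                          |
 *	+-------------------+--------+---------------------------...--+
 *	| *pbuf = buf       | offset | object returned to the caller  |
 *	+-------------------+--------+---------------------------...--+
 *
 * The original chunk address is stashed in the pointer-sized slot right
 * before base, so the free and audit paths can recover it from any
 * object address.
 */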

/*
 * Internal slab deallocator used as a backend for simple caches.
 */
static void
mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged)
{
	mcache_t *cp = arg;
	mcache_obj_t *nlist;
	size_t offset = 0;
	size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
	u_int32_t flags = cp->mc_flags;
	void *base;
	void **pbuf;

	/*
	 * The address of the object is an offset from a 64-bit
	 * aligned base address only if the cache's alignment
	 * requirement is neither 1 nor 8 bytes.
	 */
	if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
		offset = cp->mc_align;

	for (;;) {
		nlist = list->obj_next;
		list->obj_next = NULL;

		/* Get the 64-bit aligned base address of this object */
		base = (void *)((intptr_t)list - offset);
		VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));

		/* Get the original address since we're about to free it */
		pbuf = (void **)((intptr_t)base - sizeof (void *));

		if (flags & MCF_DEBUG) {
			VERIFY(((intptr_t)base + rsize) <=
			    ((intptr_t)*pbuf + cp->mc_chunksize));
			mcache_audit_free_verify(NULL, base, offset, rsize);
		}

		/* Free it to zone */
		VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
		    ((intptr_t)*pbuf + cp->mc_chunksize));
		zfree(cp->mc_slab_zone, *pbuf);

		/* No more objects to free; return to mcache */
		if ((list = nlist) == NULL)
			break;
	}
}

/*
 * Internal slab auditor for simple caches.
 */
static void
mcache_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
{
	mcache_t *cp = arg;
	size_t offset = 0;
	size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
	void *base, **pbuf;

	/*
	 * The address of the object returned to the caller is an
	 * offset from the 64-bit aligned base address only if the
	 * cache's alignment requirement is neither 1 nor 8 bytes.
	 */
	if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
		offset = cp->mc_align;

	while (list != NULL) {
		mcache_obj_t *next = list->obj_next;

		/* Get the 64-bit aligned base address of this object */
		base = (void *)((intptr_t)list - offset);
		VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));

		/* Get the original address */
		pbuf = (void **)((intptr_t)base - sizeof (void *));

		VERIFY(((intptr_t)base + rsize) <=
		    ((intptr_t)*pbuf + cp->mc_chunksize));

		if (!alloc)
			mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
		else
			mcache_audit_free_verify_set(NULL, base, offset, rsize);

		list = list->obj_next = next;
	}
}

/*
 * Refill the CPU's filled bucket with bkt and save the previous one.
 */
static void
mcache_cpu_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs)
{
	ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) ||
	    (ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize));
	ASSERT(ccp->cc_bktsize > 0);

	ccp->cc_pfilled = ccp->cc_filled;
	ccp->cc_pobjs = ccp->cc_objs;
	ccp->cc_filled = bkt;
	ccp->cc_objs = objs;
}

/*
 * Allocate a bucket from the bucket layer.
 */
static mcache_bkt_t *
mcache_bkt_alloc(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkttype_t **btp)
{
	mcache_bkt_t *bkt;

	if (!MCACHE_LOCK_TRY(&cp->mc_bkt_lock)) {
		/*
		 * The bucket layer lock is held by another CPU; increase
		 * the contention count so that we can later resize the
		 * bucket size accordingly.
		 */
		MCACHE_LOCK(&cp->mc_bkt_lock);
		cp->mc_bkt_contention++;
	}

	if ((bkt = blp->bl_list) != NULL) {
		blp->bl_list = bkt->bkt_next;
		if (--blp->bl_total < blp->bl_min)
			blp->bl_min = blp->bl_total;
		blp->bl_alloc++;
	}

	if (btp != NULL)
		*btp = cp->cache_bkttype;

	MCACHE_UNLOCK(&cp->mc_bkt_lock);

	return (bkt);
}

/*
 * Free a bucket to the bucket layer.
 */
static void
mcache_bkt_free(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t *bkt)
{
	MCACHE_LOCK(&cp->mc_bkt_lock);

	bkt->bkt_next = blp->bl_list;
	blp->bl_list = bkt;
	blp->bl_total++;

	MCACHE_UNLOCK(&cp->mc_bkt_lock);
}

/*
 * Enable the bucket layer of a cache.
 */
static void
mcache_cache_bkt_enable(mcache_t *cp)
{
	mcache_cpu_t *ccp;
	int cpu;

	if (cp->mc_flags & MCF_NOCPUCACHE)
		return;

	for (cpu = 0; cpu < ncpu; cpu++) {
		ccp = &cp->mc_cpu[cpu];
		MCACHE_LOCK(&ccp->cc_lock);
		ccp->cc_bktsize = cp->cache_bkttype->bt_bktsize;
		MCACHE_UNLOCK(&ccp->cc_lock);
	}
}

/*
 * Purge all buckets from a cache and disable its bucket layer.
 */
static void
mcache_bkt_purge(mcache_t *cp)
{
	mcache_cpu_t *ccp;
	mcache_bkt_t *bp, *pbp;
	mcache_bkttype_t *btp;
	int cpu, objs, pobjs;

	for (cpu = 0; cpu < ncpu; cpu++) {
		ccp = &cp->mc_cpu[cpu];

		MCACHE_LOCK(&ccp->cc_lock);

		btp = cp->cache_bkttype;
		bp = ccp->cc_filled;
		pbp = ccp->cc_pfilled;
		objs = ccp->cc_objs;
		pobjs = ccp->cc_pobjs;
		ccp->cc_filled = NULL;
		ccp->cc_pfilled = NULL;
		ccp->cc_objs = -1;
		ccp->cc_pobjs = -1;
		ccp->cc_bktsize = 0;

		MCACHE_UNLOCK(&ccp->cc_lock);

		if (bp != NULL)
			mcache_bkt_destroy(cp, btp, bp, objs);
		if (pbp != NULL)
			mcache_bkt_destroy(cp, btp, pbp, pobjs);
	}

	/*
	 * Updating the working set back to back essentially sets
	 * the working set size to zero, so everything is reapable.
	 */
	mcache_bkt_ws_update(cp);
	mcache_bkt_ws_update(cp);

	mcache_bkt_ws_reap(cp);
}

/*
 * Free one or more objects in the bucket to the slab layer,
 * and also free the bucket itself.
 */
static void
mcache_bkt_destroy(mcache_t *cp, mcache_bkttype_t *btp, mcache_bkt_t *bkt,
    int nobjs)
{
	if (nobjs > 0) {
		mcache_obj_t *top = bkt->bkt_obj[nobjs - 1];

		if (cp->mc_flags & MCF_DEBUG) {
			mcache_obj_t *o = top;
			int cnt = 0;

			/*
			 * Verify that the chain of objects in the bucket is
			 * valid. Any mismatch here means a mistake when the
			 * object(s) were freed to the CPU layer, so we panic.
			 */
			while (o != NULL) {
				o = o->obj_next;
				++cnt;
			}
			if (cnt != nobjs) {
				panic("mcache_bkt_destroy: %s cp %p corrupted "
				    "list in bkt %p (nobjs %d actual %d)\n",
				    cp->mc_name, (void *)cp, (void *)bkt,
				    nobjs, cnt);
			}
		}

		/* Advise the slab layer to purge the object(s) */
		(*cp->mc_slab_free)(cp->mc_private, top,
		    (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
	}
	mcache_free(btp->bt_cache, bkt);
}

/*
 * Update the bucket layer working set statistics.
 */
static void
mcache_bkt_ws_update(mcache_t *cp)
{
	MCACHE_LOCK(&cp->mc_bkt_lock);

	cp->mc_full.bl_reaplimit = cp->mc_full.bl_min;
	cp->mc_full.bl_min = cp->mc_full.bl_total;
	cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_min;
	cp->mc_empty.bl_min = cp->mc_empty.bl_total;

	MCACHE_UNLOCK(&cp->mc_bkt_lock);
}

/*
 * Reap all buckets that are beyond the working set.
 */
static void
mcache_bkt_ws_reap(mcache_t *cp)
{
	long reap;
	mcache_bkt_t *bkt;
	mcache_bkttype_t *btp;

	reap = MIN(cp->mc_full.bl_reaplimit, cp->mc_full.bl_min);
	while (reap-- &&
	    (bkt = mcache_bkt_alloc(cp, &cp->mc_full, &btp)) != NULL)
		mcache_bkt_destroy(cp, btp, bkt, btp->bt_bktsize);

	reap = MIN(cp->mc_empty.bl_reaplimit, cp->mc_empty.bl_min);
	while (reap-- &&
	    (bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp)) != NULL)
		mcache_bkt_destroy(cp, btp, bkt, 0);
}

static void
mcache_reap_timeout(void *arg)
{
	volatile UInt32 *flag = arg;

	ASSERT(flag == &mcache_reaping);

	*flag = 0;
}

static void
mcache_reap_done(void *flag)
{
	timeout(mcache_reap_timeout, flag, mcache_reap_interval);
}

static void
mcache_reap_start(void *arg)
{
	UInt32 *flag = arg;

	ASSERT(flag == &mcache_reaping);

	mcache_applyall(mcache_cache_reap);
	mcache_dispatch(mcache_reap_done, flag);
}

__private_extern__ void
mcache_reap(void)
{
	UInt32 *flag = &mcache_reaping;

	if (mcache_llock_owner == current_thread() ||
	    !OSCompareAndSwap(0, 1, flag))
		return;

	mcache_dispatch(mcache_reap_start, flag);
}

static void
mcache_cache_reap(mcache_t *cp)
{
	mcache_bkt_ws_reap(cp);
}

/*
 * Performs periodic maintenance on a cache.
 */
static void
mcache_cache_update(mcache_t *cp)
{
	int need_bkt_resize = 0;
	int need_bkt_reenable = 0;

	lck_mtx_assert(mcache_llock, LCK_MTX_ASSERT_OWNED);

	mcache_bkt_ws_update(cp);

	/*
	 * Cache resize and post-purge reenable are mutually exclusive.
	 * If the cache was previously purged, there is no point in
	 * increasing the bucket size as there was an indication of
	 * memory pressure on the system.
	 */
	lck_mtx_lock_spin(&cp->mc_sync_lock);
	if (!(cp->mc_flags & MCF_NOCPUCACHE) && cp->mc_enable_cnt)
		need_bkt_reenable = 1;
	lck_mtx_unlock(&cp->mc_sync_lock);

	MCACHE_LOCK(&cp->mc_bkt_lock);
	/*
	 * If the contention count is greater than the threshold, and if
	 * we are not already at the maximum bucket size, increase it.
	 * Otherwise, if this cache was previously purged by the user
	 * then we simply reenable it.
	 */
	if ((unsigned int)cp->mc_chunksize < cp->cache_bkttype->bt_maxbuf &&
	    (int)(cp->mc_bkt_contention - cp->mc_bkt_contention_prev) >
	    mcache_bkt_contention && !need_bkt_reenable)
		need_bkt_resize = 1;

	cp->mc_bkt_contention_prev = cp->mc_bkt_contention;
	MCACHE_UNLOCK(&cp->mc_bkt_lock);

	if (need_bkt_resize)
		mcache_dispatch(mcache_cache_bkt_resize, cp);
	else if (need_bkt_reenable)
		mcache_dispatch(mcache_cache_enable, cp);
}

/*
 * Recompute a cache's bucket size.
 * This is an expensive operation and should not be done frequently;
 * larger buckets provide a higher transfer rate with the bucket layer,
 * while smaller buckets reduce memory consumption.
 */
static void
mcache_cache_bkt_resize(void *arg)
{
	mcache_t *cp = arg;
	mcache_bkttype_t *btp = cp->cache_bkttype;

	if ((unsigned int)cp->mc_chunksize < btp->bt_maxbuf) {
		mcache_bkt_purge(cp);

		/*
		 * Upgrade to the next bucket type with larger bucket size;
		 * temporarily set the previous contention snapshot to a
		 * negative number to prevent unnecessary resize requests.
		 */
		MCACHE_LOCK(&cp->mc_bkt_lock);
		cp->cache_bkttype = ++btp;
		cp->mc_bkt_contention_prev = cp->mc_bkt_contention + INT_MAX;
		MCACHE_UNLOCK(&cp->mc_bkt_lock);

		mcache_cache_enable(cp);
	}
}

/*
 * Reenable a cache that was previously disabled due to a purge.
 */
static void
mcache_cache_enable(void *arg)
{
	mcache_t *cp = arg;

	lck_mtx_lock_spin(&cp->mc_sync_lock);
	cp->mc_purge_cnt = 0;
	cp->mc_enable_cnt = 0;
	lck_mtx_unlock(&cp->mc_sync_lock);

	mcache_cache_bkt_enable(cp);
}

static void
mcache_update_timeout(__unused void *arg)
{
	timeout(mcache_update, NULL, mcache_reap_interval);
}

static void
mcache_update(__unused void *arg)
{
	mcache_applyall(mcache_cache_update);
	mcache_dispatch(mcache_update_timeout, NULL);
}

static void
mcache_applyall(void (*func)(mcache_t *))
{
	mcache_t *cp;

	MCACHE_LIST_LOCK();
	LIST_FOREACH(cp, &mcache_head, mc_list) {
		func(cp);
	}
	MCACHE_LIST_UNLOCK();
}

static void
mcache_dispatch(void (*func)(void *), void *arg)
{
	ASSERT(func != NULL);
	timeout(func, arg, hz/1000);
}

__private_extern__ void
mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp,
    struct timeval *base_ts)
{
	struct timeval now, base = { 0, 0 };
	void *stack[MCACHE_STACK_DEPTH + 1];

	mca->mca_addr = addr;
	mca->mca_cache = cp;
	mca->mca_pthread = mca->mca_thread;
	mca->mca_thread = current_thread();
	bcopy(mca->mca_stack, mca->mca_pstack, sizeof (mca->mca_pstack));
	mca->mca_pdepth = mca->mca_depth;
	bzero(stack, sizeof (stack));
	mca->mca_depth = OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1;
	bcopy(&stack[1], mca->mca_stack, sizeof (mca->mca_pstack));

	mca->mca_ptstamp = mca->mca_tstamp;
	microuptime(&now);
	if (base_ts != NULL)
		base = *base_ts;
	/* tstamp is in ms relative to base_ts */
	mca->mca_tstamp = ((now.tv_usec - base.tv_usec) / 1000);
	if ((now.tv_sec - base.tv_sec) > 0)
		mca->mca_tstamp += ((now.tv_sec - base.tv_sec) * 1000);
}

__private_extern__ void
mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size)
{
	u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
	u_int64_t *buf = (u_int64_t *)buf_arg;

	VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
	VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));

	while (buf < buf_end)
		*buf++ = pattern;
}

__private_extern__ void *
mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size)
{
	u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
	u_int64_t *buf;

	VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
	VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));

	for (buf = buf_arg; buf < buf_end; buf++) {
		if (*buf != pattern)
			return (buf);
	}
	return (NULL);
}

__private_extern__ void *
mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg,
    size_t size)
{
	u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
	u_int64_t *buf;

	VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
	VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));

	for (buf = buf_arg; buf < buf_end; buf++) {
		if (*buf != old) {
			mcache_set_pattern(old, buf_arg,
			    (uintptr_t)buf - (uintptr_t)buf_arg);
			return (buf);
		}
		*buf = new;
	}
	return (NULL);
}

__private_extern__ void
mcache_audit_free_verify(mcache_audit_t *mca, void *base, size_t offset,
    size_t size)
{
	void *addr;
	u_int64_t *oaddr64;
	mcache_obj_t *next;

	addr = (void *)((uintptr_t)base + offset);
	next = ((mcache_obj_t *)addr)->obj_next;

	/* For the "obj_next" pointer in the buffer */
	oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
	*oaddr64 = MCACHE_FREE_PATTERN;

	if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN,
	    (caddr_t)base, size)) != NULL) {
		mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
		    (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
		/* NOTREACHED */
	}
	((mcache_obj_t *)addr)->obj_next = next;
}

__private_extern__ void
mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset,
    size_t size)
{
	void *addr;
	u_int64_t *oaddr64;
	mcache_obj_t *next;

	addr = (void *)((uintptr_t)base + offset);
	next = ((mcache_obj_t *)addr)->obj_next;

	/* For the "obj_next" pointer in the buffer */
	oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
	*oaddr64 = MCACHE_FREE_PATTERN;

	if ((oaddr64 = mcache_verify_set_pattern(MCACHE_FREE_PATTERN,
	    MCACHE_UNINITIALIZED_PATTERN, (caddr_t)base, size)) != NULL) {
		mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
		    (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
		/* NOTREACHED */
	}
	((mcache_obj_t *)addr)->obj_next = next;
}

#undef panic

__private_extern__ char *
mcache_dump_mca(mcache_audit_t *mca)
{
	if (mca_dump_buf == NULL)
		return (NULL);

	snprintf(mca_dump_buf, DUMP_MCA_BUF_SIZE,
	    "mca %p: addr %p, cache %p (%s)\n"
	    "last transaction; thread %p, saved PC stack (%d deep):\n"
	    "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
	    "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
	    "previous transaction; thread %p, saved PC stack (%d deep):\n"
	    "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
	    "\t%p, %p, %p, %p, %p, %p, %p, %p\n",
	    mca, mca->mca_addr, mca->mca_cache,
	    mca->mca_cache ? mca->mca_cache->mc_name : "?",
	    mca->mca_thread, mca->mca_depth,
	    mca->mca_stack[0], mca->mca_stack[1], mca->mca_stack[2],
	    mca->mca_stack[3], mca->mca_stack[4], mca->mca_stack[5],
	    mca->mca_stack[6], mca->mca_stack[7], mca->mca_stack[8],
	    mca->mca_stack[9], mca->mca_stack[10], mca->mca_stack[11],
	    mca->mca_stack[12], mca->mca_stack[13], mca->mca_stack[14],
	    mca->mca_stack[15],
	    mca->mca_pthread, mca->mca_pdepth,
	    mca->mca_pstack[0], mca->mca_pstack[1], mca->mca_pstack[2],
	    mca->mca_pstack[3], mca->mca_pstack[4], mca->mca_pstack[5],
	    mca->mca_pstack[6], mca->mca_pstack[7], mca->mca_pstack[8],
	    mca->mca_pstack[9], mca->mca_pstack[10], mca->mca_pstack[11],
	    mca->mca_pstack[12], mca->mca_pstack[13], mca->mca_pstack[14],
	    mca->mca_pstack[15]);

	return (mca_dump_buf);
}

__private_extern__ void
mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset,
    int64_t expected, int64_t got)
{
	if (mca == NULL) {
		panic("mcache_audit: buffer %p modified after free at "
		    "offset 0x%lx (0x%llx instead of 0x%llx)\n", addr,
		    offset, got, expected);
		/* NOTREACHED */
	}

	panic("mcache_audit: buffer %p modified after free at offset 0x%lx "
	    "(0x%llx instead of 0x%llx)\n%s\n",
	    addr, offset, got, expected, mcache_dump_mca(mca));
	/* NOTREACHED */
}

__private_extern__ int
assfail(const char *a, const char *f, int l)
{
	panic("assertion failed: %s, file: %s, line: %d", a, f, l);
	return (0);
}