/*
 * Copyright (c) 1999, 2006, 2008 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/* Author: Bertrand Serlet, August 1999 */

/*
 Multithread enhancements for "tiny" allocations introduced February 2008.
 These are in the spirit of "Hoard". See:
 Berger, E.D.; McKinley, K.S.; Blumofe, R.D.; Wilson, P.R. (2000).
 "Hoard: a scalable memory allocator for multithreaded applications".
 ACM SIGPLAN Notices 35 (11): 117-128. Berger2000.
 <http://portal.acm.org/citation.cfm?id=356989.357000>
 Retrieved on 2008-02-22.
 */

/* gcc -g -O3 magazine_malloc.c malloc.c -o libmagmalloc.dylib -I. \
 -I/System/Library/Frameworks/System.framework/PrivateHeaders/ -funit-at-a-time \
 -dynamiclib -Wall -arch x86_64 -arch i386 -arch ppc */

#include <TargetConditionals.h>

#include "scalable_malloc.h"
#include "malloc_printf.h"
#include "malloc_internal.h"
#include "magmallocProvider.h"

#include <_simple.h>
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <libkern/OSAtomic.h>
#include <mach/vm_statistics.h>
#include <mach/mach_init.h>
#include <os/tsd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/param.h>

#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__arm64__)
#define __APPLE_API_PRIVATE
#include <machine/cpu_capabilities.h>
#define _COMM_PAGE_VERSION_REQD 9
#undef __APPLE_API_PRIVATE
#else
#include <sys/sysctl.h>
#endif

#include <libkern/OSAtomic.h>
#include <mach-o/dyld.h>	/* for NSVersionOfLinkTimeLibrary() */
#include <mach-o/dyld_priv.h>	/* for _dyld_get_image_slide() */
#include <crt_externs.h>	/* for _NSGetMachExecuteHeader() */
#include <mach/vm_param.h>
#include <mach/mach_vm.h>
#include <sys/vmparam.h>
#include <os/tsd.h>

#include <mach/vm_page_size.h>

#define trunc_page_quanta(x) (vm_page_size >= vm_kernel_page_size ? trunc_page((x)) : trunc_page_kernel((x)))
#define round_page_quanta(x) (vm_page_size >= vm_kernel_page_size ? round_page((x)) : round_page_kernel((x)))
#define vm_page_quanta_size MAX(vm_page_size, vm_kernel_page_size)
#define vm_page_quanta_shift MAX(vm_page_shift, vm_kernel_page_shift)
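
/*
 * Illustrative note (editorial, not from the original sources): the quanta
 * macros always round to the larger of the user and kernel page sizes.  For
 * a hypothetical configuration with a 4KB user page and a 16KB kernel page,
 * round_page_quanta(5000) == 16384 and trunc_page_quanta(20000) == 16384.
 */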

#ifndef PAGE_MAX_SIZE
/* Until xnu exposes these publicly in mach/vm_param.h ... */

#define PAGE_MAX_SHIFT      14
#define PAGE_MAX_SIZE       (1 << PAGE_MAX_SHIFT)
#define PAGE_MAX_MASK       (PAGE_MAX_SIZE-1)

#define PAGE_MIN_SHIFT      12
#define PAGE_MIN_SIZE       (1 << PAGE_MIN_SHIFT)
#define PAGE_MIN_MASK       (PAGE_MIN_SIZE-1)

#endif

#include <CrashReporterClient.h>

/*********************	DEFINITIONS	************************/

#define DEBUG_MALLOC			0 // set to one to debug malloc itself

#define DEBUG_CLIENT			0 // set to one to debug malloc client

#define DEBUG_MADVISE			0

// <rdar://problem/10397726>
#define RELAXED_INVARIANT_CHECKS 1

#if TARGET_OS_EMBEDDED
# define MADVISE_PRESSURE_RELIEF 0
#else
// <rdar://problem/12596555>
# define MADVISE_PRESSURE_RELIEF 1
#endif

#if DEBUG_MALLOC
#warning DEBUG_MALLOC ENABLED
# define INLINE
# define ALWAYSINLINE
# define CHECK_MAGAZINE_PTR_LOCKED(szone, mag_ptr, fun)				\
	do {										\
		if (TRY_LOCK(mag_ptr->magazine_lock)) {			\
			malloc_printf("*** magazine_lock was not set %p in %s\n",		\
			mag_ptr->magazine_lock, fun);						\
		}										\
	} while (0)
#else
# define INLINE	__inline__
# define ALWAYSINLINE __attribute__((always_inline))
# define CHECK_MAGAZINE_PTR_LOCKED(szone, mag_ptr, fun)	{}
#endif

# define NOINLINE __attribute__((noinline))

#if defined(__i386__) || defined(__x86_64__)
#define CACHE_LINE 128 /* Future-proofing at 128B */
#elif defined(__arm__) || defined(__arm64__)
#define CACHE_LINE 64
#else
#define CACHE_LINE 32
#endif
#define CACHE_ALIGN __attribute__ ((aligned (CACHE_LINE) ))

#if !__LP64__
#define ASLR_INTERNAL			1
#endif

#define SCRIBBLE_BYTE			0xaa /* allocated scribble */
#define SCRABBLE_BYTE			0x55 /* free()'d scribble */
#define SCRUBBLE_BYTE			0xdd /* madvise(..., MADV_FREE) scribble */

/*
 * msize - a type to refer to the number of quanta of a tiny or small
 * allocation.  A tiny block with an msize of 3 would be 3 << SHIFT_TINY_QUANTUM
 * bytes in size.
 */
typedef unsigned short msize_t;

typedef union {
	void	*p;
	uintptr_t	u;
} ptr_union;

typedef struct {
	ptr_union	previous;
	ptr_union	next;
} free_list_t;

typedef unsigned int grain_t; // N.B. wide enough to index all free slots

typedef int mag_index_t;

#define CHECK_REGIONS			(1 << 31)
#define DISABLE_ASLR			(1 << 30)

#define MAX_RECORDER_BUFFER		256

/*********************	DEFINITIONS for tiny	************************/

/*
 * Memory in the Tiny range is allocated from regions (heaps) pointed to by the
 * szone's hashed_regions pointer.
 *
 * Each region is laid out as a heap, followed by a header block, all within
 * a 1MB (2^20) block.  This means there are 64520 16-byte blocks and the header
 * is 16138 bytes, making the total 1048458 bytes, leaving 118 bytes unused.
 *
 * The header block is arranged as in struct tiny_region defined just below, and
 * consists of two bitfields (or bit arrays) interleaved 32 bits by 32 bits.
 *
 * Each bitfield comprises NUM_TINY_BLOCKS bits, and refers to the corresponding
 * TINY_QUANTUM block within the heap.
 *
 * The bitfields are used to encode the state of memory within the heap.  The header bit indicates
 * that the corresponding quantum is the first quantum in a block (either in use or free).  The
 * in-use bit is set for the header if the block has been handed out (allocated).  If the header
 * bit is not set, the in-use bit is invalid.
 *
 * The szone maintains an array of NUM_TINY_SLOTS freelists, each of which is used to hold
 * free objects of the corresponding quantum size.
 *
 * A free block is laid out depending on its size, in order to fit all free
 * blocks in 16 bytes, on both 32 and 64 bit platforms.  One quantum blocks do
 * not store their size in the block, instead relying on the header information
 * to determine their size.  Blocks of two or more quanta have room to store
 * their size in the block, and store it both after the 'next' pointer, and in
 * the last 2 bytes of the block.
 *
 * 1-quantum block
 * Offset (32-bit mode)	(64-bit mode)
 * 0x0          0x0      : previous
 * 0x4          0x08     : next
 * end          end
 *
 * >1-quantum block
 * Offset (32-bit mode)	(64-bit mode)
 * 0x0          0x0      : previous
 * 0x4          0x08     : next
 * 0x8          0x10     : size (in quantum counts)
 * end - 2      end - 2  : size (in quantum counts)
 * end          end
 *
 * All fields are pointer-sized, except for the size which is an unsigned short.
 *
 */
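
/*
 * Illustrative example (editorial note, not part of the original sources):
 * a 40-byte request falls in the tiny range and is rounded up to msize = 3,
 * i.e. 3 * TINY_QUANTUM = 48 bytes.  The header bit for its first 16-byte
 * quantum is set, and the in-use bit is set while it is allocated.  If the
 * block is later freed and placed on a free list, the fact that msize > 1
 * means it also records its own size after the 'next' pointer and in its
 * final 2 bytes, as laid out above.
 */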

#define SHIFT_TINY_QUANTUM		4	// Required for AltiVec
#define	TINY_QUANTUM			(1 << SHIFT_TINY_QUANTUM)

#define FOLLOWING_TINY_PTR(ptr,msize)	(((unsigned char *)(ptr)) + ((msize) << SHIFT_TINY_QUANTUM))

#ifdef __LP64__
#define NUM_TINY_SLOTS			64	// number of slots for free-lists
#else
#define NUM_TINY_SLOTS			32	// number of slots for free-lists
#endif

#define NUM_TINY_BLOCKS			64520
#define SHIFT_TINY_CEIL_BLOCKS		16 // ceil(log2(NUM_TINY_BLOCKS))
#define NUM_TINY_CEIL_BLOCKS		(1 << SHIFT_TINY_CEIL_BLOCKS)
#define TINY_BLOCKS_ALIGN		(SHIFT_TINY_CEIL_BLOCKS + SHIFT_TINY_QUANTUM) // 20

#define TINY_ENTROPY_BITS		15
#define TINY_ENTROPY_MASK		((1 << TINY_ENTROPY_BITS) - 1)

/*
 * Avoid having so much entropy that the end of a valid tiny allocation
 * might overrun the end of the tiny region.
 */
#if TINY_ENTROPY_MASK + NUM_TINY_SLOTS > NUM_TINY_BLOCKS
#error Too many entropy bits for tiny region requested
#endif

/*
 * Enough room for the data, followed by the bit arrays (2-bits per block)
 * plus rounding to the nearest page.
 */
#define CEIL_NUM_TINY_BLOCKS_WORDS	(((NUM_TINY_BLOCKS + 31) & ~31) >> 5)
#define TINY_METADATA_SIZE		(sizeof(region_trailer_t) + sizeof(tiny_header_inuse_pair_t) * CEIL_NUM_TINY_BLOCKS_WORDS)
#define TINY_REGION_SIZE							\
	((NUM_TINY_BLOCKS * TINY_QUANTUM + TINY_METADATA_SIZE + PAGE_MAX_SIZE - 1) & ~ (PAGE_MAX_SIZE - 1))
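
/*
 * Editorial sketch of the arithmetic above (assuming a 64-bit build):
 * CEIL_NUM_TINY_BLOCKS_WORDS = ceil(64520 / 32) = 2017, so the interleaved
 * header/in-use pairs take 2017 * 8 = 16136 bytes; adding the region trailer
 * and the 64520 * 16 = 1032320 payload bytes, then rounding up to a
 * PAGE_MAX_SIZE (16KB) boundary, gives TINY_REGION_SIZE = 1048576 bytes (1MB).
 */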

#define TINY_METADATA_START		(NUM_TINY_BLOCKS * TINY_QUANTUM)

/*
 * Beginning and end pointers for a region's heap.
 */
#define TINY_REGION_ADDRESS(region)	((void *)(region))
#define TINY_REGION_END(region)		((void *)(((uintptr_t)(region)) + (NUM_TINY_BLOCKS * TINY_QUANTUM)))

/*
 * Locate the heap base for a pointer known to be within a tiny region.
 */
#define TINY_REGION_FOR_PTR(_p)		((void *)((uintptr_t)(_p) & ~((1 << TINY_BLOCKS_ALIGN) - 1)))

/*
 * Convert between byte and msize units.
 */
#define TINY_BYTES_FOR_MSIZE(_m)	((_m) << SHIFT_TINY_QUANTUM)
#define TINY_MSIZE_FOR_BYTES(_b)	((_b) >> SHIFT_TINY_QUANTUM)

#ifdef __LP64__
# define TINY_FREE_SIZE(ptr)		(((msize_t *)(ptr))[8])
#else
# define TINY_FREE_SIZE(ptr)		(((msize_t *)(ptr))[4])
#endif
#define TINY_PREVIOUS_MSIZE(ptr)	((msize_t *)(ptr))[-1]
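
/*
 * Editorial note: TINY_FREE_SIZE reads the msize stored just after the
 * 'next' pointer of a free block of two or more quanta (byte offset 0x10 on
 * 64-bit, 0x8 on 32-bit, indexed here in msize_t units), while
 * TINY_PREVIOUS_MSIZE reads the copy kept in the last 2 bytes of the
 * preceding free block.  An illustrative use:
 *
 *	msize_t m    = TINY_FREE_SIZE(free_ptr);        // size of this free block
 *	msize_t prev = TINY_PREVIOUS_MSIZE(free_ptr);   // size of the free block just before it
 */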

/*
 * Layout of a tiny region
 */
typedef uint32_t tiny_block_t[4]; // assert(TINY_QUANTUM == sizeof(tiny_block_t))

typedef struct tiny_header_inuse_pair
{
	uint32_t	header;
	uint32_t	inuse;
} tiny_header_inuse_pair_t;

typedef struct region_trailer
{
	struct region_trailer	*prev;
	struct region_trailer	*next;
	boolean_t			recirc_suitable;
	volatile int		pinned_to_depot;
	unsigned			bytes_used;
	mag_index_t			mag_index;
} region_trailer_t;

typedef struct tiny_region
{
	tiny_block_t blocks[NUM_TINY_BLOCKS];

	region_trailer_t trailer;

	// The interleaved bit arrays comprising the header and inuse bitfields.
	// The unused bits of each component in the last pair will be initialized to sentinel values.
	tiny_header_inuse_pair_t pairs[CEIL_NUM_TINY_BLOCKS_WORDS];

	uint8_t pad[TINY_REGION_SIZE - (NUM_TINY_BLOCKS * sizeof(tiny_block_t)) - TINY_METADATA_SIZE];
} *tiny_region_t;

/*
 * Per-region meta data for tiny allocator
 */
#define REGION_TRAILER_FOR_TINY_REGION(r)	(&(((tiny_region_t)(r))->trailer))
#define MAGAZINE_INDEX_FOR_TINY_REGION(r)	(REGION_TRAILER_FOR_TINY_REGION(r)->mag_index)
#define BYTES_USED_FOR_TINY_REGION(r)		(REGION_TRAILER_FOR_TINY_REGION(r)->bytes_used)

/*
 * Locate the block header for a pointer known to be within a tiny region.
 */
#define TINY_BLOCK_HEADER_FOR_PTR(_p)	((void *)&(((tiny_region_t)TINY_REGION_FOR_PTR(_p))->pairs))

/*
 * Locate the inuse map for a given block header pointer.
 */
#define TINY_INUSE_FOR_HEADER(_h)	((void *)&(((tiny_header_inuse_pair_t *)(_h))->inuse))

/*
 * Compute the bitmap index for a pointer known to be within a tiny region.
 */
#define TINY_INDEX_FOR_PTR(_p) 		(((uintptr_t)(_p) >> SHIFT_TINY_QUANTUM) & (NUM_TINY_CEIL_BLOCKS - 1))
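
/*
 * Editorial example: for a pointer 0x230 bytes into its tiny region,
 * TINY_INDEX_FOR_PTR yields (0x230 >> 4) & 0xFFFF = 35, i.e. the 36th
 * 16-byte quantum; bit 35 of the region's header bit array (bit 3 of
 * pairs[1].header) describes that quantum.  The region base contributes
 * nothing because regions are aligned on 1 << TINY_BLOCKS_ALIGN boundaries.
 */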

#define TINY_CACHE			1	// This governs a last-free cache of 1 that bypasses the free-list

#if ! TINY_CACHE
#warning TINY_CACHE turned off
#endif

#define TINY_REGION_PAYLOAD_BYTES	(NUM_TINY_BLOCKS * TINY_QUANTUM)

/*********************	DEFINITIONS for small	************************/

/*
 * Memory in the Small range is allocated from regions (heaps) pointed to by the szone's hashed_regions
 * pointer.
 *
 * Each region is laid out as a heap, followed by the metadata array, all within an 8MB (2^23) block.
 * The array is arranged as an array of shorts, one for each SMALL_QUANTUM in the heap.
 * This means there are 16320 512-byte blocks and the array is 16320*2 bytes, which totals 8388480, leaving
 * 128 bytes unused.
 *
 * The MSB of each short is set for the first quantum in a free block.  The low 15 bits encode the
 * block size (in SMALL_QUANTUM units), or are zero if the quantum is not the first in a block.
 *
 * The szone maintains an array of 32 freelists, each of which is used to hold free objects
 * of the corresponding quantum size.
 *
 * A free block is laid out as:
 *
 * Offset (32-bit mode)	(64-bit mode)
 * 0x0          0x0      : previous
 * 0x4          0x08     : next
 * 0x8          0x10     : size (in quantum counts)
 * end - 2	end - 2  : size (in quantum counts)
 * end		end
 *
 * All fields are pointer-sized, except for the size which is an unsigned short.
 *
 */
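
/*
 * Editorial example: a 3KB request in the small range occupies
 * msize = 6 quanta (6 * 512 = 3072 bytes).  While allocated, its first
 * metadata short holds 6 and the following five shorts hold 0.  Once freed
 * (and not coalesced), the first short becomes (SMALL_IS_FREE | 6) and the
 * block itself carries the free-list fields shown above.
 */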

#define SMALL_IS_FREE			(1 << 15)

#define	SHIFT_SMALL_QUANTUM		(SHIFT_TINY_QUANTUM + 5)	// 9
#define	SMALL_QUANTUM			(1 << SHIFT_SMALL_QUANTUM)	// 512 bytes

#define FOLLOWING_SMALL_PTR(ptr,msize)	(((unsigned char *)(ptr)) + ((msize) << SHIFT_SMALL_QUANTUM))

/*
 * The number of slots in the free-list for small blocks.  To avoid going to
 * vm system as often on large memory machines, increase the number of free list
 * spots above some amount of RAM installed in the system.
 */
#define NUM_SMALL_SLOTS			32
#define NUM_SMALL_SLOTS_LARGEMEM	256
#define SMALL_BITMAP_WORDS		8

/*
 * We can only represent up to 1<<15 for msize; but we choose to stay even below that to avoid the
 * convention msize=0 => msize = (1<<15)
 */
#define NUM_SMALL_BLOCKS		16320
#define SHIFT_SMALL_CEIL_BLOCKS		14 // ceil(log2(NUM_SMALL_BLOCKS))
#define NUM_SMALL_CEIL_BLOCKS		(1 << SHIFT_SMALL_CEIL_BLOCKS)
#define SMALL_BLOCKS_ALIGN		(SHIFT_SMALL_CEIL_BLOCKS + SHIFT_SMALL_QUANTUM) // 23

#define SMALL_ENTROPY_BITS		13
#define SMALL_ENTROPY_MASK		((1 << SMALL_ENTROPY_BITS) - 1)

/*
 * Avoid having so much entropy that the end of a valid small allocation
 * might overrun the end of the small region.
 */
#if SMALL_ENTROPY_MASK + NUM_SMALL_SLOTS > NUM_SMALL_BLOCKS
#error Too many entropy bits for small region requested
#endif

#define SMALL_METADATA_SIZE		(sizeof(region_trailer_t) + NUM_SMALL_BLOCKS * sizeof(msize_t))
#define SMALL_REGION_SIZE						\
	((NUM_SMALL_BLOCKS * SMALL_QUANTUM + SMALL_METADATA_SIZE + PAGE_MAX_SIZE - 1) & ~ (PAGE_MAX_SIZE - 1))
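
/*
 * Editorial sketch of the arithmetic above (assuming a 64-bit build):
 * the heap is 16320 * 512 = 8355840 bytes and the metadata array is
 * 16320 * 2 = 32640 bytes; adding the region trailer and rounding up to a
 * PAGE_MAX_SIZE (16KB) boundary gives SMALL_REGION_SIZE = 8388608 bytes (8MB).
 */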

#define SMALL_METADATA_START		(NUM_SMALL_BLOCKS * SMALL_QUANTUM)

/*
 * Beginning and end pointers for a region's heap.
 */
#define SMALL_REGION_ADDRESS(region)	((unsigned char *)region)
#define SMALL_REGION_END(region)	(SMALL_REGION_ADDRESS(region) + (NUM_SMALL_BLOCKS * SMALL_QUANTUM))

/*
 * Locate the heap base for a pointer known to be within a small region.
 */
#define SMALL_REGION_FOR_PTR(_p)	((void *)((uintptr_t)(_p) & ~((1 << SMALL_BLOCKS_ALIGN) - 1)))

/*
 * Convert between byte and msize units.
 */
#define SMALL_BYTES_FOR_MSIZE(_m)	((_m) << SHIFT_SMALL_QUANTUM)
#define SMALL_MSIZE_FOR_BYTES(_b)	((_b) >> SHIFT_SMALL_QUANTUM)

#define SMALL_PREVIOUS_MSIZE(ptr)	((msize_t *)(ptr))[-1]

/*
 * Layout of a small region
 */
typedef uint32_t small_block_t[SMALL_QUANTUM/sizeof(uint32_t)];

typedef struct small_region
{
	small_block_t blocks[NUM_SMALL_BLOCKS];

	region_trailer_t trailer;

	msize_t small_meta_words[NUM_SMALL_BLOCKS];

	uint8_t pad[SMALL_REGION_SIZE - (NUM_SMALL_BLOCKS * sizeof(small_block_t)) - SMALL_METADATA_SIZE];
} *small_region_t;

/*
 * Per-region meta data for small allocator
 */
#define REGION_TRAILER_FOR_SMALL_REGION(r)	(&(((small_region_t)(r))->trailer))
#define MAGAZINE_INDEX_FOR_SMALL_REGION(r)	(REGION_TRAILER_FOR_SMALL_REGION(r)->mag_index)
#define BYTES_USED_FOR_SMALL_REGION(r)		(REGION_TRAILER_FOR_SMALL_REGION(r)->bytes_used)

/*
 * Locate the metadata base for a pointer known to be within a small region.
 */
#define SMALL_META_HEADER_FOR_PTR(_p)	(((small_region_t)SMALL_REGION_FOR_PTR(_p))->small_meta_words)

/*
 * Compute the metadata index for a pointer known to be within a small region.
 */
#define SMALL_META_INDEX_FOR_PTR(_p)	(((uintptr_t)(_p) >> SHIFT_SMALL_QUANTUM) & (NUM_SMALL_CEIL_BLOCKS - 1))

/*
 * Find the metadata word for a pointer known to be within a small region.
 */
#define SMALL_METADATA_FOR_PTR(_p)	(SMALL_META_HEADER_FOR_PTR(_p) + SMALL_META_INDEX_FOR_PTR(_p))

/*
 * Determine whether a pointer known to be within a small region points to memory which is free.
 */
#define SMALL_PTR_IS_FREE(_p)		(*SMALL_METADATA_FOR_PTR(_p) & SMALL_IS_FREE)

/*
 * Extract the msize value for a pointer known to be within a small region.
 */
#define SMALL_PTR_SIZE(_p)		(*SMALL_METADATA_FOR_PTR(_p) & ~SMALL_IS_FREE)
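
/*
 * Editorial sketch of how the macros above compose for a small-region
 * pointer p (illustrative only):
 *
 *	msize_t word = *SMALL_METADATA_FOR_PTR(p);
 *	boolean_t f  = (word & SMALL_IS_FREE) != 0;                   // same test as SMALL_PTR_IS_FREE(p)
 *	size_t bytes = SMALL_BYTES_FOR_MSIZE(word & ~SMALL_IS_FREE);  // block size in bytes
 */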

#define SMALL_CACHE			1
#if !SMALL_CACHE
#warning SMALL_CACHE turned off
#endif

#define SMALL_REGION_PAYLOAD_BYTES	(NUM_SMALL_BLOCKS * SMALL_QUANTUM)

/*************************  DEFINITIONS for large  ****************************/

#define LARGE_THRESHOLD			(15 * 1024) // strictly above this use "large"
#define LARGE_THRESHOLD_LARGEMEM	(127 * 1024)

#if (LARGE_THRESHOLD > NUM_SMALL_SLOTS * SMALL_QUANTUM)
#error LARGE_THRESHOLD should always be less than NUM_SMALL_SLOTS * SMALL_QUANTUM
#endif

#if (LARGE_THRESHOLD_LARGEMEM > NUM_SMALL_SLOTS_LARGEMEM * SMALL_QUANTUM)
#error LARGE_THRESHOLD_LARGEMEM should always be less than NUM_SMALL_SLOTS_LARGEMEM * SMALL_QUANTUM
#endif

/*
 * When all memory is touched after a copy, vm_copy() is always a loss.
 * But if the memory is only read, vm_copy() wins over memmove() at 3 or 4 pages
 * (on a G3/300MHz)
 *
 * This must be larger than LARGE_THRESHOLD
 */
#define VM_COPY_THRESHOLD		(40 * 1024)
#define VM_COPY_THRESHOLD_LARGEMEM	(128 * 1024)

typedef struct {
	vm_address_t address;
	vm_size_t size;
	boolean_t did_madvise_reusable;
} large_entry_t;

#if !TARGET_OS_EMBEDDED
#define LARGE_CACHE			1
#else
#define LARGE_CACHE			0
#endif
#if !LARGE_CACHE
#warning LARGE_CACHE turned off
#endif
#if defined(__LP64__)
#define LARGE_ENTRY_CACHE_SIZE 16
#define LARGE_CACHE_SIZE_LIMIT ((vm_size_t)0x80000000) /* 2Gb */
#else
#define LARGE_ENTRY_CACHE_SIZE 8
#define LARGE_CACHE_SIZE_LIMIT ((vm_size_t)0x02000000) /* 32Mb */
#endif
#define LARGE_CACHE_SIZE_ENTRY_LIMIT (LARGE_CACHE_SIZE_LIMIT/LARGE_ENTRY_CACHE_SIZE)
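
/*
 * Editorial note: by this arithmetic, LARGE_CACHE_SIZE_ENTRY_LIMIT works out
 * to 2GB/16 = 128MB per cached entry on 64-bit builds and 32MB/8 = 4MB on
 * 32-bit builds.
 */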

#define SZONE_FLOTSAM_THRESHOLD_LOW  (1024 * 512)
#define SZONE_FLOTSAM_THRESHOLD_HIGH (1024 * 1024)

/*******************************************************************************
 * Definitions for region hash
 ******************************************************************************/

typedef void * region_t;
typedef region_t * rgnhdl_t; /* A pointer into hashed_regions array. */

#define INITIAL_NUM_REGIONS_SHIFT	6 // log2(INITIAL_NUM_REGIONS)
#define INITIAL_NUM_REGIONS		(1 << INITIAL_NUM_REGIONS_SHIFT) // Must be a power of 2!
#define HASHRING_OPEN_ENTRY		((region_t) 0) // Initial value and sentinel marking end of collision chain
#define HASHRING_REGION_DEALLOCATED	((region_t)-1) // Region at this slot reclaimed by OS
#define HASH_BLOCKS_ALIGN		TINY_BLOCKS_ALIGN // MIN( TINY_BLOCKS_ALIGN, SMALL_BLOCKS_ALIGN, ... )

typedef struct region_hash_generation {
	size_t		num_regions_allocated;
	size_t		num_regions_allocated_shift; // log2(num_regions_allocated)
	region_t		*hashed_regions;  // hashed by location
	struct		region_hash_generation *nextgen;
} region_hash_generation_t;

/*******************************************************************************
 * Per-processor magazine for tiny and small allocators
 ******************************************************************************/

typedef struct {			// vm_allocate()'d, so the array of magazines is page-aligned to begin with.
	// Take the magazine_lock first, then the Depot lock when needed for recirc, then szone->{tiny,small}_regions_lock when needed for alloc
	_malloc_lock_s		magazine_lock CACHE_ALIGN;
	// Protection for the critical section that does allocate_pages outside the magazine_lock
	volatile boolean_t	alloc_underway;

	// One element deep "death row", optimizes malloc/free/malloc for identical size.
	void		*mag_last_free; // low SHIFT_{TINY,SMALL}_QUANTUM bits indicate the msize
	region_t		mag_last_free_rgn; // holds the region for mag_last_free

	free_list_t		*mag_free_list[256]; // assert( 256 >= MAX( NUM_TINY_SLOTS, NUM_SMALL_SLOTS_LARGEMEM ))
	unsigned		mag_bitmap[8]; // assert( sizeof(mag_bitmap) << 3 >= sizeof(mag_free_list)/sizeof(free_list_t) )

	// the first and last free region in the last block are treated as big blocks in use that are not accounted for
	size_t		mag_bytes_free_at_end;
	size_t		mag_bytes_free_at_start;
	region_t		mag_last_region; // Valid iff mag_bytes_free_at_end || mag_bytes_free_at_start > 0

	// bean counting ...
	unsigned		mag_num_objects;
	size_t		mag_num_bytes_in_objects;
	size_t		num_bytes_in_magazine;

	// recirculation list -- invariant: all regions owned by this magazine that meet the emptiness criteria
	// are located nearer to the head of the list than any region that doesn't satisfy that criteria.
	// Doubly linked list for efficient extraction.
	unsigned		recirculation_entries;
	region_trailer_t	*firstNode;
	region_trailer_t	*lastNode;

	uintptr_t		pad[50-CACHE_LINE/sizeof(uintptr_t)];
} magazine_t;

#ifdef __has_extension
#if __has_extension(c_static_assert)
#if __LP64__
_Static_assert(sizeof(magazine_t) == 2560, "Incorrect padding in magazine_t");
#else
_Static_assert(sizeof(magazine_t) == 1280, "Incorrect padding in magazine_t");
#endif
#endif
#endif

#define TINY_MAX_MAGAZINES		32 /* MUST BE A POWER OF 2! */
#define TINY_MAGAZINE_PAGED_SIZE						\
(((sizeof(magazine_t) * (TINY_MAX_MAGAZINES + 1)) + vm_page_quanta_size - 1) &\
~ (vm_page_quanta_size - 1)) /* + 1 for the Depot */

#define SMALL_MAX_MAGAZINES		32 /* MUST BE A POWER OF 2! */
#define SMALL_MAGAZINE_PAGED_SIZE						\
(((sizeof(magazine_t) * (SMALL_MAX_MAGAZINES + 1)) + vm_page_quanta_size - 1) &\
~ (vm_page_quanta_size - 1)) /* + 1 for the Depot */

#define DEPOT_MAGAZINE_INDEX		-1

/****************************** zone itself ***********************************/

/*
 * Note that objects whose addresses are held in pointers here must be pursued
 * individually in the {tiny,small}_in_use_enumeration() routines. See for
 * example the treatment of region_hash_generation and tiny_magazines below.
 */

typedef struct szone_s {				// vm_allocate()'d, so page-aligned to begin with.
	malloc_zone_t		basic_zone;		// first page will be given read-only protection
	uint8_t			pad[PAGE_MAX_SIZE - sizeof(malloc_zone_t)];

	unsigned long		cpu_id_key;		// unused
	// remainder of structure is R/W (contains no function pointers)
	unsigned			debug_flags;
	void			*log_address;

	/* Regions for tiny objects */
	_malloc_lock_s	tiny_regions_lock CACHE_ALIGN;
	size_t			num_tiny_regions;
	size_t			num_tiny_regions_dealloc;
	region_hash_generation_t	*tiny_region_generation;
	region_hash_generation_t	trg[2];

	int				num_tiny_magazines;
	unsigned			num_tiny_magazines_mask;
	int				num_tiny_magazines_mask_shift;
	magazine_t			*tiny_magazines; // array of per-processor magazines

	uintptr_t			last_tiny_advise;

	/* Regions for small objects */
	_malloc_lock_s	small_regions_lock CACHE_ALIGN;
	size_t			num_small_regions;
	size_t			num_small_regions_dealloc;
	region_hash_generation_t	*small_region_generation;
	region_hash_generation_t	srg[2];

	unsigned			num_small_slots; // determined by physmem size

	int				num_small_magazines;
	unsigned			num_small_magazines_mask;
	int				num_small_magazines_mask_shift;
	magazine_t			*small_magazines; // array of per-processor magazines

	uintptr_t			last_small_advise;

	/* large objects: all the rest */
	_malloc_lock_s		large_szone_lock CACHE_ALIGN; // One customer at a time for large
	unsigned			num_large_objects_in_use;
	unsigned			num_large_entries;
	large_entry_t		*large_entries; // hashed by location; null entries don't count
	size_t			num_bytes_in_large_objects;

#if LARGE_CACHE
	int				large_entry_cache_oldest;
	int				large_entry_cache_newest;
	large_entry_t		large_entry_cache[LARGE_ENTRY_CACHE_SIZE]; // "death row" for large malloc/free
	boolean_t			large_legacy_reset_mprotect;
	size_t			large_entry_cache_reserve_bytes;
	size_t			large_entry_cache_reserve_limit;
	size_t			large_entry_cache_bytes; // total size of death row, bytes
#endif

	/* flag and limits pertaining to altered malloc behavior for systems with
	 large amounts of physical memory */
	unsigned  is_largemem;
	unsigned  large_threshold;
	unsigned  vm_copy_threshold;

	/* security cookie */
	uintptr_t cookie;

	/* Initial region list */
	region_t			initial_tiny_regions[INITIAL_NUM_REGIONS];
	region_t			initial_small_regions[INITIAL_NUM_REGIONS];

	/* The purgeable zone constructed by create_purgeable_zone() would like to hand off tiny and small
	 * allocations to the default scalable zone. Record the latter as the "helper" zone here. */
	struct szone_s		*helper_zone;

	boolean_t			flotsam_enabled;
} szone_t;

#define SZONE_PAGED_SIZE		round_page_quanta((sizeof(szone_t)))

#if DEBUG_MALLOC || DEBUG_CLIENT
static void		szone_sleep(void);
#endif
extern void malloc_error_break(void);

// msg prints after fmt, ...
static NOINLINE void	szone_error(szone_t *szone, int is_corruption, const char *msg, const void *ptr, const char *fmt, ...)
__printflike(5, 6);

static void		protect(void *address, size_t size, unsigned protection, unsigned debug_flags);
static void		*allocate_pages(szone_t *szone, size_t size, unsigned char align, unsigned debug_flags,
								int vm_page_label);
static void		*allocate_pages_securely(szone_t *szone, size_t size, unsigned char align,
										 int vm_page_label);
static void		deallocate_pages(szone_t *szone, void *addr, size_t size, unsigned debug_flags);
static int		madvise_free_range(szone_t *szone, region_t r, uintptr_t pgLo, uintptr_t pgHi, uintptr_t *last);
static kern_return_t	_szone_default_reader(task_t task, vm_address_t address, vm_size_t size, void **ptr);

static INLINE		mag_index_t mag_get_thread_index(szone_t *szone) ALWAYSINLINE;
static magazine_t	*mag_lock_zine_for_region_trailer(szone_t *szone, magazine_t *magazines, region_trailer_t *trailer,
													  mag_index_t mag_index);

static INLINE rgnhdl_t	hash_lookup_region_no_lock(region_t *regions, size_t num_entries, size_t shift, region_t r)
ALWAYSINLINE;
static void		hash_region_insert_no_lock(region_t *regions, size_t num_entries, size_t shift, region_t r);
static region_t		*hash_regions_alloc_no_lock(szone_t *szone, size_t num_entries);
static region_t		*hash_regions_grow_no_lock(szone_t *szone, region_t *regions, size_t old_size,
											   size_t *mutable_shift, size_t *new_size);

static INLINE uintptr_t free_list_gen_checksum(uintptr_t ptr) ALWAYSINLINE;
static INLINE uintptr_t free_list_checksum_ptr(szone_t *szone, void *p) ALWAYSINLINE;
static INLINE void	*free_list_unchecksum_ptr(szone_t *szone, ptr_union *ptr) ALWAYSINLINE;
static unsigned		free_list_count(szone_t *szone, free_list_t *ptr);

static INLINE void	recirc_list_extract(szone_t *szone, magazine_t *mag_ptr, region_trailer_t *node) ALWAYSINLINE;
static INLINE void	recirc_list_splice_last(szone_t *szone, magazine_t *mag_ptr, region_trailer_t *node) ALWAYSINLINE;
static INLINE void	recirc_list_splice_first(szone_t *szone, magazine_t *mag_ptr, region_trailer_t *node) ALWAYSINLINE;

static INLINE void	BITARRAY_SET(uint32_t *bits, msize_t index) ALWAYSINLINE;
static INLINE void	BITARRAY_CLR(uint32_t *bits, msize_t index) ALWAYSINLINE;
static INLINE boolean_t BITARRAY_BIT(uint32_t *bits, msize_t index) ALWAYSINLINE;

static msize_t		get_tiny_free_size(const void *ptr);
static msize_t		get_tiny_previous_free_msize(const void *ptr);
static INLINE msize_t	get_tiny_meta_header(const void *ptr, boolean_t *is_free) ALWAYSINLINE;
static INLINE void	set_tiny_meta_header_in_use(const void *ptr, msize_t msize) ALWAYSINLINE;
static INLINE void	set_tiny_meta_header_in_use_1(const void *ptr) ALWAYSINLINE;
static INLINE void	set_tiny_meta_header_middle(const void *ptr) ALWAYSINLINE;
static INLINE void	set_tiny_meta_header_free(const void *ptr, msize_t msize) ALWAYSINLINE;
static INLINE boolean_t	tiny_meta_header_is_free(const void *ptr) ALWAYSINLINE;
static INLINE void	*tiny_previous_preceding_free(void *ptr, msize_t *prev_msize) ALWAYSINLINE;

static void		tiny_free_list_add_ptr(szone_t *szone, magazine_t *tiny_mag_ptr, void *ptr, msize_t msize);
static void		tiny_free_list_remove_ptr(szone_t *szone, magazine_t *tiny_mag_ptr, void *ptr, msize_t msize);
static INLINE region_t	tiny_region_for_ptr_no_lock(szone_t *szone, const void *ptr) ALWAYSINLINE;

static void		tiny_finalize_region(szone_t *szone, magazine_t *tiny_mag_ptr);
static int		tiny_free_detach_region(szone_t *szone, magazine_t *tiny_mag_ptr, region_t r);
static size_t		tiny_free_reattach_region(szone_t *szone, magazine_t *tiny_mag_ptr, region_t r);
static void		tiny_free_scan_madvise_free(szone_t *szone, magazine_t *depot_ptr, region_t r);
static region_t		tiny_free_try_depot_unmap_no_lock(szone_t *szone, magazine_t *depot_ptr, region_trailer_t *node);
static boolean_t	tiny_free_do_recirc_to_depot(szone_t *szone, magazine_t *tiny_mag_ptr, mag_index_t mag_index);
static region_t		tiny_find_msize_region(szone_t *szone, magazine_t *tiny_mag_ptr, mag_index_t mag_index, msize_t msize);
static boolean_t	tiny_get_region_from_depot(szone_t *szone, magazine_t *tiny_mag_ptr, mag_index_t mag_index, msize_t msize);

static INLINE boolean_t	tiny_free_no_lock(szone_t *szone, magazine_t *tiny_mag_ptr, mag_index_t mag_index, region_t region,
										  void *ptr, msize_t msize) ALWAYSINLINE;
static void		*tiny_malloc_from_region_no_lock(szone_t *szone, magazine_t *tiny_mag_ptr, mag_index_t mag_index,
												 msize_t msize, void *fresh_region);
static boolean_t	tiny_try_realloc_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size);
static boolean_t	tiny_check_region(szone_t *szone, region_t region);
static kern_return_t	tiny_in_use_enumerator(task_t task, void *context, unsigned type_mask, szone_t *szone,
											   memory_reader_t reader, vm_range_recorder_t recorder);
static void		*tiny_malloc_from_free_list(szone_t *szone, magazine_t *tiny_mag_ptr, mag_index_t mag_index,
											msize_t msize);
static INLINE void	*tiny_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_requested) ALWAYSINLINE;
static INLINE void	free_tiny(szone_t *szone, void *ptr, region_t tiny_region, size_t known_size) ALWAYSINLINE;
static void		print_tiny_free_list(szone_t *szone);
static void		print_tiny_region(boolean_t verbose, region_t region, size_t bytes_at_start, size_t bytes_at_end);
static boolean_t	tiny_free_list_check(szone_t *szone, grain_t slot);

static INLINE void	small_meta_header_set_is_free(msize_t *meta_headers, unsigned index, msize_t msize) ALWAYSINLINE;
static INLINE void	small_meta_header_set_in_use(msize_t *meta_headers, msize_t index, msize_t msize) ALWAYSINLINE;
static INLINE void	small_meta_header_set_middle(msize_t *meta_headers, msize_t index) ALWAYSINLINE;
static void		small_free_list_add_ptr(szone_t *szone, magazine_t *small_mag_ptr, void *ptr, msize_t msize);
static void		small_free_list_remove_ptr(szone_t *szone, magazine_t *small_mag_ptr, void *ptr, msize_t msize);
static INLINE region_t	small_region_for_ptr_no_lock(szone_t *szone, const void *ptr) ALWAYSINLINE;

static void		small_finalize_region(szone_t *szone, magazine_t *small_mag_ptr);
static int		small_free_detach_region(szone_t *szone, magazine_t *small_mag_ptr, region_t r);
static size_t		small_free_reattach_region(szone_t *szone, magazine_t *small_mag_ptr, region_t r);
static void		small_free_scan_madvise_free(szone_t *szone, magazine_t *depot_ptr, region_t r);
static region_t		small_free_try_depot_unmap_no_lock(szone_t *szone, magazine_t *depot_ptr, region_trailer_t *node);
static boolean_t	small_free_do_recirc_to_depot(szone_t *szone, magazine_t *small_mag_ptr, mag_index_t mag_index);
static region_t		small_find_msize_region(szone_t *szone, magazine_t *small_mag_ptr, mag_index_t mag_index, msize_t msize);
static boolean_t	small_get_region_from_depot(szone_t *szone, magazine_t *small_mag_ptr, mag_index_t mag_index, msize_t msize);
static INLINE boolean_t	small_free_no_lock(szone_t *szone, magazine_t *small_mag_ptr, mag_index_t mag_index, region_t region,
										   void *ptr, msize_t msize) ALWAYSINLINE;
static void		*small_malloc_from_region_no_lock(szone_t *szone, magazine_t *small_mag_ptr, mag_index_t mag_index,
												  msize_t msize, void *fresh_region);
static boolean_t	small_try_realloc_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size);
static boolean_t	small_check_region(szone_t *szone, region_t region);
static kern_return_t	small_in_use_enumerator(task_t task, void *context, unsigned type_mask, szone_t *szone,
												memory_reader_t reader, vm_range_recorder_t recorder);
static void		*small_malloc_from_free_list(szone_t *szone, magazine_t *small_mag_ptr, mag_index_t mag_index,
											 msize_t msize);
static INLINE void	*small_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_requested) ALWAYSINLINE;
static INLINE void	free_small(szone_t *szone, void *ptr, region_t small_region, size_t known_size) ALWAYSINLINE;
static void		print_small_free_list(szone_t *szone);
static void		print_small_region(szone_t *szone, boolean_t verbose, region_t region, size_t bytes_at_start, size_t bytes_at_end);
static boolean_t	small_free_list_check(szone_t *szone, grain_t grain);

#if DEBUG_MALLOC
static void		large_debug_print(szone_t *szone);
#endif
static large_entry_t	*large_entry_for_pointer_no_lock(szone_t *szone, const void *ptr);
static void		large_entry_insert_no_lock(szone_t *szone, large_entry_t range);
static INLINE void	large_entries_rehash_after_entry_no_lock(szone_t *szone, large_entry_t *entry) ALWAYSINLINE;
static INLINE large_entry_t *large_entries_alloc_no_lock(szone_t *szone, unsigned num) ALWAYSINLINE;
static void		large_entries_free_no_lock(szone_t *szone, large_entry_t *entries, unsigned num,
										   vm_range_t *range_to_deallocate);
static large_entry_t	*large_entries_grow_no_lock(szone_t *szone, vm_range_t *range_to_deallocate);
static vm_range_t	large_entry_free_no_lock(szone_t *szone, large_entry_t *entry);
static NOINLINE		kern_return_t	large_in_use_enumerator(task_t task, void *context,
															unsigned type_mask, vm_address_t large_entries_address,
															unsigned num_entries, memory_reader_t reader,
															vm_range_recorder_t recorder);
static void		*large_malloc(szone_t *szone, size_t num_kernel_pages, unsigned char alignment, boolean_t cleared_requested);
static NOINLINE void	free_large(szone_t *szone, void *ptr);
static INLINE int	large_try_realloc_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size) ALWAYSINLINE;

/*
 * Mark these NOINLINE to avoid bloating the purgeable zone call backs
 */
static NOINLINE void	szone_free(szone_t *szone, void *ptr);
static NOINLINE void	*szone_malloc_should_clear(szone_t *szone, size_t size, boolean_t cleared_requested);
static NOINLINE void	*szone_malloc(szone_t *szone, size_t size);
static NOINLINE void	*szone_calloc(szone_t *szone, size_t num_items, size_t size);
static NOINLINE void	*szone_valloc(szone_t *szone, size_t size);
static NOINLINE size_t	szone_size_try_large(szone_t *szone, const void *ptr);
static NOINLINE size_t	szone_size(szone_t *szone, const void *ptr);
static NOINLINE void	*szone_realloc(szone_t *szone, void *ptr, size_t new_size);
static NOINLINE void	*szone_memalign(szone_t *szone, size_t alignment, size_t size);
static NOINLINE void	szone_free_definite_size(szone_t *szone, void *ptr, size_t size);
static NOINLINE unsigned szone_batch_malloc(szone_t *szone, size_t size, void **results, unsigned count);
static NOINLINE void	szone_batch_free(szone_t *szone, void **to_be_freed, unsigned count);
static void		szone_destroy(szone_t *szone);
static NOINLINE size_t	szone_good_size(szone_t *szone, size_t size);

static NOINLINE boolean_t szone_check_all(szone_t *szone, const char *function);
static boolean_t	szone_check(szone_t *szone);
static kern_return_t	szone_ptr_in_use_enumerator(task_t task, void *context,
													unsigned type_mask, vm_address_t zone_address,
													memory_reader_t reader, vm_range_recorder_t recorder);
static NOINLINE void	szone_print(szone_t *szone, boolean_t verbose);
static void		szone_log(malloc_zone_t *zone, void *log_address);
static void		szone_force_lock(szone_t *szone);
static INLINE void	szone_force_lock_magazine(szone_t *szone, magazine_t *mag);
static void		szone_force_unlock(szone_t *szone);
static boolean_t	szone_locked(szone_t *szone);

static void		szone_statistics(szone_t *szone, malloc_statistics_t *stats);

static void		purgeable_free(szone_t *szone, void *ptr);
static void		*purgeable_malloc(szone_t *szone, size_t size);
static void		*purgeable_calloc(szone_t *szone, size_t num_items, size_t size);
static void		*purgeable_valloc(szone_t *szone, size_t size);
static size_t		purgeable_size(szone_t *szone, const void *ptr);
static void		*purgeable_realloc(szone_t *szone, void *ptr, size_t new_size);
static void		*purgeable_memalign(szone_t *szone, size_t alignment, size_t size);
static void		purgeable_free_definite_size(szone_t *szone, void *ptr, size_t size);
static unsigned		purgeable_batch_malloc(szone_t *szone, size_t size, void **results, unsigned count);
static void		purgeable_batch_free(szone_t *szone, void **to_be_freed, unsigned count);
static void		purgeable_destroy(szone_t *szone);
static size_t		purgeable_good_size(szone_t *szone, size_t size);

static boolean_t	purgeable_check(szone_t *szone);
static kern_return_t	purgeable_ptr_in_use_enumerator(task_t task, void *context,
														unsigned type_mask, vm_address_t zone_address,
														memory_reader_t reader, vm_range_recorder_t recorder);
static void		purgeable_print(szone_t *szone, boolean_t verbose);
static void		purgeable_log(malloc_zone_t *zone, void *log_address);
static void		purgeable_force_lock(szone_t *szone);
static void		purgeable_force_unlock(szone_t *szone);
static boolean_t	purgeable_locked(szone_t *szone);

static void		purgeable_statistics(szone_t *szone, malloc_statistics_t *stats);

static void		*frozen_malloc(szone_t *zone, size_t new_size);
static void		*frozen_calloc(szone_t *zone, size_t num_items, size_t size);
static void		*frozen_valloc(szone_t *zone, size_t new_size);
static void		*frozen_realloc(szone_t *zone, void *ptr, size_t new_size);
static void		frozen_free(szone_t *zone, void *ptr);
static void		frozen_destroy(szone_t *zone);

static volatile uintptr_t entropic_address = 0;
static volatile uintptr_t entropic_limit = 0;
#define ENTROPIC_KABILLION 0x10000000 /* 256Mb */

extern uint64_t malloc_entropy[2];

static inline void
SZONE_LOCK(szone_t *szone) {
	_malloc_lock_lock(&szone->large_szone_lock);
}

static inline void
SZONE_UNLOCK(szone_t *szone) {
	_malloc_lock_unlock(&szone->large_szone_lock);
}

static inline bool
SZONE_TRY_LOCK(szone_t *szone) {
	return _malloc_lock_trylock(&szone->large_szone_lock);
}

static inline void
SZONE_MAGAZINE_PTR_LOCK(szone_t *szone, magazine_t *mag_ptr) {
	_malloc_lock_lock(&mag_ptr->magazine_lock);
}

static inline void
SZONE_MAGAZINE_PTR_UNLOCK(szone_t *szone, magazine_t *mag_ptr) {
	_malloc_lock_unlock(&mag_ptr->magazine_lock);
}

static inline bool
SZONE_MAGAZINE_PTR_TRY_LOCK(szone_t *szone, magazine_t *mag_ptr) {
	return _malloc_lock_trylock(&mag_ptr->magazine_lock);
}

static inline void yield(void) {
	thread_switch(MACH_PORT_NULL, SWITCH_OPTION_DEPRESS, 1);
}

#if DEBUG_MALLOC
# define LOG(szone,ptr)							\
	(szone->log_address && (((uintptr_t)szone->log_address == -1) ||	\
	(szone->log_address == (void *)(ptr))))
#else
# define LOG(szone,ptr)		0
#endif

#if DEBUG_MALLOC || DEBUG_CLIENT
# define CHECK(szone,fun)						\
	if ((szone)->debug_flags & CHECK_REGIONS)				\
	szone_check_all(szone, fun)
#else
# define CHECK(szone,fun)						\
	do {} while (0)
#endif

/*********************	VERY LOW LEVEL UTILITIES  ************************/

static void
szone_sleep(void)
{
	if (getenv("MallocErrorStop")) {
		_malloc_printf(ASL_LEVEL_NOTICE, "*** sending SIGSTOP to help debug\n");
		kill(getpid(), SIGSTOP);
	} else if (getenv("MallocErrorSleep")) {
		_malloc_printf(ASL_LEVEL_NOTICE, "*** sleeping to help debug\n");
		sleep(3600); // to help debug
	}
}

// msg prints after fmt, ...
static NOINLINE void
szone_error(szone_t *szone, int is_corruption, const char *msg, const void *ptr, const char *fmt, ...)
{
	va_list ap;
	_SIMPLE_STRING b = _simple_salloc();

	if (b) {
		if (fmt) {
			va_start(ap, fmt);
			_simple_vsprintf(b, fmt, ap);
			va_end(ap);
		}
		if (ptr) {
			_simple_sprintf(b, "*** error for object %p: %s\n", ptr, msg);
		} else {
			_simple_sprintf(b, "*** error: %s\n", msg);
		}
		malloc_printf("%s*** set a breakpoint in malloc_error_break to debug\n", _simple_string(b));
	} else {
		/*
		 * Should only get here if vm_allocate() can't get a single page of
		 * memory, implying _simple_asl_log() would also fail.  So we just
		 * print to the file descriptor.
		 */
		if (fmt) {
			va_start(ap, fmt);
			_malloc_vprintf(MALLOC_PRINTF_NOLOG, fmt, ap);
			va_end(ap);
		}
		if (ptr) {
			_malloc_printf(MALLOC_PRINTF_NOLOG, "*** error for object %p: %s\n", ptr, msg);
		} else {
			_malloc_printf(MALLOC_PRINTF_NOLOG, "*** error: %s\n", msg);
		}
		_malloc_printf(MALLOC_PRINTF_NOLOG, "*** set a breakpoint in malloc_error_break to debug\n");
	}
	malloc_error_break();
#if DEBUG_MALLOC
	szone_print(szone, 1);
#endif
	szone_sleep();
	// Call abort() if this is a memory corruption error and the abort on
	// corruption flag is set, or if any error should abort.
	if ((is_corruption && (szone->debug_flags & SCALABLE_MALLOC_ABORT_ON_CORRUPTION)) ||
		(szone->debug_flags & SCALABLE_MALLOC_ABORT_ON_ERROR)) {
		CRSetCrashLogMessage(b ? _simple_string(b) : msg);
		abort();
	} else if (b) {
		_simple_sfree(b);
	}
}

static void
protect(void *address, size_t size, unsigned protection, unsigned debug_flags)
{
	kern_return_t	err;

	if (!(debug_flags & SCALABLE_MALLOC_DONT_PROTECT_PRELUDE)) {
		err = mprotect((void *)((uintptr_t)address - vm_page_quanta_size), vm_page_quanta_size, protection);
		if (err) {
			malloc_printf("*** can't protect(%u) region for prelude guard page at %p\n",
						  protection, (void *)((uintptr_t)address - vm_page_quanta_size));
		}
	}
	if (!(debug_flags & SCALABLE_MALLOC_DONT_PROTECT_POSTLUDE)) {
		err = mprotect((void *)(round_page_quanta(((uintptr_t)address + size))), vm_page_quanta_size, protection);
		if (err) {
			malloc_printf("*** can't protect(%u) region for postlude guard page at %p\n",
						  protection, (void *)((uintptr_t)address + size));
		}
	}
}

static void *
allocate_pages(szone_t *szone, size_t size, unsigned char align, unsigned debug_flags, int vm_page_label)
{
	boolean_t add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES;
	boolean_t purgeable = debug_flags & SCALABLE_MALLOC_PURGEABLE;
	mach_vm_address_t vm_addr;
	uintptr_t addr;
	mach_vm_size_t allocation_size = round_page_quanta(size);
	mach_vm_offset_t allocation_mask = ((mach_vm_offset_t)1 << align) - 1;
	int alloc_flags = VM_FLAGS_ANYWHERE | VM_MAKE_TAG(vm_page_label);
	kern_return_t kr;

	if (!allocation_size) allocation_size = vm_page_quanta_size;
	if (add_guard_pages) {
		if (align > vm_page_quanta_shift) {
			/* <rdar://problem/16601499> alignment greater than pagesize needs more work */
			allocation_size += (1<<align) + vm_page_quanta_size;
		} else {
			allocation_size += 2 * vm_page_quanta_size;
		}
	}

	if (purgeable) alloc_flags |= VM_FLAGS_PURGABLE;
	if (allocation_size < size) // size_t arithmetic wrapped!
		return NULL;

	vm_addr = vm_page_quanta_size;
	kr = mach_vm_map(mach_task_self(), &vm_addr, allocation_size,
			allocation_mask, alloc_flags, MEMORY_OBJECT_NULL, 0, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr) {
		szone_error(szone, 0, "can't allocate region", NULL,
				"*** mach_vm_map(size=%lu) failed (error code=%d)\n", size, kr);
		return NULL;
	}
	addr = (uintptr_t)vm_addr;

	if (add_guard_pages) {
		if (align > vm_page_quanta_shift) {
			/* <rdar://problem/16601499> calculate the first address inside the alignment padding
			 * where we can place the guard page and still be aligned.
			 *
			 * |-----------------------------------------------------------|
			 * |leading|gp|                  alloc                  |gp| t |
			 * |-----------------------------------------------------------|
			 */
			uintptr_t alignaddr = ((addr + vm_page_quanta_size) + (1<<align) - 1) & ~((1<<align) - 1);
			size_t leading = alignaddr - addr - vm_page_quanta_size;
			size_t trailing = (1<<align) - vm_page_quanta_size - leading;

			/* Unmap the excess area. */
			kr = mach_vm_deallocate(mach_task_self(), addr, leading);
			if (kr) {
				szone_error(szone, 0, "can't unmap excess guard region", NULL,
						"*** mach_vm_deallocate(addr=%p, size=%lu) failed (code=%d)", addr, leading, kr);
				return NULL;
			}

			kr = mach_vm_deallocate(mach_task_self(), addr + allocation_size - trailing, trailing);
			if (kr) {
				szone_error(szone, 0, "can't unmap excess trailing guard region", NULL,
						"*** mach_vm_deallocate(addr=%p, size=%lu) failed (code=%d)", addr + allocation_size - trailing, trailing, kr);
				return NULL;
			}

			addr = alignaddr;
		} else {
			addr += vm_page_quanta_size;
		}
		protect((void *)addr, size, PROT_NONE, debug_flags);
	}
	return (void *)addr;
}

static void *
allocate_pages_securely(szone_t *szone, size_t size, unsigned char align, int vm_page_label)
{
	mach_vm_address_t vm_addr;
	uintptr_t addr;
	mach_vm_size_t allocation_size = round_page_quanta(size);
	mach_vm_offset_t allocation_mask = ((mach_vm_offset_t)1 << align) - 1;
	int alloc_flags = VM_FLAGS_ANYWHERE | VM_MAKE_TAG(vm_page_label);
	kern_return_t kr;

	if (szone->debug_flags & DISABLE_ASLR) {
		return allocate_pages(szone, size, align, 0, vm_page_label);
	}

	if (!allocation_size) allocation_size = vm_page_quanta_size;
	if (allocation_size < size) // size_t arithmetic wrapped!
		return NULL;

retry:
	vm_addr = entropic_address;
	kr = mach_vm_map(mach_task_self(), &vm_addr, allocation_size,
			allocation_mask, alloc_flags, MEMORY_OBJECT_NULL, 0, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr == KERN_NO_SPACE) {
		vm_addr = vm_page_quanta_size;
		kr = mach_vm_map(mach_task_self(), &vm_addr, allocation_size,
				allocation_mask, alloc_flags, MEMORY_OBJECT_NULL, 0, FALSE,
				VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	}
	if (kr) {
		szone_error(szone, 0, "can't allocate region securely", NULL,
				"*** mach_vm_map(size=%lu) failed (error code=%d)\n", size, kr);
		return NULL;
	}
	addr = (uintptr_t)vm_addr;

	// Don't allow allocation to rise above entropic_limit (for tidiness).
	if (addr + allocation_size > entropic_limit) { // Exhausted current range?
		uintptr_t t = entropic_address;
		uintptr_t u = t - ENTROPIC_KABILLION;

		if (u < t) { // provided we don't wrap, deallocate and retry, in the expanded entropic range
			mach_vm_deallocate(mach_task_self(), vm_addr, allocation_size);
			(void)__sync_bool_compare_and_swap(&entropic_address, t, u); // Just one reduction please
			goto retry;
		}
		// fall through to use what we got
	}

	if (addr < entropic_address) { // we wrapped to find this allocation, expand the entropic range
		uintptr_t t = entropic_address;
		uintptr_t u = t - ENTROPIC_KABILLION;
		if (u < t)
			(void)__sync_bool_compare_and_swap(&entropic_address, t, u); // Just one reduction please
		// fall through to use what we got
	}
	return (void *)addr;
}

static void
deallocate_pages(szone_t *szone, void *addr, size_t size, unsigned debug_flags)
{
	boolean_t add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES;
	mach_vm_address_t vm_addr = (mach_vm_address_t)addr;
	mach_vm_size_t allocation_size = size;
	kern_return_t kr;

	if (add_guard_pages) {
		vm_addr -= vm_page_quanta_size;
		allocation_size += 2 * vm_page_quanta_size;
	}
	kr = mach_vm_deallocate(mach_task_self(), vm_addr, allocation_size);
	if (kr && szone)
		szone_error(szone, 0, "Can't deallocate_pages region", addr, NULL);
}

/* On OS X we use MADV_FREE_REUSABLE, which signals the kernel to remove the given
 * pages from the memory statistics for our process. However, on returning that memory
 * to use we have to signal that it has been reused.
 *
 * On iOS MADV_FREE is used, which does no such tinkering and madvise_reuse_range is a
 * no-op.
 */
#if TARGET_OS_EMBEDDED
# define MADVISE_STYLE MADV_FREE
#else
# define MADVISE_STYLE MADV_FREE_REUSABLE
#endif

static int
madvise_free_range(szone_t *szone, region_t r, uintptr_t pgLo, uintptr_t pgHi, uintptr_t *last)
{
	if (pgHi > pgLo) {
		size_t len = pgHi - pgLo;

		if (szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) {
			memset((void *)pgLo, SCRUBBLE_BYTE, len); // Scribble on MADV_FREEd memory
		}

#if TARGET_OS_EMBEDDED
		if (last) {
			if (*last == pgLo)
				return 0;

			*last = pgLo;
		}
#endif

		MAGMALLOC_MADVFREEREGION((void *)szone, (void *)r, (void *)pgLo, len); // DTrace USDT Probe
		if (-1 == madvise((void *)pgLo, len, MADVISE_STYLE)) {
			/* -1 return: VM map entry change makes this unfit for reuse. Something evil lurks. */
#if DEBUG_MADVISE
			szone_error(szone, 0, "madvise_free_range madvise(..., MADV_FREE_REUSABLE) failed",
						(void *)pgLo, "length=%d\n", len);
#endif
		}
	}
	return 0;
}

static int
madvise_reuse_range(szone_t *szone, region_t r, uintptr_t pgLo, uintptr_t pgHi)
{
#if !TARGET_OS_EMBEDDED
	if (pgHi > pgLo) {
		size_t len = pgHi - pgLo;

		if (madvise((void *)pgLo, len, MADV_FREE_REUSE) == -1) {
			/* -1 return: VM map entry change makes this unfit for reuse. Something evil lurks. */
#if DEBUG_MADVISE
			szone_error(szone, 0, "madvise_reuse_range madvise(..., MADV_FREE_REUSE) failed",
					(void *)pgLo, "length=%d\n", len);
#endif
			return 1;
		}
	}
#endif
	return 0;
}

static kern_return_t
_szone_default_reader(task_t task, vm_address_t address, vm_size_t size, void **ptr)
{
	*ptr = (void *)address;
	return 0;
}

/*
 * These commpage routines provide fast access to the logical cpu number
 * of the calling processor assuming no pre-emption occurs.
 */

static INLINE mag_index_t
mag_get_thread_index(szone_t *szone)
{
	return _os_cpu_number() & (TINY_MAX_MAGAZINES - 1);
}
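
/*
 * Editorial example: because TINY_MAX_MAGAZINES is a power of two, the mask
 * above is a cheap modulo; e.g. with 32 magazines, logical CPU 37 maps to
 * magazine index 37 & 31 = 5.
 */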

static magazine_t *
mag_lock_zine_for_region_trailer(szone_t *szone, magazine_t *magazines, region_trailer_t *trailer, mag_index_t mag_index)
{
	mag_index_t refreshed_index;
	magazine_t *mag_ptr = &(magazines[mag_index]);

	// Take the lock on entry.
	SZONE_MAGAZINE_PTR_LOCK(szone, mag_ptr);

	// Now in the time it took to acquire the lock, the region may have migrated
	// from one magazine to another. In which case the magazine lock we obtained
	// (namely magazines[mag_index].mag_lock) is stale. If so, keep on tryin' ...
	while (mag_index != (refreshed_index = trailer->mag_index)) { // Note assignment

		SZONE_MAGAZINE_PTR_UNLOCK(szone, mag_ptr);

		mag_index = refreshed_index;
		mag_ptr = &(magazines[mag_index]);
		SZONE_MAGAZINE_PTR_LOCK(szone, mag_ptr);
	}

	return mag_ptr;
}

/*******************************************************************************
 * Region hash implementation
 *
 * This is essentially a duplicate of the existing Large allocator hash, minus
 * the ability to remove entries.  The two should be combined eventually.
 ******************************************************************************/
#pragma mark region hash

/*
 * hash_lookup_region_no_lock - Scan a hash ring looking for an entry for a
 * given region.
 *
 * FIXME: If consecutive queries of the same region are likely, a one-entry
 * cache would likely be a significant performance win here.
 */
static INLINE rgnhdl_t
hash_lookup_region_no_lock(region_t *regions, size_t num_entries, size_t shift, region_t r) {
	size_t	index, hash_index;
	rgnhdl_t	entry;

	if (!num_entries)
		return 0;

	// Multiplicative hash where the multiplier is a prime near (ULONG_MAX / phi). [phi = 1.618033...]
	// Since the values of (((uintptr_t)r >> HASH_BLOCKS_ALIGN) are (roughly) an ascending sequence of integers,
	// this hash works really well. See Knuth TAOCP, Vol. 3.
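	// (Editorial note: 2654435761 and 11400714819323198549 are primes near
	// 2^32/phi and 2^64/phi respectively -- i.e. Fibonacci hashing -- and the
	// final >> (word size - shift) keeps the top 'shift' bits as the table index.)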
1338#if __LP64__
1339	index = hash_index = (((uintptr_t)r >> HASH_BLOCKS_ALIGN) * 11400714819323198549ULL) >> (64 - shift);
1340#else
1341	index = hash_index = (((uintptr_t)r >> HASH_BLOCKS_ALIGN) * 2654435761UL) >> (32 - shift);
1342#endif
1343	do {
1344		entry = regions + index;
1345		if (*entry == 0)
1346			return 0;
1347		if (*entry == r)
1348			return entry;
1349		if (++index == num_entries)
1350			index = 0;
1351	} while (index != hash_index);
1352	return 0;
1353}
1354
1355/*
1356 * hash_region_insert_no_lock - Insert a region into the hash ring.
1357 */
1358static void
1359hash_region_insert_no_lock(region_t *regions, size_t num_entries, size_t shift, region_t r) {
1360	size_t	index, hash_index;
1361	rgnhdl_t	entry;
1362
1363	// Multiplicative hash where the multiplier is a prime near (ULONG_MAX / phi). [phi = 1.618033...]
	// Since the values of ((uintptr_t)r >> HASH_BLOCKS_ALIGN) are (roughly) an ascending sequence of integers,
1365	// this hash works really well. See Knuth TAOCP, Vol. 3.
1366#if __LP64__
1367	index = hash_index = (((uintptr_t)r >> HASH_BLOCKS_ALIGN) * 11400714819323198549ULL) >> (64 - shift);
1368#else
1369	index = hash_index = (((uintptr_t)r >> HASH_BLOCKS_ALIGN) * 2654435761UL) >> (32 - shift);
1370#endif
1371	do {
1372		entry = regions + index;
1373		if (*entry == HASHRING_OPEN_ENTRY || *entry == HASHRING_REGION_DEALLOCATED) {
1374			*entry = r;
1375			return;
1376		}
1377		if (++index == num_entries)
1378			index = 0;
1379	} while (index != hash_index);
1380}
1381
1382/*
1383 * hash_regions_alloc_no_lock - Allocate space for a number of entries.  This
 * must be a VM allocation so as to avoid recursing between allocating a new small
 * region and asking the small region to allocate space for the new list of
1386 * regions.
1387 */
1388static region_t *
1389hash_regions_alloc_no_lock(szone_t *szone, size_t num_entries)
1390{
1391	size_t	size = num_entries * sizeof(region_t);
1392
1393	return allocate_pages(szone, round_page_quanta(size), 0, 0, VM_MEMORY_MALLOC);
1394}
1395
1396/*
1397 * hash_regions_grow_no_lock - Grow the hash ring, and rehash the entries.
1398 * Return the new region and new size to update the szone.  Do not deallocate
1399 * the old entries since someone may still be allocating them.
1400 */
1401static region_t *
1402hash_regions_grow_no_lock(szone_t *szone, region_t *regions, size_t old_size, size_t *mutable_shift,
1403						  size_t *new_size)
1404{
1405	// double in size and allocate memory for the regions
1406	*new_size = old_size + old_size;
1407	*mutable_shift = *mutable_shift + 1;
1408	region_t *new_regions = hash_regions_alloc_no_lock(szone, *new_size);
1409
1410	// rehash the entries into the new list
1411	size_t index;
1412	for (index = 0; index < old_size; ++index) {
1413		region_t r = regions[index];
1414		if (r != HASHRING_OPEN_ENTRY && r != HASHRING_REGION_DEALLOCATED)
1415			hash_region_insert_no_lock(new_regions, *new_size, *mutable_shift, r);
1416	}
1417	return new_regions;
1418}
1419
1420/*********************	FREE LIST UTILITIES  ************************/
1421
// A free list entry consists of a pair of pointers, previous and next.
// These are used to implement a doubly-linked list, which permits efficient
// extraction.
//
// Because the free list entries are previously freed objects, a misbehaved
// program may write to a pointer after it has called free() on that pointer,
// either by dereferencing it or by overflowing a buffer in an adjacent block.
// Such a write corrupts the free list's previous and next pointers, leading to
// a crash.  In order to detect this case, we take advantage of the fact that
// malloc'd pointers are known to be at least 16-byte aligned, and thus have
// at least 4 trailing zero bits.
1433//
1434// When an entry is added to the free list, a checksum of the previous and next
1435// pointers is calculated and written to the high four bits of the respective
1436// pointers.  Upon detection of an invalid checksum, an error is logged and NULL
1437// is returned.  Since all code which un-checksums pointers checks for a NULL
1438// return, a potentially crashing or malicious dereference is avoided at the
1439// cost of leaking the corrupted block, and any subsequent blocks on the free
1440// list of that size.
1441
1442static NOINLINE void
1443free_list_checksum_botch(szone_t *szone, free_list_t *ptr)
1444{
1445	szone_error(szone, 1, "incorrect checksum for freed object "
1446				"- object was probably modified after being freed.", ptr, NULL);
1447}
1448
1449static INLINE uintptr_t free_list_gen_checksum(uintptr_t ptr)
1450{
1451	uint8_t chk;
1452
1453	chk  = (unsigned char)(ptr >>  0);
1454	chk += (unsigned char)(ptr >>  8);
1455	chk += (unsigned char)(ptr >> 16);
1456	chk += (unsigned char)(ptr >> 24);
1457#if  __LP64__
1458	chk += (unsigned char)(ptr >> 32);
1459	chk += (unsigned char)(ptr >> 40);
1460	chk += (unsigned char)(ptr >> 48);
1461	chk += (unsigned char)(ptr >> 56);
1462#endif
1463
1464	return chk & (uintptr_t)0xF;
1465}
1466
1467#define NYBBLE 4
1468#if  __LP64__
1469#define ANTI_NYBBLE (64 - NYBBLE)
1470#else
1471#define ANTI_NYBBLE (32 - NYBBLE)
1472#endif
1473
1474static INLINE uintptr_t
1475free_list_checksum_ptr(szone_t *szone, void *ptr)
1476{
1477	uintptr_t p = (uintptr_t)ptr;
1478	return (p >> NYBBLE) | (free_list_gen_checksum(p ^ szone->cookie) << ANTI_NYBBLE); // compiles to rotate instruction
1479}
1480
1481static INLINE void *
1482free_list_unchecksum_ptr(szone_t *szone, ptr_union *ptr)
1483{
1484	ptr_union p;
1485	uintptr_t t = ptr->u;
1486
1487	t = (t << NYBBLE) | (t >> ANTI_NYBBLE); // compiles to rotate instruction
1488	p.u = t &  ~(uintptr_t)0xF;
1489
1490	if ((t & (uintptr_t)0xF) != free_list_gen_checksum(p.u ^ szone->cookie))
1491	{
1492		free_list_checksum_botch(szone, (free_list_t *)ptr);
1493		return NULL;
1494	}
1495	return p.p;
1496}
1497
1498#undef ANTI_NYBBLE
1499#undef NYBBLE
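
/*
 * Illustrative sketch (not compiled): round-tripping a free list link through
 * the checksum helpers above.  `some_block` is a placeholder for any block
 * being threaded onto a free list; its low four bits must be zero (guaranteed
 * by the 16-byte alignment noted above) for the round trip to be exact.
 */
#if 0
	ptr_union link;

	link.u = free_list_checksum_ptr(szone, some_block);	// rotate and store checksum in the top nybble
	void *decoded = free_list_unchecksum_ptr(szone, &link);	// NULL (and a logged error) on corruption
	// decoded == some_block whenever the stored link is intact
#endif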
1500
1501static unsigned
1502free_list_count(szone_t *szone, free_list_t *ptr)
1503{
1504	unsigned	count = 0;
1505
1506	while (ptr) {
1507		count++;
1508		ptr = free_list_unchecksum_ptr(szone, &ptr->next);
1509	}
1510	return count;
1511}
1512
1513static INLINE void
1514recirc_list_extract(szone_t *szone, magazine_t *mag_ptr, region_trailer_t *node)
1515{
1516	// excise node from list
1517	if (NULL == node->prev)
1518		mag_ptr->firstNode = node->next;
1519	else
1520		node->prev->next = node->next;
1521
1522	if (NULL == node->next)
1523		mag_ptr->lastNode = node->prev;
1524	else
1525		node->next->prev = node->prev;
1526
1527	mag_ptr->recirculation_entries--;
1528}
1529
1530static INLINE void
1531recirc_list_splice_last(szone_t *szone, magazine_t *mag_ptr, region_trailer_t *node)
1532{
1533	if (NULL == mag_ptr->lastNode) {
1534		mag_ptr->firstNode = node;
1535		node->prev = NULL;
1536	} else {
1537		node->prev = mag_ptr->lastNode;
1538		mag_ptr->lastNode->next = node;
1539	}
1540	mag_ptr->lastNode = node;
1541	node->next = NULL;
1542	node->recirc_suitable = FALSE;
1543	mag_ptr->recirculation_entries++;
1544}
1545
1546static INLINE void
1547recirc_list_splice_first(szone_t *szone, magazine_t *mag_ptr, region_trailer_t *node)
1548{
1549	if (NULL == mag_ptr->firstNode) {
1550		mag_ptr->lastNode = node;
1551		node->next = NULL;
1552	} else {
1553		node->next = mag_ptr->firstNode;
1554		mag_ptr->firstNode->prev = node;
1555	}
1556	mag_ptr->firstNode = node;
1557	node->prev = NULL;
1558	node->recirc_suitable = FALSE;
1559	mag_ptr->recirculation_entries++;
1560}
1561
1562/* Macros used to manipulate the uint32_t quantity mag_bitmap. */
1563
1564/* BITMAPV variants are used by tiny. */
1565#if defined(__LP64__)
1566// assert(NUM_SLOTS == 64) in which case (slot >> 5) is either 0 or 1
1567#define BITMAPV_SET(bitmap,slot) 	(bitmap[(slot) >> 5] |= 1 << ((slot) & 31))
1568#define BITMAPV_CLR(bitmap,slot) 	(bitmap[(slot) >> 5] &= ~ (1 << ((slot) & 31)))
1569#define BITMAPV_BIT(bitmap,slot)	((bitmap[(slot) >> 5] >> ((slot) & 31)) & 1)
1570#define BITMAPV_CTZ(bitmap)		(__builtin_ctzl(bitmap))
1571#else
1572// assert(NUM_SLOTS == 32) in which case (slot >> 5) is always 0, so code it that way
1573#define BITMAPV_SET(bitmap,slot) 	(bitmap[0] |= 1 << (slot))
1574#define BITMAPV_CLR(bitmap,slot)	(bitmap[0] &= ~ (1 << (slot)))
1575#define BITMAPV_BIT(bitmap,slot) 	((bitmap[0] >> (slot)) & 1)
1576#define BITMAPV_CTZ(bitmap)		(__builtin_ctz(bitmap))
1577#endif
1578
1579/* BITMAPN is used by small. (slot >> 5) takes on values from 0 to 7. */
1580#define BITMAPN_SET(bitmap,slot) 	(bitmap[(slot) >> 5] |= 1 << ((slot) & 31))
1581#define BITMAPN_CLR(bitmap,slot) 	(bitmap[(slot) >> 5] &= ~ (1 << ((slot) & 31)))
1582#define BITMAPN_BIT(bitmap,slot)	((bitmap[(slot) >> 5] >> ((slot) & 31)) & 1)
1583
1584/* returns bit # of least-significant one bit, starting at 0 (undefined if !bitmap) */
1585#define BITMAP32_CTZ(bitmap)		(__builtin_ctz(bitmap[0]))
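
/* Worked example: BITMAPN_SET(bitmap, 37) sets bit (37 & 31) == 5 of
 * bitmap[37 >> 5] == bitmap[1].  The BITMAPV variants behave the same way
 * under __LP64__; the 32-bit BITMAPV variants assume slot < 32 and always
 * operate on bitmap[0]. */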
1586
1587/*********************	TINY FREE LIST UTILITIES	************************/
1588
1589// We encode the meta-headers as follows:
1590// Each quantum has an associated set of 2 bits:
1591// block_header when 1 says this block is the beginning of a block
1592// in_use when 1 says this block is in use
// so an in-use block of size 3 is 1-1 0-X 0-X
// for a free block, TINY_FREE_SIZE(ptr) carries the size and the bits are 1-0 X-X X-X
// for the middle of a block the bits are 0-0
1596
1597// We store the meta-header bit arrays by interleaving them 32 bits at a time.
1598// Initial 32 bits of block_header, followed by initial 32 bits of in_use, followed
1599// by next 32 bits of block_header, followed by next 32 bits of in_use, etc.
1600// This localizes memory references thereby reducing cache and TLB pressures.
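
/*
 * Worked example: for quantum index 40, the conceptual block_header bit lives
 * in 32-bit word 40 >> 5 == 1.  With the interleaving, that word is stored at
 * offset (1 << 1) == 2 from the start of the header/in_use pair array, the
 * matching in_use word sits right after it at offset 3, and the bit within
 * either word is 40 & 31 == 8.  This is exactly the (index >> 5) << 1
 * arithmetic used by the BITARRAY_* helpers below.
 */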
1601
1602static INLINE void
1603BITARRAY_SET(uint32_t *bits, msize_t index)
1604{
1605	// index >> 5 identifies the uint32_t to manipulate in the conceptually contiguous bits array
1606	// (index >> 5) << 1 identifies the uint32_t allowing for the actual interleaving
1607	bits[(index >> 5) << 1] |= (1 << (index & 31));
1608}
1609
1610static INLINE void
1611BITARRAY_CLR(uint32_t *bits, msize_t index)
1612{
1613	bits[(index >> 5) << 1] &= ~(1 << (index & 31));
1614}
1615
1616static INLINE boolean_t
1617BITARRAY_BIT(uint32_t *bits, msize_t index)
1618{
1619	return ((bits[(index >> 5) << 1]) >> (index & 31)) & 1;
1620}
1621
1622#if 0
1623static INLINE void	bitarray_mclr(uint32_t *bits, unsigned start, unsigned end) ALWAYSINLINE;
1624
1625static INLINE void
1626bitarray_mclr(uint32_t *bits, unsigned start, unsigned end)
1627{
1628	// start >> 5 identifies the uint32_t to manipulate in the conceptually contiguous bits array
1629	// (start >> 5) << 1 identifies the uint32_t allowing for the actual interleaving
1630	uint32_t	*addr = bits + ((start >> 5) << 1);
1631
1632	uint32_t	span = end - start;
1633	start = start & 31;
1634	end = start + span;
1635
1636	if (end > 31) {
1637		addr[0] &= (0xFFFFFFFFU >> (31 - start)) >> 1;
1638		addr[2] &= (0xFFFFFFFFU << (end - 32));
1639	} else {
1640		unsigned mask = (0xFFFFFFFFU >> (31 - start)) >> 1;
1641		mask |= (0xFFFFFFFFU << end);
1642		addr[0] &= mask;
1643	}
1644}
1645#endif
1646
1647/*
1648 * Obtain the size of a free tiny block (in msize_t units).
1649 */
1650static msize_t
1651get_tiny_free_size(const void *ptr)
1652{
1653	void	*next_block = (void *)((uintptr_t)ptr + TINY_QUANTUM);
1654	void	*region_end = TINY_REGION_END(TINY_REGION_FOR_PTR(ptr));
1655
1656	// check whether the next block is outside the tiny region or a block header
1657	// if so, then the size of this block is one, and there is no stored size.
1658	if (next_block < region_end)
1659	{
1660		uint32_t	*next_header = TINY_BLOCK_HEADER_FOR_PTR(next_block);
1661		msize_t		next_index  = TINY_INDEX_FOR_PTR(next_block);
1662
1663		if (!BITARRAY_BIT(next_header, next_index))
1664			return TINY_FREE_SIZE(ptr);
1665	}
1666	return 1;
1667}
1668
1669/*
1670 * Get the size of the previous free block, which is stored in the last two
1671 * bytes of the block.  If the previous block is not free, then the result is
1672 * undefined.
1673 */
1674static msize_t
1675get_tiny_previous_free_msize(const void *ptr)
1676{
1677	// check whether the previous block is in the tiny region and a block header
1678	// if so, then the size of the previous block is one, and there is no stored
1679	// size.
1680	if (ptr != TINY_REGION_FOR_PTR(ptr))
1681	{
1682		void		*prev_block = (void *)((uintptr_t)ptr - TINY_QUANTUM);
1683		uint32_t	*prev_header = TINY_BLOCK_HEADER_FOR_PTR(prev_block);
1684		msize_t		prev_index  = TINY_INDEX_FOR_PTR(prev_block);
1685		if (BITARRAY_BIT(prev_header, prev_index))
1686			return 1;
1687		return TINY_PREVIOUS_MSIZE(ptr);
1688	}
1689	// don't read possibly unmapped memory before the beginning of the region
1690	return 0;
1691}
1692
1693static INLINE msize_t
1694get_tiny_meta_header(const void *ptr, boolean_t *is_free)
1695{
1696	// returns msize and is_free
1697	// may return 0 for the msize component (meaning 65536)
1698	uint32_t	*block_header;
1699	msize_t	index;
1700
1701	block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
1702	index = TINY_INDEX_FOR_PTR(ptr);
1703
1704	msize_t midx = (index >> 5) << 1;
1705	uint32_t mask = 1 << (index & 31);
1706	*is_free = 0;
1707	if (0 == (block_header[midx] & mask)) // if (!BITARRAY_BIT(block_header, index))
1708		return 0;
1709	if (0 == (block_header[midx + 1] & mask)) { // if (!BITARRAY_BIT(in_use, index))
1710		*is_free = 1;
1711		return get_tiny_free_size(ptr);
1712	}
1713
1714	// index >> 5 identifies the uint32_t to manipulate in the conceptually contiguous bits array
1715	// (index >> 5) << 1 identifies the uint32_t allowing for the actual interleaving
1716#if defined(__LP64__)
1717	// The return value, msize, is computed as the distance to the next 1 bit in block_header.
	// That's guaranteed to be somewhere in the next 64 bits. And those bits could span three
1719	// uint32_t block_header elements. Collect the bits into a single uint64_t and measure up with ffsl.
1720	uint32_t	*addr = ((uint32_t *)block_header) + ((index >> 5) << 1);
1721	uint32_t	bitidx = index & 31;
1722	uint64_t	word_lo = addr[0];
1723	uint64_t	word_mid = addr[2];
1724	uint64_t	word_hi = addr[4];
1725	uint64_t	word_lomid = (word_lo >> bitidx) | (word_mid << (32 - bitidx));
1726	uint64_t	word = bitidx ? word_lomid | (word_hi << (64 - bitidx)) : word_lomid;
1727	uint32_t	result = __builtin_ffsl(word >> 1);
1728#else
1729	// The return value, msize, is computed as the distance to the next 1 bit in block_header.
	// That's guaranteed to be somewhere in the next 32 bits. And those bits could span two
1731	// uint32_t block_header elements. Collect the bits into a single uint32_t and measure up with ffs.
1732	uint32_t	*addr = ((uint32_t *)block_header) + ((index >> 5) << 1);
1733	uint32_t	bitidx = index & 31;
1734	uint32_t	word = bitidx ? (addr[0] >> bitidx) | (addr[2] << (32 - bitidx)) : addr[0];
1735	uint32_t	result = __builtin_ffs(word >> 1);
1736#endif
1737	return result;
1738}
1739
1740static INLINE void
1741set_tiny_meta_header_in_use(const void *ptr, msize_t msize)
1742{
1743	uint32_t	*block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
1744	msize_t	index = TINY_INDEX_FOR_PTR(ptr);
1745	msize_t	clr_msize = msize - 1;
1746	msize_t	midx = (index >> 5) << 1;
1747	uint32_t	val = (1 << (index & 31));
1748
1749#if DEBUG_MALLOC
1750	if (msize >= NUM_TINY_SLOTS)
1751		malloc_printf("set_tiny_meta_header_in_use() invariant broken %p %d\n", ptr, msize);
1752	if ((unsigned)index + (unsigned)msize > 0x10000)
1753		malloc_printf("set_tiny_meta_header_in_use() invariant broken (2) %p %d\n", ptr, msize);
1754#endif
1755
1756	block_header[midx] |= val; // BITARRAY_SET(block_header, index);
1757	block_header[midx + 1] |= val; // BITARRAY_SET(in_use, index);
1758
1759	// bitarray_mclr(block_header, index, end_bit);
1760	// bitarray_mclr(in_use, index, end_bit);
1761
1762	index++;
1763	midx = (index >> 5) << 1;
1764
1765	unsigned	start = index & 31;
1766	unsigned	end = start + clr_msize;
1767
1768#if defined(__LP64__)
1769	if (end > 63) {
1770		unsigned mask0 = (0xFFFFFFFFU >> (31 - start)) >> 1;
1771		unsigned mask1 = (0xFFFFFFFFU << (end - 64));
1772		block_header[midx + 0] &= mask0; // clear header
1773		block_header[midx + 1] &= mask0; // clear in_use
1774		block_header[midx + 2] = 0; // clear header
1775		block_header[midx + 3] = 0; // clear in_use
1776		block_header[midx + 4] &= mask1; // clear header
1777		block_header[midx + 5] &= mask1; // clear in_use
1778	} else
1779#endif
1780		if (end > 31) {
1781			unsigned mask0 = (0xFFFFFFFFU >> (31 - start)) >> 1;
1782			unsigned mask1 = (0xFFFFFFFFU << (end - 32));
1783			block_header[midx + 0] &= mask0;
1784			block_header[midx + 1] &= mask0;
1785			block_header[midx + 2] &= mask1;
1786			block_header[midx + 3] &= mask1;
1787		} else {
1788			unsigned mask = (0xFFFFFFFFU >> (31 - start)) >> 1;
1789			mask |= (0xFFFFFFFFU << end);
1790			block_header[midx + 0] &= mask;
1791			block_header[midx + 1] &= mask;
1792		}
1793
	// we set the block_header bit for the following block to reaffirm that the next block is a block
1795	index += clr_msize;
1796	midx = (index >> 5) << 1;
1797	val = (1 << (index & 31));
1798	block_header[midx] |= val; // BITARRAY_SET(block_header, (index+clr_msize));
1799#if DEBUG_MALLOC
1800	{
1801		boolean_t ff;
1802		msize_t	mf;
1803
1804		mf = get_tiny_meta_header(ptr, &ff);
1805		if (msize != mf) {
1806			malloc_printf("setting header for tiny in_use %p : %d\n", ptr, msize);
1807			malloc_printf("reading header for tiny %p : %d %d\n", ptr, mf, ff);
1808		}
1809	}
1810#endif
1811}
1812
1813static INLINE void
1814set_tiny_meta_header_in_use_1(const void *ptr) // As above with msize == 1
1815{
1816	uint32_t	*block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
1817	msize_t	index = TINY_INDEX_FOR_PTR(ptr);
1818	msize_t	midx = (index >> 5) << 1;
1819	uint32_t	val = (1 << (index & 31));
1820
1821	block_header[midx] |= val; // BITARRAY_SET(block_header, index);
1822	block_header[midx + 1] |= val; // BITARRAY_SET(in_use, index);
1823
1824	index++;
1825	midx = (index >> 5) << 1;
1826	val = (1 << (index & 31));
1827
1828	block_header[midx] |= val; // BITARRAY_SET(block_header, (index+clr_msize))
1829}
1830
1831static INLINE void
1832set_tiny_meta_header_middle(const void *ptr)
1833{
1834	// indicates this block is in the middle of an in use block
1835	uint32_t	*block_header;
1836	uint32_t	*in_use;
1837	msize_t	index;
1838
1839	block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
1840	in_use = TINY_INUSE_FOR_HEADER(block_header);
1841	index = TINY_INDEX_FOR_PTR(ptr);
1842
1843	BITARRAY_CLR(block_header, index);
1844	BITARRAY_CLR(in_use, index);
1845}
1846
1847static INLINE void
1848set_tiny_meta_header_free(const void *ptr, msize_t msize)
1849{
1850	// !msize is acceptable and means 65536
1851	uint32_t	*block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
1852	msize_t	index = TINY_INDEX_FOR_PTR(ptr);
1853	msize_t	midx = (index >> 5) << 1;
1854	uint32_t	val = (1 << (index & 31));
1855
1856#if DEBUG_MALLOC
1857	if ((unsigned)index + (unsigned)msize > 0x10000) {
1858		malloc_printf("setting header for tiny free %p msize too large: %d\n", ptr, msize);
1859	}
1860#endif
1861
1862	block_header[midx] |= val; // BITARRAY_SET(block_header, index);
1863	block_header[midx + 1] &= ~val; // BITARRAY_CLR(in_use, index);
1864
	// mark the end of this block if msize is > 1.  For msize == 0, the whole
	// region is free, so there is no following block. For msize == 1, there is
	// no space to write the size on 64-bit systems.  The size for 1-quantum
	// blocks is computed from the metadata bitmaps.
1869	if (msize > 1) {
1870		void	*follower = FOLLOWING_TINY_PTR(ptr, msize);
1871		TINY_PREVIOUS_MSIZE(follower) = msize;
1872		TINY_FREE_SIZE(ptr) = msize;
1873	}
1874	if (msize == 0) {
1875		TINY_FREE_SIZE(ptr) = msize;
1876	}
1877#if DEBUG_MALLOC
1878	boolean_t	ff;
1879	msize_t	mf = get_tiny_meta_header(ptr, &ff);
1880	if ((msize != mf) || !ff) {
1881		malloc_printf("setting header for tiny free %p : %u\n", ptr, msize);
1882		malloc_printf("reading header for tiny %p : %u %u\n", ptr, mf, ff);
1883	}
1884#endif
1885}
1886
1887static INLINE boolean_t
1888tiny_meta_header_is_free(const void *ptr)
1889{
1890	uint32_t	*block_header;
1891	uint32_t	*in_use;
1892	msize_t	index;
1893
1894	block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
1895	in_use = TINY_INUSE_FOR_HEADER(block_header);
1896	index = TINY_INDEX_FOR_PTR(ptr);
1897	if (!BITARRAY_BIT(block_header, index))
1898		return 0;
1899	return !BITARRAY_BIT(in_use, index);
1900}
1901
1902static INLINE void *
1903tiny_previous_preceding_free(void *ptr, msize_t *prev_msize)
1904{
1905	// returns the previous block, assuming and verifying it's free
1906	uint32_t	*block_header;
1907	uint32_t	*in_use;
1908	msize_t	index;
1909	msize_t	previous_msize;
1910	msize_t	previous_index;
1911	void	*previous_ptr;
1912
1913	block_header = TINY_BLOCK_HEADER_FOR_PTR(ptr);
1914	in_use = TINY_INUSE_FOR_HEADER(block_header);
1915	index = TINY_INDEX_FOR_PTR(ptr);
1916
1917	if (!index)
1918		return NULL;
1919	if ((previous_msize = get_tiny_previous_free_msize(ptr)) > index)
1920		return NULL;
1921
1922	previous_index = index - previous_msize;
1923	previous_ptr = (void *)((uintptr_t)TINY_REGION_FOR_PTR(ptr) + TINY_BYTES_FOR_MSIZE(previous_index));
1924	if (!BITARRAY_BIT(block_header, previous_index))
1925		return NULL;
1926	if (BITARRAY_BIT(in_use, previous_index))
1927		return NULL;
1928	if (get_tiny_free_size(previous_ptr) != previous_msize)
1929		return NULL;
1930
1931	// conservative check did match true check
1932	*prev_msize = previous_msize;
1933	return previous_ptr;
1934}
1935
1936/*
1937 * Adds an item to the proper free list, and also marks the meta-header of the
1938 * block properly.
1939 * Assumes szone has been locked
1940 */
1941static void
1942tiny_free_list_add_ptr(szone_t *szone, magazine_t *tiny_mag_ptr, void *ptr, msize_t msize)
1943{
1944	grain_t	slot = (!msize || (msize >= NUM_TINY_SLOTS)) ? NUM_TINY_SLOTS - 1 : msize - 1;
1945	free_list_t	*free_ptr = ptr;
1946	free_list_t	*free_head = tiny_mag_ptr->mag_free_list[slot];
1947
1948#if DEBUG_MALLOC
1949	if (LOG(szone,ptr)) {
1950		malloc_printf("in %s, ptr=%p, msize=%d\n", __FUNCTION__, ptr, msize);
1951	}
1952	if (((uintptr_t)ptr) & (TINY_QUANTUM - 1)) {
1953		szone_error(szone, 1, "tiny_free_list_add_ptr: Unaligned ptr", ptr, NULL);
1954	}
1955#endif
1956	set_tiny_meta_header_free(ptr, msize);
1957	if (free_head) {
1958#if DEBUG_MALLOC
1959		if (free_list_unchecksum_ptr(szone, &free_head->previous)) {
1960			szone_error(szone, 1, "tiny_free_list_add_ptr: Internal invariant broken (free_head->previous)", ptr,
1961						"ptr=%p slot=%d free_head=%p previous=%p\n", ptr, slot, (void *)free_head, free_head->previous.p);
1962		}
1963		if (! tiny_meta_header_is_free(free_head)) {
1964			szone_error(szone, 1, "tiny_free_list_add_ptr: Internal invariant broken (free_head is not a free pointer)", ptr,
1965						"ptr=%p slot=%d free_head=%p\n", ptr, slot, (void *)free_head);
1966		}
1967#endif
1968		free_head->previous.u = free_list_checksum_ptr(szone, free_ptr);
1969	} else {
1970		BITMAPV_SET(tiny_mag_ptr->mag_bitmap, slot);
1971	}
1972	free_ptr->previous.u = free_list_checksum_ptr(szone, NULL);
1973	free_ptr->next.u = free_list_checksum_ptr(szone, free_head);
1974
1975	tiny_mag_ptr->mag_free_list[slot] = free_ptr;
1976}
1977
1978/*
1979 * Removes the item pointed to by ptr in the proper free list.
1980 * Assumes szone has been locked
1981 */
1982static void
1983tiny_free_list_remove_ptr(szone_t *szone, magazine_t *tiny_mag_ptr, void *ptr, msize_t msize)
1984{
1985	grain_t	slot = (!msize || (msize >= NUM_TINY_SLOTS)) ? NUM_TINY_SLOTS - 1 : msize - 1;
1986	free_list_t	*free_ptr = ptr, *next, *previous;
1987
1988	next = free_list_unchecksum_ptr(szone, &free_ptr->next);
1989	previous = free_list_unchecksum_ptr(szone, &free_ptr->previous);
1990
1991#if DEBUG_MALLOC
1992	if (LOG(szone,ptr)) {
1993		malloc_printf("In %s, ptr=%p, msize=%d\n", __FUNCTION__, ptr, msize);
1994	}
1995#endif
1996	if (!previous) {
1997		// The block to remove is the head of the free list
1998#if DEBUG_MALLOC
1999		if (tiny_mag_ptr->mag_free_list[slot] != ptr) {
2000			szone_error(szone, 1, "tiny_free_list_remove_ptr: Internal invariant broken (tiny_mag_ptr->mag_free_list[slot])", ptr,
2001						"ptr=%p slot=%d msize=%d tiny_mag_ptr->mag_free_list[slot]=%p\n",
2002						ptr, slot, msize, (void *)tiny_mag_ptr->mag_free_list[slot]);
2003			return;
2004		}
2005#endif
2006		tiny_mag_ptr->mag_free_list[slot] = next;
2007		if (!next) BITMAPV_CLR(tiny_mag_ptr->mag_bitmap, slot);
2008	} else {
2009		// We know free_ptr is already checksummed, so we don't need to do it
2010		// again.
2011		previous->next = free_ptr->next;
2012	}
2013	if (next) {
2014		// We know free_ptr is already checksummed, so we don't need to do it
2015		// again.
2016		next->previous = free_ptr->previous;
2017	}
2018}
2019
2020/*
2021 * tiny_region_for_ptr_no_lock - Returns the tiny region containing the pointer,
2022 * or NULL if not found.
2023 */
2024static INLINE region_t
2025tiny_region_for_ptr_no_lock(szone_t *szone, const void *ptr)
2026{
2027	rgnhdl_t r = hash_lookup_region_no_lock(szone->tiny_region_generation->hashed_regions,
2028											szone->tiny_region_generation->num_regions_allocated,
2029											szone->tiny_region_generation->num_regions_allocated_shift,
2030											TINY_REGION_FOR_PTR(ptr));
2031	return r ? *r : r;
2032}
2033
2034static void
2035tiny_finalize_region(szone_t *szone, magazine_t *tiny_mag_ptr) {
2036	void      *last_block, *previous_block;
2037	uint32_t  *last_header;
2038	msize_t   last_msize, previous_msize, last_index;
2039
	// It is possible that the block prior to the last block in the region has
	// been free'd, but was not coalesced with the free bytes at the end of the
	// region, since we treat the bytes at the end of the region as "in use" in
	// the meta headers. Attempt to coalesce the last block with the previous
	// block, so we don't violate the "no consecutive free blocks" invariant.
2045	//
2046	// FIXME: Need to investigate how much work would be required to increase
2047	//        'mag_bytes_free_at_end' when freeing the preceding block, rather
2048	//        than performing this workaround.
2049	//
2050
2051	if (tiny_mag_ptr->mag_bytes_free_at_end) {
2052		last_block = (void *)
2053		((uintptr_t)TINY_REGION_END(tiny_mag_ptr->mag_last_region) - tiny_mag_ptr->mag_bytes_free_at_end);
2054		last_msize = TINY_MSIZE_FOR_BYTES(tiny_mag_ptr->mag_bytes_free_at_end);
2055		last_header = TINY_BLOCK_HEADER_FOR_PTR(last_block);
2056		last_index  = TINY_INDEX_FOR_PTR(last_block);
2057
2058		// Before anything we transform any remaining mag_bytes_free_at_end into a
2059		// regular free block.  We take special care here to update the bitfield
2060		// information, since we are bypassing the normal free codepath.  If there
		// is more than one quantum's worth of memory in mag_bytes_free_at_end, then
2062		// there will be two block headers:
2063		// 1) header for the free space at end, msize = 1
2064		// 2) header inserted by set_tiny_meta_header_in_use after block
2065		// We must clear the second one so that when the free block's size is
2066		// queried, we do not think the block is only 1 quantum in size because
2067		// of the second set header bit.
2068		if (last_index != (NUM_TINY_BLOCKS - 1))
2069			BITARRAY_CLR(last_header, (last_index + 1));
2070
2071		previous_block = tiny_previous_preceding_free(last_block, &previous_msize);
2072		if (previous_block) {
2073			set_tiny_meta_header_middle(last_block);
2074			tiny_free_list_remove_ptr(szone, tiny_mag_ptr, previous_block, previous_msize);
2075			last_block = previous_block;
2076			last_msize += previous_msize;
2077		}
2078
2079		// splice last_block into the free list
2080		tiny_free_list_add_ptr(szone, tiny_mag_ptr, last_block, last_msize);
2081		tiny_mag_ptr->mag_bytes_free_at_end = 0;
2082	}
2083
2084#if ASLR_INTERNAL
2085	// Coalesce the big free block at start with any following free blocks
2086	if (tiny_mag_ptr->mag_bytes_free_at_start) {
2087		last_block = TINY_REGION_ADDRESS(tiny_mag_ptr->mag_last_region);
2088		last_msize = TINY_MSIZE_FOR_BYTES(tiny_mag_ptr->mag_bytes_free_at_start);
2089
2090		void *next_block = (void *) ((uintptr_t)last_block + tiny_mag_ptr->mag_bytes_free_at_start);
2091
2092		// clear the in use bit we were using to mark the end of the big start block
2093		set_tiny_meta_header_middle((void *)((uintptr_t)next_block - TINY_QUANTUM));
2094
2095		// Coalesce the big start block with any following free blocks
2096		if (tiny_meta_header_is_free(next_block)) {
2097			msize_t next_msize = get_tiny_free_size(next_block);
2098			set_tiny_meta_header_middle(next_block);
2099			tiny_free_list_remove_ptr(szone, tiny_mag_ptr, next_block, next_msize);
2100			last_msize += next_msize;
2101		}
2102
2103		// splice last_block into the free list
2104		tiny_free_list_add_ptr(szone, tiny_mag_ptr, last_block, last_msize);
2105		tiny_mag_ptr->mag_bytes_free_at_start = 0;
2106	}
2107#endif
2108
2109	tiny_mag_ptr->mag_last_region = NULL;
2110}
2111
2112static int
2113tiny_free_detach_region(szone_t *szone, magazine_t *tiny_mag_ptr, region_t r) {
2114	uintptr_t	start = (uintptr_t)TINY_REGION_ADDRESS(r);
2115	uintptr_t	current = start;
2116	uintptr_t	limit =  (uintptr_t)TINY_REGION_END(r);
2117	boolean_t	is_free;
2118	msize_t	msize;
2119	int		total_alloc = 0;
2120
2121	while (current < limit) {
2122		msize = get_tiny_meta_header((void *)current, &is_free);
2123		if (is_free && !msize && (current == start)) {
2124			// first block is all free
2125			break;
2126		}
2127		if (!msize) {
2128#if DEBUG_MALLOC
2129			malloc_printf("*** tiny_free_detach_region error with %p: msize=%d is_free =%d\n",
2130						  (void *)current, msize, is_free);
2131#endif
2132			break;
2133		}
2134		if (is_free) {
2135			tiny_free_list_remove_ptr(szone, tiny_mag_ptr, (void *)current, msize);
2136		} else {
2137			total_alloc++;
2138		}
2139		current += TINY_BYTES_FOR_MSIZE(msize);
2140	}
2141	return total_alloc;
2142}
2143
2144static size_t
2145tiny_free_reattach_region(szone_t *szone, magazine_t *tiny_mag_ptr, region_t r) {
2146	uintptr_t	start = (uintptr_t)TINY_REGION_ADDRESS(r);
2147	uintptr_t	current = start;
2148	uintptr_t	limit =  (uintptr_t)TINY_REGION_END(r);
2149	boolean_t	is_free;
2150	msize_t	msize;
2151	size_t	total_alloc = 0;
2152
2153	while (current < limit) {
2154		msize = get_tiny_meta_header((void *)current, &is_free);
2155		if (is_free && !msize && (current == start)) {
2156			// first block is all free
2157			break;
2158		}
2159		if (!msize) {
2160#if DEBUG_MALLOC
2161			malloc_printf("*** tiny_free_reattach_region error with %p: msize=%d is_free =%d\n",
2162						  (void *)current, msize, is_free);
2163#endif
2164			break;
2165		}
2166		if (is_free) {
2167			tiny_free_list_add_ptr(szone, tiny_mag_ptr, (void *)current, msize);
2168		} else {
2169			total_alloc += TINY_BYTES_FOR_MSIZE(msize);
2170		}
2171		current += TINY_BYTES_FOR_MSIZE(msize);
2172	}
2173	return total_alloc;
2174}
2175
2176typedef struct {
2177	uint8_t pnum, size;
2178} tiny_pg_pair_t;
2179
2180static void NOINLINE /* want private stack frame for automatic array */
2181tiny_free_scan_madvise_free(szone_t *szone, magazine_t *depot_ptr, region_t r) {
2182	uintptr_t	start = (uintptr_t)TINY_REGION_ADDRESS(r);
2183	uintptr_t	current = start;
2184	uintptr_t	limit =  (uintptr_t)TINY_REGION_END(r);
2185	boolean_t	is_free;
2186	msize_t	msize;
	tiny_pg_pair_t advisory[((TINY_REGION_PAYLOAD_BYTES + vm_page_quanta_size - 1) >> vm_page_quanta_shift) >> 1]; // 256 bytes, stack allocated
2188	int		advisories = 0;
2189
2190	// Scan the metadata identifying blocks which span one or more pages. Mark the pages MADV_FREE taking care to preserve free list
2191	// management data.
2192	while (current < limit) {
2193		msize = get_tiny_meta_header((void *)current, &is_free);
2194		if (is_free && !msize && (current == start)) {
2195			// first block is all free
2196#if DEBUG_MALLOC
2197			malloc_printf("*** tiny_free_scan_madvise_free first block is all free! %p: msize=%d is_free =%d\n",
2198						  (void *)current, msize, is_free);
2199#endif
2200			uintptr_t pgLo = round_page_quanta(start + sizeof(free_list_t) + sizeof(msize_t));
2201			uintptr_t pgHi = trunc_page_quanta(start + TINY_REGION_SIZE - sizeof(msize_t));
2202
2203			if (pgLo < pgHi) {
2204				advisory[advisories].pnum = (pgLo - start) >> vm_page_quanta_shift;
2205				advisory[advisories].size = (pgHi - pgLo) >> vm_page_quanta_shift;
2206				advisories++;
2207			}
2208			break;
2209		}
2210		if (!msize) {
2211#if DEBUG_MALLOC
2212			malloc_printf("*** tiny_free_scan_madvise_free error with %p: msize=%d is_free =%d\n",
2213						  (void *)current, msize, is_free);
2214#endif
2215			break;
2216		}
2217		if (is_free) {
2218			uintptr_t pgLo = round_page_quanta(current + sizeof(free_list_t) + sizeof(msize_t));
2219			uintptr_t pgHi = trunc_page_quanta(current + TINY_BYTES_FOR_MSIZE(msize) - sizeof(msize_t));
2220
2221			if (pgLo < pgHi) {
2222				advisory[advisories].pnum = (pgLo - start) >> vm_page_quanta_shift;
2223				advisory[advisories].size = (pgHi - pgLo) >> vm_page_quanta_shift;
2224				advisories++;
2225			}
2226		}
2227		current += TINY_BYTES_FOR_MSIZE(msize);
2228	}
2229
2230	if (advisories > 0) {
2231		int i;
2232
2233		// So long as the following hold for this region:
		// (1) No malloc()'s are ever performed from the depot (hence free pages remain free),
2235		// (2) The region is not handed over to a per-CPU magazine (where malloc()'s could be performed),
		// (3) The entire region is not munmap()'d (so the madvise's are applied to the intended addresses),
2237		// then the madvise opportunities collected just above can be applied outside all locks.
2238		// (1) is ensured by design, (2) and (3) are ensured by bumping the globally visible counter node->pinned_to_depot.
2239
2240		OSAtomicIncrement32Barrier(&(REGION_TRAILER_FOR_TINY_REGION(r)->pinned_to_depot));
2241		SZONE_MAGAZINE_PTR_UNLOCK(szone, depot_ptr);
2242		for (i = 0; i < advisories; ++i) {
2243			uintptr_t addr = (advisory[i].pnum << vm_page_quanta_shift) + start;
2244			size_t size = advisory[i].size << vm_page_quanta_shift;
2245
2246			madvise_free_range(szone, r, addr, addr + size, NULL);
2247		}
2248		SZONE_MAGAZINE_PTR_LOCK(szone, depot_ptr);
2249		OSAtomicDecrement32Barrier(&(REGION_TRAILER_FOR_TINY_REGION(r)->pinned_to_depot));
2250	}
2251}
2252
2253static region_t
2254tiny_free_try_depot_unmap_no_lock(szone_t *szone, magazine_t *depot_ptr, region_trailer_t *node)
2255{
2256	if (0 < node->bytes_used ||
2257		0 < node->pinned_to_depot ||
2258		depot_ptr->recirculation_entries < (szone->num_tiny_magazines * 2)) {
2259		return NULL;
2260	}
2261
2262	// disconnect node from Depot
2263	recirc_list_extract(szone, depot_ptr, node);
2264
2265	// Iterate the region pulling its free entries off the (locked) Depot's free list
2266	region_t sparse_region = TINY_REGION_FOR_PTR(node);
2267	int objects_in_use = tiny_free_detach_region(szone, depot_ptr, sparse_region);
2268
2269	if (0 == objects_in_use) {
2270		// Invalidate the hash table entry for this region with HASHRING_REGION_DEALLOCATED.
2271		// Using HASHRING_REGION_DEALLOCATED preserves the collision chain, using HASHRING_OPEN_ENTRY (0) would not.
2272		rgnhdl_t pSlot = hash_lookup_region_no_lock(szone->tiny_region_generation->hashed_regions,
2273													szone->tiny_region_generation->num_regions_allocated,
2274													szone->tiny_region_generation->num_regions_allocated_shift, sparse_region);
2275		if (NULL == pSlot) {
2276			szone_error(szone, 1, "tiny_free_try_depot_unmap_no_lock hash lookup failed:", NULL, "%p\n", sparse_region);
2277			return NULL;
2278		}
2279		*pSlot = HASHRING_REGION_DEALLOCATED;
2280		depot_ptr->num_bytes_in_magazine -= TINY_REGION_PAYLOAD_BYTES;
2281		__sync_fetch_and_add( &(szone->num_tiny_regions_dealloc), 1); // Atomically increment num_tiny_regions_dealloc
2282
2283		// Caller will transfer ownership of the region back to the OS with no locks held
2284		MAGMALLOC_DEALLOCREGION((void *)szone, (void *)sparse_region, TINY_REGION_SIZE); // DTrace USDT Probe
2285		return sparse_region;
2286	} else {
2287		szone_error(szone, 1, "tiny_free_try_depot_unmap_no_lock objects_in_use not zero:", NULL, "%d\n", objects_in_use);
2288		return NULL;
2289	}
2290}
2291
2292static boolean_t
2293tiny_free_do_recirc_to_depot(szone_t *szone, magazine_t *tiny_mag_ptr, mag_index_t mag_index)
2294{
	// The entire magazine crossed the "emptiness threshold". Transfer a region
	// from this magazine to the Depot. Choose a region that itself has crossed the emptiness threshold (i.e.,
	// is at least fraction "f" empty). Such a region will be marked "suitable" on the recirculation list.
2298	region_trailer_t *node = tiny_mag_ptr->firstNode;
2299
2300	while (node && !node->recirc_suitable) {
2301		node = node->next;
2302	}
2303
2304	if (NULL == node) {
2305#if DEBUG_MALLOC
2306		malloc_printf("*** tiny_free_do_recirc_to_depot end of list\n");
2307#endif
2308		return TRUE; // Caller must SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
2309	}
2310
2311	region_t sparse_region = TINY_REGION_FOR_PTR(node);
2312
2313	// Deal with unclaimed memory -- mag_bytes_free_at_end or mag_bytes_free_at_start
2314	if (sparse_region == tiny_mag_ptr->mag_last_region && (tiny_mag_ptr->mag_bytes_free_at_end || tiny_mag_ptr->mag_bytes_free_at_start)) {
2315		tiny_finalize_region(szone, tiny_mag_ptr);
2316	}
2317
2318	// disconnect "suitable" node from magazine
2319	recirc_list_extract(szone, tiny_mag_ptr, node);
2320
2321	// Iterate the region pulling its free entries off its (locked) magazine's free list
2322	int objects_in_use = tiny_free_detach_region(szone, tiny_mag_ptr, sparse_region);
2323	magazine_t *depot_ptr = &(szone->tiny_magazines[DEPOT_MAGAZINE_INDEX]);
2324
2325	// hand over the region to the (locked) Depot
2326	SZONE_MAGAZINE_PTR_LOCK(szone,depot_ptr);
	// this will cause tiny_free_list_add_ptr called by tiny_free_reattach_region to use
	// the depot as its target magazine, rather than the magazine formerly associated with sparse_region
2329	MAGAZINE_INDEX_FOR_TINY_REGION(sparse_region) = DEPOT_MAGAZINE_INDEX;
2330	node->pinned_to_depot = 0;
2331
2332	// Iterate the region putting its free entries on Depot's free list
2333	size_t bytes_inplay = tiny_free_reattach_region(szone, depot_ptr, sparse_region);
2334
2335	tiny_mag_ptr->mag_num_bytes_in_objects -= bytes_inplay;
2336	tiny_mag_ptr->num_bytes_in_magazine -= TINY_REGION_PAYLOAD_BYTES;
2337	tiny_mag_ptr->mag_num_objects -= objects_in_use;
2338
2339	SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr); // Unlock the originating magazine
2340
2341	depot_ptr->mag_num_bytes_in_objects += bytes_inplay;
2342	depot_ptr->num_bytes_in_magazine += TINY_REGION_PAYLOAD_BYTES;
2343	depot_ptr->mag_num_objects += objects_in_use;
2344
2345	// connect to Depot as last node
2346	recirc_list_splice_last(szone, depot_ptr, node);
2347
2348	MAGMALLOC_RECIRCREGION((void *)szone, (int)mag_index, (void *)sparse_region, TINY_REGION_SIZE,
2349						   (int)BYTES_USED_FOR_TINY_REGION(sparse_region));  // DTrace USDT Probe
2350
2351	// Mark free'd dirty pages with MADV_FREE to reduce memory pressure
2352	tiny_free_scan_madvise_free(szone, depot_ptr, sparse_region);
2353
2354	// If the region is entirely empty vm_deallocate() it outside the depot lock
2355	region_t r_dealloc = tiny_free_try_depot_unmap_no_lock(szone, depot_ptr, node);
2356	SZONE_MAGAZINE_PTR_UNLOCK(szone,depot_ptr);
2357	if (r_dealloc)
2358		deallocate_pages(szone, r_dealloc, TINY_REGION_SIZE, 0);
2359	return FALSE; // Caller need not unlock the originating magazine
2360}
2361
2362static region_t
2363tiny_find_msize_region(szone_t *szone, magazine_t *tiny_mag_ptr, mag_index_t mag_index, msize_t msize)
2364{
2365	free_list_t		*ptr;
2366	grain_t		slot = msize - 1;
2367	free_list_t		**free_list = tiny_mag_ptr->mag_free_list;
2368	free_list_t		**the_slot = free_list + slot;
2369	free_list_t		**limit;
2370#if defined(__LP64__)
2371	uint64_t		bitmap;
2372#else
2373	uint32_t		bitmap;
2374#endif
2375	// Assumes we've locked the magazine
2376	CHECK_MAGAZINE_PTR_LOCKED(szone, tiny_mag_ptr, __PRETTY_FUNCTION__);
2377
2378	// Look for an exact match by checking the freelist for this msize.
2379	ptr = *the_slot;
2380	if (ptr)
2381		return TINY_REGION_FOR_PTR(ptr);
2382
2383	// Mask off the bits representing slots holding free blocks smaller than the
2384	// size we need.  If there are no larger free blocks, try allocating from
2385	// the free space at the end of the tiny region.
2386#if defined(__LP64__)
2387	bitmap = ((uint64_t *)(tiny_mag_ptr->mag_bitmap))[0] & ~ ((1ULL << slot) - 1);
2388#else
2389	bitmap = tiny_mag_ptr->mag_bitmap[0] & ~ ((1 << slot) - 1);
2390#endif
2391	if (!bitmap)
2392		return NULL;
2393
2394	slot = BITMAPV_CTZ(bitmap);
2395	limit = free_list + NUM_TINY_SLOTS - 1;
2396	free_list += slot;
2397
2398	if (free_list < limit) {
2399		ptr = *free_list;
2400		if (ptr)
2401			return TINY_REGION_FOR_PTR(ptr);
2402		else {
2403			/* Shouldn't happen. Fall through to look at last slot. */
2404#if DEBUG_MALLOC
2405			malloc_printf("in tiny_find_msize_region(), mag_bitmap out of sync, slot=%d\n",slot);
2406#endif
2407		}
2408	}
2409
2410	// We are now looking at the last slot, which contains blocks equal to, or
2411	// due to coalescing of free blocks, larger than (NUM_TINY_SLOTS - 1) * tiny quantum size.
2412	ptr = *limit;
2413	if (ptr)
2414		return TINY_REGION_FOR_PTR(ptr);
2415
2416	return NULL;
2417}
2418
2419static boolean_t
2420tiny_get_region_from_depot(szone_t *szone, magazine_t *tiny_mag_ptr, mag_index_t mag_index, msize_t msize)
2421{
2422	magazine_t *depot_ptr = &(szone->tiny_magazines[DEPOT_MAGAZINE_INDEX]);
2423
2424	/* FIXME: Would Uniprocessor benefit from recirc and MADV_FREE? */
2425	if (szone->num_tiny_magazines == 1) // Uniprocessor, single magazine, so no recirculation necessary
2426		return 0;
2427
2428#if DEBUG_MALLOC
2429	if (DEPOT_MAGAZINE_INDEX == mag_index) {
2430		szone_error(szone, 1, "tiny_get_region_from_depot called for magazine index -1", NULL, NULL);
2431		return 0;
2432	}
2433#endif
2434
2435	SZONE_MAGAZINE_PTR_LOCK(szone,depot_ptr);
2436
2437	// Appropriate a Depot'd region that can satisfy requested msize.
2438	region_trailer_t *node;
2439	region_t sparse_region;
2440
2441	while (1) {
2442		sparse_region = tiny_find_msize_region(szone, depot_ptr, DEPOT_MAGAZINE_INDEX, msize);
2443		if (NULL == sparse_region) { // Depot empty?
2444			SZONE_MAGAZINE_PTR_UNLOCK(szone,depot_ptr);
2445			return 0;
2446		}
2447
2448		node = REGION_TRAILER_FOR_TINY_REGION(sparse_region);
2449		if (0 >= node->pinned_to_depot)
2450			break;
2451
2452		SZONE_MAGAZINE_PTR_UNLOCK(szone,depot_ptr);
2453		yield();
2454		SZONE_MAGAZINE_PTR_LOCK(szone,depot_ptr);
2455	}
2456
2457	// disconnect node from Depot
2458	recirc_list_extract(szone, depot_ptr, node);
2459
2460	// Iterate the region pulling its free entries off the (locked) Depot's free list
2461	int objects_in_use = tiny_free_detach_region(szone, depot_ptr, sparse_region);
2462
2463	// Transfer ownership of the region
2464	MAGAZINE_INDEX_FOR_TINY_REGION(sparse_region) = mag_index;
2465	node->pinned_to_depot = 0;
2466
2467	// Iterate the region putting its free entries on its new (locked) magazine's free list
2468	size_t bytes_inplay = tiny_free_reattach_region(szone, tiny_mag_ptr, sparse_region);
2469
2470	depot_ptr->mag_num_bytes_in_objects -= bytes_inplay;
2471	depot_ptr->num_bytes_in_magazine -= TINY_REGION_PAYLOAD_BYTES;
2472	depot_ptr->mag_num_objects -= objects_in_use;
2473
2474	tiny_mag_ptr->mag_num_bytes_in_objects += bytes_inplay;
2475	tiny_mag_ptr->num_bytes_in_magazine += TINY_REGION_PAYLOAD_BYTES;
2476	tiny_mag_ptr->mag_num_objects += objects_in_use;
2477
2478	// connect to magazine as first node
2479	recirc_list_splice_first(szone, tiny_mag_ptr, node);
2480
2481	SZONE_MAGAZINE_PTR_UNLOCK(szone,depot_ptr);
2482
2483	// madvise() outside the Depot lock
2484	(void)madvise_reuse_range(szone, sparse_region, sparse_region, sparse_region+TINY_REGION_PAYLOAD_BYTES);
2485
2486	MAGMALLOC_DEPOTREGION((void *)szone, (int)mag_index, (void *)sparse_region, TINY_REGION_SIZE,
2487						  (int)BYTES_USED_FOR_TINY_REGION(sparse_region)); // DTrace USDT Probe
2488
2489	return 1;
2490}
2491
#define K 1.5 // headroom measured in number of 1MB regions
2493#define DENSITY_THRESHOLD(a) \
2494	((a) - ((a) >> 2)) // "Emptiness" f = 0.25, so "Density" is (1 - f)*a. Generally: ((a) - ((a) >> -log2(f)))
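
/*
 * Worked example (illustrative magazine sizes): DENSITY_THRESHOLD(a) == a - a/4
 * == 0.75 * a, i.e. the "emptiness" fraction f is 0.25.  A magazine with
 * a == 4MB of attached regions and u == 2MB in use has a - u == 2MB of slack
 * (more than K == 1.5 regions' worth of payload) and u < DENSITY_THRESHOLD(a)
 * == 3MB, so the discriminant in tiny_free_no_lock() below would trigger a
 * recirculation of one "suitable" region to the Depot.
 */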
2495
2496static INLINE boolean_t
2497tiny_free_no_lock(szone_t *szone, magazine_t *tiny_mag_ptr, mag_index_t mag_index, region_t region, void *ptr,
2498				  msize_t msize)
2499{
2500	void	*original_ptr = ptr;
2501	size_t	original_size = TINY_BYTES_FOR_MSIZE(msize);
2502	void	*next_block = ((unsigned char *)ptr + original_size);
2503	msize_t	previous_msize, next_msize;
2504	void	*previous;
2505	free_list_t	*big_free_block;
2506	free_list_t	*after_next_block;
2507	free_list_t	*before_next_block;
2508
2509#if DEBUG_MALLOC
2510	if (LOG(szone,ptr)) {
2511		malloc_printf("in tiny_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize);
2512	}
2513	if (!msize) {
2514		szone_error(szone, 1, "trying to free tiny block that is too small", ptr,
2515					"in tiny_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize);
2516	}
2517#endif
2518
	// We try to coalesce this block with the preceding one
2520	previous = tiny_previous_preceding_free(ptr, &previous_msize);
2521	if (previous) {
2522#if DEBUG_MALLOC
2523		if (LOG(szone, ptr) || LOG(szone,previous)) {
2524			malloc_printf("in tiny_free_no_lock(), coalesced backwards for %p previous=%p\n", ptr, previous);
2525		}
2526#endif
2527
2528		// clear the meta_header since this is no longer the start of a block
2529		set_tiny_meta_header_middle(ptr);
2530		tiny_free_list_remove_ptr(szone, tiny_mag_ptr, previous, previous_msize);
2531		ptr = previous;
2532		msize += previous_msize;
2533	}
2534	// We try to coalesce with the next block
2535	if ((next_block < TINY_REGION_END(region)) && tiny_meta_header_is_free(next_block)) {
2536		next_msize = get_tiny_free_size(next_block);
2537#if DEBUG_MALLOC
2538		if (LOG(szone, ptr) || LOG(szone, next_block)) {
2539			malloc_printf("in tiny_free_no_lock(), for ptr=%p, msize=%d coalesced forward=%p next_msize=%d\n",
2540						  ptr, msize, next_block, next_msize);
2541		}
2542#endif
2543		// If we are coalescing with the next block, and the next block is in
2544		// the last slot of the free list, then we optimize this case here to
2545		// avoid removing next_block from the slot (NUM_TINY_SLOTS - 1) and then adding ptr back
2546		// to slot (NUM_TINY_SLOTS - 1).
2547		if (next_msize >= NUM_TINY_SLOTS) {
2548			msize += next_msize;
2549
2550			big_free_block = (free_list_t *)next_block;
2551			after_next_block = free_list_unchecksum_ptr(szone, &big_free_block->next);
2552			before_next_block = free_list_unchecksum_ptr(szone, &big_free_block->previous);
2553
2554			if (!before_next_block) {
2555				tiny_mag_ptr->mag_free_list[NUM_TINY_SLOTS-1] = ptr;
2556			} else {
2557				before_next_block->next.u = free_list_checksum_ptr(szone, ptr);
2558			}
2559
2560			if (after_next_block) {
2561				after_next_block->previous.u = free_list_checksum_ptr(szone, ptr);
2562			}
2563
2564			// we don't need to checksum these since they are already checksummed
2565			((free_list_t *)ptr)->previous = big_free_block->previous;
2566			((free_list_t *)ptr)->next = big_free_block->next;
2567
2568			// clear the meta_header to enable coalescing backwards
2569			set_tiny_meta_header_middle(big_free_block);
2570			set_tiny_meta_header_free(ptr, msize);
2571
2572			goto tiny_free_ending;
2573		}
2574		tiny_free_list_remove_ptr(szone, tiny_mag_ptr, next_block, next_msize);
2575		set_tiny_meta_header_middle(next_block); // clear the meta_header to enable coalescing backwards
2576		msize += next_msize;
2577	}
2578
2579	// The tiny cache already scribbles free blocks as they go through the
	// cache whenever msize < TINY_QUANTUM, so we do not need to do it here.
2581	if ((szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) && msize && (msize >= TINY_QUANTUM))
2582		memset(ptr, SCRABBLE_BYTE, TINY_BYTES_FOR_MSIZE(msize));
2583
2584	tiny_free_list_add_ptr(szone, tiny_mag_ptr, ptr, msize);
2585
2586tiny_free_ending:
2587
2588	tiny_mag_ptr->mag_num_objects--;
2589	// we use original_size and not msize to avoid double counting the coalesced blocks
2590	tiny_mag_ptr->mag_num_bytes_in_objects -= original_size;
2591
2592	// Update this region's bytes in use count
2593	region_trailer_t *node = REGION_TRAILER_FOR_TINY_REGION(region);
2594	size_t bytes_used = node->bytes_used - original_size;
2595	node->bytes_used = bytes_used;
2596
2597#if !TARGET_OS_EMBEDDED // Always madvise for embedded platforms
2598	/* FIXME: Would Uniprocessor benefit from recirc and MADV_FREE? */
2599	if (szone->num_tiny_magazines == 1) { // Uniprocessor, single magazine, so no recirculation necessary
2600		/* NOTHING */
2601	} else if (DEPOT_MAGAZINE_INDEX != mag_index) {
2602		// Emptiness discriminant
2603		if (bytes_used < DENSITY_THRESHOLD(TINY_REGION_PAYLOAD_BYTES)) {
2604			/* Region has crossed threshold from density to sparsity. Mark it "suitable" on the
2605			 recirculation candidates list. */
2606			node->recirc_suitable = TRUE;
2607		} else {
2608			/* After this free, we've found the region is still dense, so it must have been even more so before
2609			 the free. That implies the region is already correctly marked. Do nothing. */
2610		}
2611
		// Has the entire magazine crossed the "emptiness threshold"? If so, transfer a region
		// from this magazine to the Depot. Choose a region that itself has crossed the emptiness threshold (i.e.,
		// is at least fraction "f" empty). Such a region will be marked "suitable" on the recirculation list.
2615		size_t a = tiny_mag_ptr->num_bytes_in_magazine; // Total bytes allocated to this magazine
		size_t u = tiny_mag_ptr->mag_num_bytes_in_objects; // In use (malloc'd) from this magazine
2617
2618		if (a - u > ((3 * TINY_REGION_PAYLOAD_BYTES) / 2) && u < DENSITY_THRESHOLD(a)) {
2619			return tiny_free_do_recirc_to_depot(szone, tiny_mag_ptr, mag_index);
2620		}
2621
2622	} else {
2623#endif
2624		// Freed to Depot. N.B. Lock on tiny_magazines[DEPOT_MAGAZINE_INDEX] is already held
		// Calculate the first page in the coalesced block that would be safe to mark MADV_FREE
2626		size_t free_header_size = sizeof(free_list_t) + sizeof(msize_t);
2627		uintptr_t safe_ptr = (uintptr_t)ptr + free_header_size;
2628		uintptr_t round_safe = round_page_quanta(safe_ptr);
2629
		// Calculate the last page in the coalesced block that would be safe to mark MADV_FREE
2631		size_t free_tail_size = sizeof(msize_t);
2632		uintptr_t safe_extent = (uintptr_t)ptr + TINY_BYTES_FOR_MSIZE(msize) - free_tail_size;
2633		uintptr_t trunc_extent = trunc_page_quanta(safe_extent);
2634
2635		// The newly freed block may complete a span of bytes that cover a page. Mark it with MADV_FREE.
2636		if (round_safe < trunc_extent) { // Coalesced area covers a page (perhaps many)
2637			// Extend the freed block by the free region header and tail sizes to include pages
2638			// we may have coalesced that no longer host free region tails and headers.
2639			// This may extend over in-use ranges, but the MIN/MAX clamping below will fix that up.
2640			uintptr_t lo = trunc_page_quanta((uintptr_t)original_ptr - free_tail_size);
2641			uintptr_t hi = round_page_quanta((uintptr_t)original_ptr + original_size + free_header_size);
2642
2643			uintptr_t free_lo = MAX(round_safe, lo);
2644			uintptr_t free_hi = MIN(trunc_extent, hi);
2645
2646			if (free_lo < free_hi) {
2647				tiny_free_list_remove_ptr(szone, tiny_mag_ptr, ptr, msize);
2648				set_tiny_meta_header_in_use(ptr, msize);
2649
2650				OSAtomicIncrement32Barrier(&(node->pinned_to_depot));
2651				SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
2652				madvise_free_range(szone, region, free_lo, free_hi, &szone->last_tiny_advise);
2653				SZONE_MAGAZINE_PTR_LOCK(szone, tiny_mag_ptr);
2654				OSAtomicDecrement32Barrier(&(node->pinned_to_depot));
2655
2656				set_tiny_meta_header_free(ptr, msize);
2657				tiny_free_list_add_ptr(szone, tiny_mag_ptr, ptr, msize);
2658			}
2659		}
2660
2661#if !TARGET_OS_EMBEDDED
2662		if (0 < bytes_used || 0 < node->pinned_to_depot) {
2663			/* Depot'd region is still live. Leave it in place on the Depot's recirculation list
			 so as to avoid thrashing between the Depot's free list and a magazine's free list
2665			 with detach_region/reattach_region */
2666		} else {
2667			/* Depot'd region is just now empty. Consider return to OS. */
2668			region_t r_dealloc = tiny_free_try_depot_unmap_no_lock(szone, tiny_mag_ptr, node);
2669			SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
2670			if (r_dealloc)
2671				deallocate_pages(szone, r_dealloc, TINY_REGION_SIZE, 0);
2672			return FALSE; // Caller need not unlock
2673		}
2674	}
2675#endif
2676
2677	return TRUE; // Caller must do SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr)
2678}
2679
2680// Allocates from the last region or a freshly allocated region
2681static void *
2682tiny_malloc_from_region_no_lock(szone_t *szone, magazine_t *tiny_mag_ptr, mag_index_t mag_index,
2683								msize_t msize, void * aligned_address)
2684{
2685	void	*ptr;
2686
2687	// Deal with unclaimed memory -- mag_bytes_free_at_end or mag_bytes_free_at_start
2688	if (tiny_mag_ptr->mag_bytes_free_at_end || tiny_mag_ptr->mag_bytes_free_at_start)
2689		tiny_finalize_region(szone, tiny_mag_ptr);
2690
	// We set the unused bits of the header in the last pair to all ones, and the corresponding in-use bits to zeroes.
2692	((tiny_region_t)aligned_address)->pairs[CEIL_NUM_TINY_BLOCKS_WORDS-1].header =
2693	(NUM_TINY_BLOCKS & 31) ? (0xFFFFFFFFU << (NUM_TINY_BLOCKS & 31)) : 0;
2694	((tiny_region_t)aligned_address)->pairs[CEIL_NUM_TINY_BLOCKS_WORDS-1].inuse = 0;
2695
	// This is the only place in tinyland that (infrequently) takes the tiny_regions_lock.
2697	// Only one thread at a time should be permitted to assess the density of the hash
2698	// ring and adjust if needed.
2699	// Only one thread at a time should be permitted to insert its new region on
2700	// the hash ring.
2701	// It is safe for all other threads to read the hash ring (hashed_regions) and
2702	// the associated sizes (num_regions_allocated and num_tiny_regions).
2703
2704	_malloc_lock_lock(&szone->tiny_regions_lock);
2705
2706	// Check to see if the hash ring of tiny regions needs to grow.  Try to
2707	// avoid the hash ring becoming too dense.
2708	if (szone->tiny_region_generation->num_regions_allocated < (2 * szone->num_tiny_regions)) {
2709		region_t *new_regions;
2710		size_t new_size;
2711		size_t new_shift = szone->tiny_region_generation->num_regions_allocated_shift; // In/Out parameter
2712		new_regions = hash_regions_grow_no_lock(szone, szone->tiny_region_generation->hashed_regions,
2713												szone->tiny_region_generation->num_regions_allocated,
2714												&new_shift,
2715												&new_size);
2716		// Do not deallocate the current hashed_regions allocation since someone may
2717		// be iterating it.  Instead, just leak it.
2718
2719		// Prepare to advance to the "next generation" of the hash ring.
2720		szone->tiny_region_generation->nextgen->hashed_regions = new_regions;
2721		szone->tiny_region_generation->nextgen->num_regions_allocated = new_size;
2722		szone->tiny_region_generation->nextgen->num_regions_allocated_shift = new_shift;
2723
2724		// Throw the switch to atomically advance to the next generation.
2725		szone->tiny_region_generation = szone->tiny_region_generation->nextgen;
2726		// Ensure everyone sees the advance.
2727		OSMemoryBarrier();
2728	}
2729	// Tag the region at "aligned_address" as belonging to us,
2730	// and so put it under the protection of the magazine lock we are holding.
2731	// Do this before advertising "aligned_address" on the hash ring(!)
2732	MAGAZINE_INDEX_FOR_TINY_REGION(aligned_address) = mag_index;
2733
2734	// Insert the new region into the hash ring, and update malloc statistics
2735	hash_region_insert_no_lock(szone->tiny_region_generation->hashed_regions,
2736							   szone->tiny_region_generation->num_regions_allocated,
2737							   szone->tiny_region_generation->num_regions_allocated_shift,
2738							   aligned_address);
2739
2740	szone->num_tiny_regions++;
2741	_malloc_lock_unlock(&szone->tiny_regions_lock);
2742
2743	tiny_mag_ptr->mag_last_region = aligned_address;
2744	BYTES_USED_FOR_TINY_REGION(aligned_address) = TINY_BYTES_FOR_MSIZE(msize);
2745#if ASLR_INTERNAL
2746	int offset_msize = malloc_entropy[0] & TINY_ENTROPY_MASK;
2747#if DEBUG_MALLOC
2748	if (getenv("MallocASLRForce")) offset_msize = strtol(getenv("MallocASLRForce"), NULL, 0) & TINY_ENTROPY_MASK;
2749	if (getenv("MallocASLRPrint")) malloc_printf("Region: %p offset: %d\n", aligned_address, offset_msize);
2750#endif
2751#else
2752	int offset_msize = 0;
2753#endif
2754	ptr = (void *)((uintptr_t) aligned_address + TINY_BYTES_FOR_MSIZE(offset_msize));
2755	set_tiny_meta_header_in_use(ptr, msize);
2756	tiny_mag_ptr->mag_num_objects++;
2757	tiny_mag_ptr->mag_num_bytes_in_objects += TINY_BYTES_FOR_MSIZE(msize);
2758	tiny_mag_ptr->num_bytes_in_magazine += TINY_REGION_PAYLOAD_BYTES;
2759
2760	// We put a header on the last block so that it appears in use (for coalescing, etc...)
2761	set_tiny_meta_header_in_use_1((void *)((uintptr_t)ptr + TINY_BYTES_FOR_MSIZE(msize)));
2762	tiny_mag_ptr->mag_bytes_free_at_end = TINY_BYTES_FOR_MSIZE(NUM_TINY_BLOCKS - msize - offset_msize);
2763
2764#if ASLR_INTERNAL
2765	// Put a header on the previous block for same reason
2766	tiny_mag_ptr->mag_bytes_free_at_start = TINY_BYTES_FOR_MSIZE(offset_msize);
2767	if (offset_msize) {
2768		set_tiny_meta_header_in_use_1((void *)((uintptr_t)ptr - TINY_QUANTUM));
2769	}
2770#else
2771	tiny_mag_ptr->mag_bytes_free_at_start = 0;
2772#endif
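
	/*
	 * Editorial sketch of the fresh region's layout at this point (assuming
	 * ASLR_INTERNAL with a nonzero offset_msize; without it the leading gap is
	 * simply absent):
	 *
	 *	| mag_bytes_free_at_start | block of msize quanta | mag_bytes_free_at_end |
	 *	^ aligned_address           ^ ptr
	 *
	 * The single-quantum "in use" boundary headers written just before ptr and
	 * just after the new block keep later coalescing from spilling into the
	 * still-unclaimed space at either end.
	 */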
2773
2774	// connect to magazine as last node
2775	recirc_list_splice_last(szone, tiny_mag_ptr, REGION_TRAILER_FOR_TINY_REGION(aligned_address));
2776
2777#if DEBUG_MALLOC
2778	if (LOG(szone,ptr)) {
2779		malloc_printf("in tiny_malloc_from_region_no_lock(), ptr=%p, msize=%d\n", ptr, msize);
2780	}
2781#endif
2782	return ptr;
2783}
2784
2785static INLINE void *
2786tiny_try_shrink_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_good_size)
2787{
2788	msize_t new_msize = TINY_MSIZE_FOR_BYTES(new_good_size);
2789	msize_t mshrinkage = TINY_MSIZE_FOR_BYTES(old_size) - new_msize;
2790
2791	if (mshrinkage) {
2792		void *q = (void *)((uintptr_t)ptr + TINY_BYTES_FOR_MSIZE(new_msize));
2793		magazine_t *tiny_mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->tiny_magazines,
2794																	REGION_TRAILER_FOR_TINY_REGION(TINY_REGION_FOR_PTR(ptr)),
2795																	MAGAZINE_INDEX_FOR_TINY_REGION(TINY_REGION_FOR_PTR(ptr)));
2796
2797		// Mark q as block header and in-use, thus creating two blocks.
2798		set_tiny_meta_header_in_use(q, mshrinkage);
2799		tiny_mag_ptr->mag_num_objects++;
2800
2801		SZONE_MAGAZINE_PTR_UNLOCK(szone,tiny_mag_ptr);
2802		szone_free(szone, q); // avoid inlining free_tiny(szone, q, ...);
2803	}
2804	return ptr;
2805}
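
/*
 * Editorial worked example for tiny_try_shrink_in_place() (assuming
 * TINY_QUANTUM == 16): shrinking a 160-byte block (msize 10) to a good size of
 * 64 bytes (msize 4) gives mshrinkage == 6.  The tail at ptr + 64 is stamped as
 * a separate in-use block of 6 quanta and then handed to szone_free(), which
 * returns those 96 bytes to the free list through the normal free path,
 * including any coalescing with a free neighbor.
 */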
2806
2807static INLINE boolean_t
2808tiny_try_realloc_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size)
2809{
2810	// returns 1 on success
2811	msize_t	index;
2812	msize_t	old_msize;
2813	unsigned	next_index;
2814	void	*next_block;
2815	boolean_t	is_free;
2816	msize_t	next_msize, coalesced_msize, leftover_msize;
2817	void	*leftover;
2818
2819	index = TINY_INDEX_FOR_PTR(ptr);
2820	old_msize = TINY_MSIZE_FOR_BYTES(old_size);
2821	next_index = index + old_msize;
2822
2823	if (next_index >= NUM_TINY_BLOCKS) {
2824		return 0;
2825	}
2826	next_block = (char *)ptr + old_size;
2827
2828	magazine_t	*tiny_mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->tiny_magazines,
2829																 REGION_TRAILER_FOR_TINY_REGION(TINY_REGION_FOR_PTR(ptr)),
2830																 MAGAZINE_INDEX_FOR_TINY_REGION(TINY_REGION_FOR_PTR(ptr)));
2831
2832	/*
2833	 * Look for a free block immediately afterwards.  If it's large enough, we can consume (part of)
2834	 * it.
2835	 */
2836	is_free = tiny_meta_header_is_free(next_block);
2837	if (!is_free) {
2838		SZONE_MAGAZINE_PTR_UNLOCK(szone,tiny_mag_ptr);
2839		return 0; // next_block is in use;
2840	}
2841	next_msize = get_tiny_free_size(next_block);
2842	if (old_size + TINY_BYTES_FOR_MSIZE(next_msize) < new_size) {
2843		SZONE_MAGAZINE_PTR_UNLOCK(szone,tiny_mag_ptr);
2844		return 0; // even with next block, not enough
2845	}
2846	/*
2847	 * The following block is big enough; pull it from its freelist and chop off enough to satisfy
2848	 * our needs.
2849	 */
2850	tiny_free_list_remove_ptr(szone, tiny_mag_ptr, next_block, next_msize);
2851	set_tiny_meta_header_middle(next_block); // clear the meta_header to enable coalescing backwards
2852	coalesced_msize = TINY_MSIZE_FOR_BYTES(new_size - old_size + TINY_QUANTUM - 1);
2853	leftover_msize = next_msize - coalesced_msize;
2854	if (leftover_msize) {
2855		/* there's some left, so put the remainder back */
2856		leftover = (void *)((uintptr_t)next_block + TINY_BYTES_FOR_MSIZE(coalesced_msize));
2857
2858		tiny_free_list_add_ptr(szone, tiny_mag_ptr, leftover, leftover_msize);
2859	}
2860	set_tiny_meta_header_in_use(ptr, old_msize + coalesced_msize);
2861#if DEBUG_MALLOC
2862	if (LOG(szone,ptr)) {
2863		malloc_printf("in tiny_try_realloc_in_place(), ptr=%p, msize=%d\n", ptr, old_msize + coalesced_msize);
2864	}
2865#endif
2866	tiny_mag_ptr->mag_num_bytes_in_objects += TINY_BYTES_FOR_MSIZE(coalesced_msize);
2867
2868	// Update this region's bytes in use count
2869	region_trailer_t *node = REGION_TRAILER_FOR_TINY_REGION(TINY_REGION_FOR_PTR(ptr));
2870	size_t bytes_used = node->bytes_used + TINY_BYTES_FOR_MSIZE(coalesced_msize);
2871	node->bytes_used = bytes_used;
2872
2873	// Emptiness discriminant
2874	if (bytes_used < DENSITY_THRESHOLD(TINY_REGION_PAYLOAD_BYTES)) {
2875		/* After this reallocation the region is still sparse, so it must have been even more so before
2876		 the reallocation. That implies the region is already correctly marked. Do nothing. */
2877	} else {
2878		/* Region has crossed threshold from sparsity to density. Mark it not "suitable" on the
2879		 recirculation candidates list. */
2880		node->recirc_suitable = FALSE;
2881	}
2882
2883	SZONE_MAGAZINE_PTR_UNLOCK(szone,tiny_mag_ptr);
2884	CHECK(szone, __PRETTY_FUNCTION__);
2885	return 1;
2886}
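
/*
 * Editorial worked example for tiny_try_realloc_in_place() (assuming
 * TINY_QUANTUM == 16): growing a 48-byte block (old_msize 3) to new_size 70
 * needs 22 more bytes, which rounds up to coalesced_msize == 2.  If the
 * adjacent free block has next_msize == 5, two of its quanta are absorbed,
 * the leftover 3 quanta (48 bytes) go back on the free list, and the block is
 * re-stamped in use with msize 5 (80 bytes >= 70).
 */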
2887
2888static boolean_t
2889tiny_check_region(szone_t *szone, region_t region)
2890{
2891	uintptr_t	start, ptr, region_end;
2892	boolean_t	prev_free = 0;
2893	boolean_t	is_free;
2894	msize_t	msize;
2895	free_list_t	*free_head;
2896	void	*follower, *previous, *next;
2897	mag_index_t	mag_index = MAGAZINE_INDEX_FOR_TINY_REGION(region);
2898	magazine_t	*tiny_mag_ptr = &(szone->tiny_magazines[mag_index]);
2899
2900	// Assumes locked
2901	CHECK_MAGAZINE_PTR_LOCKED(szone, tiny_mag_ptr, __PRETTY_FUNCTION__);
2902
2903	/* establish region limits */
2904	start = (uintptr_t)TINY_REGION_ADDRESS(region);
2905	ptr = start;
2906	if (region == tiny_mag_ptr->mag_last_region) {
2907		ptr += tiny_mag_ptr->mag_bytes_free_at_start;
2908
2909		/*
2910		 * Check the leading block's integrity here also.
2911		 */
2912		if (tiny_mag_ptr->mag_bytes_free_at_start) {
2913			msize = get_tiny_meta_header((void *)(ptr - TINY_QUANTUM), &is_free);
2914			if (is_free || (msize != 1)) {
2915				malloc_printf("*** invariant broken for leader block %p - %d %d\n", ptr - TINY_QUANTUM, msize, is_free);
2916			}
2917		}
2918	}
2919	region_end = (uintptr_t)TINY_REGION_END(region);
2920
2921	/*
2922	 * The last region may have a trailing chunk which has not been converted into inuse/freelist
2923	 * blocks yet.
2924	 */
2925	if (region == tiny_mag_ptr->mag_last_region)
2926		region_end -= tiny_mag_ptr->mag_bytes_free_at_end;
2927
2928	/*
2929	 * Scan blocks within the region.
2930	 */
2931	while (ptr < region_end) {
2932		/*
2933		 * If the first block is free, and its size is 65536 (msize = 0) then the entire region is
2934		 * free.
2935		 */
2936		msize = get_tiny_meta_header((void *)ptr, &is_free);
2937		if (is_free && !msize && (ptr == start)) {
2938			return 1;
2939		}
2940
2941		/*
		 * If the block's size is 65536 (msize = 0) and we're not looking at the first entry,
		 * then the size field is corrupt.
2944		 */
2945		if (!msize) {
2946			malloc_printf("*** invariant broken for tiny block %p this msize=%d - size is too small\n",
2947						  ptr, msize);
2948			return 0;
2949		}
2950
2951		if (!is_free) {
2952			/*
2953			 * In use blocks cannot be more than (NUM_TINY_SLOTS - 1) quanta large.
2954			 */
2955			prev_free = 0;
2956			if (msize > (NUM_TINY_SLOTS - 1)) {
2957				malloc_printf("*** invariant broken for %p this tiny msize=%d - size is too large\n",
2958							  ptr, msize);
2959				return 0;
2960			}
2961			/* move to next block */
2962			ptr += TINY_BYTES_FOR_MSIZE(msize);
2963		} else {
2964#if !RELAXED_INVARIANT_CHECKS
2965			/*
2966			 * Free blocks must have been coalesced, we cannot have a free block following another
2967			 * free block.
2968			 */
2969			if (prev_free) {
2970				malloc_printf("*** invariant broken for free block %p this tiny msize=%d: two free blocks in a row\n",
2971							  ptr, msize);
2972				return 0;
2973			}
2974#endif // RELAXED_INVARIANT_CHECKS
2975			prev_free = 1;
2976			/*
2977			 * Check the integrity of this block's entry in its freelist.
2978			 */
2979			free_head = (free_list_t *)ptr;
2980			previous = free_list_unchecksum_ptr(szone, &free_head->previous);
2981			next = free_list_unchecksum_ptr(szone, &free_head->next);
2982			if (previous && !tiny_meta_header_is_free(previous)) {
2983				malloc_printf("*** invariant broken for %p (previous %p is not a free pointer)\n",
2984							  ptr, previous);
2985				return 0;
2986			}
2987			if (next && !tiny_meta_header_is_free(next)) {
2988				malloc_printf("*** invariant broken for %p (next in free list %p is not a free pointer)\n",
2989							  ptr, next);
2990				return 0;
2991			}
2992			/*
2993			 * Check the free block's trailing size value.
2994			 */
2995			follower = FOLLOWING_TINY_PTR(ptr, msize);
2996			if (((uintptr_t)follower != region_end) && (get_tiny_previous_free_msize(follower) != msize)) {
2997				malloc_printf("*** invariant broken for tiny free %p followed by %p in region [%p-%p] "
2998							  "(end marker incorrect) should be %d; in fact %d\n",
2999							  ptr, follower, TINY_REGION_ADDRESS(region), region_end, msize, get_tiny_previous_free_msize(follower));
3000				return 0;
3001			}
3002			/* move to next block */
3003			ptr = (uintptr_t)follower;
3004		}
3005	}
3006	/*
3007	 * Ensure that we scanned the entire region
3008	 */
3009	if (ptr != region_end) {
3010		malloc_printf("*** invariant broken for region end %p - %p\n", ptr, region_end);
3011		return 0;
3012	}
3013	/*
3014	 * Check the trailing block's integrity.
3015	 */
3016	if (region == tiny_mag_ptr->mag_last_region) {
3017		if (tiny_mag_ptr->mag_bytes_free_at_end) {
3018			msize = get_tiny_meta_header((void *)ptr, &is_free);
3019			if (is_free || (msize != 1)) {
3020				malloc_printf("*** invariant broken for blocker block %p - %d %d\n", ptr, msize, is_free);
3021			}
3022		}
3023	}
3024	return 1;
3025}
3026
3027static kern_return_t
3028tiny_in_use_enumerator(task_t task, void *context, unsigned type_mask, szone_t *szone,
3029					   memory_reader_t reader, vm_range_recorder_t recorder)
3030{
3031	size_t		num_regions;
3032	size_t		index;
3033	region_t		*regions;
3034	vm_range_t		buffer[MAX_RECORDER_BUFFER];
3035	unsigned		count = 0;
3036	kern_return_t	err;
3037	region_t		region;
3038	vm_range_t		range;
3039	vm_range_t		admin_range;
3040	vm_range_t		ptr_range;
3041	unsigned char	*mapped_region;
3042	uint32_t		*block_header;
3043	uint32_t		*in_use;
3044	unsigned		block_index;
3045	unsigned		block_limit;
3046	boolean_t		is_free;
3047	msize_t		msize;
3048	void		*mapped_ptr;
3049	unsigned 		bit;
3050	magazine_t          *tiny_mag_base = NULL;
3051
3052	region_hash_generation_t *trg_ptr;
3053	err = reader(task, (vm_address_t)szone->tiny_region_generation, sizeof(region_hash_generation_t), (void **)&trg_ptr);
3054	if (err) return err;
3055
3056	num_regions = trg_ptr->num_regions_allocated;
3057	err = reader(task, (vm_address_t)trg_ptr->hashed_regions, sizeof(region_t) * num_regions, (void **)&regions);
3058	if (err) return err;
3059
3060	if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) {
3061		// Map in all active magazines. Do this outside the iteration over regions.
3062		err = reader(task, (vm_address_t)(szone->tiny_magazines),
3063					 szone->num_tiny_magazines*sizeof(magazine_t),(void **)&tiny_mag_base);
3064		if (err) return err;
3065	}
3066
3067	for (index = 0; index < num_regions; ++index) {
3068		region = regions[index];
3069		if (HASHRING_OPEN_ENTRY != region && HASHRING_REGION_DEALLOCATED != region) {
3070			range.address = (vm_address_t)TINY_REGION_ADDRESS(region);
3071			range.size = (vm_size_t)TINY_REGION_SIZE;
3072			if (type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) {
3073				admin_range.address = range.address + TINY_METADATA_START;
3074				admin_range.size = TINY_METADATA_SIZE;
3075				recorder(task, context, MALLOC_ADMIN_REGION_RANGE_TYPE, &admin_range, 1);
3076			}
3077			if (type_mask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE)) {
3078				ptr_range.address = range.address;
3079				ptr_range.size = NUM_TINY_BLOCKS * TINY_QUANTUM;
3080				recorder(task, context, MALLOC_PTR_REGION_RANGE_TYPE, &ptr_range, 1);
3081			}
3082			if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) {
3083				void 		*mag_last_free;
3084				vm_address_t	mag_last_free_ptr = 0;
3085				msize_t		mag_last_free_msize = 0;
3086
3087				err = reader(task, range.address, range.size, (void **)&mapped_region);
3088				if (err)
3089					return err;
3090
3091				mag_index_t mag_index = MAGAZINE_INDEX_FOR_TINY_REGION(mapped_region);
3092				magazine_t *tiny_mag_ptr = tiny_mag_base + mag_index;
3093
3094				if (DEPOT_MAGAZINE_INDEX != mag_index) {
3095					mag_last_free = tiny_mag_ptr->mag_last_free;
3096					if (mag_last_free) {
3097						mag_last_free_ptr = (uintptr_t) mag_last_free & ~(TINY_QUANTUM - 1);
3098						mag_last_free_msize = (uintptr_t) mag_last_free & (TINY_QUANTUM - 1);
3099					}
3100				} else {
3101					for (mag_index = 0; mag_index < szone->num_tiny_magazines; mag_index++) {
3102						if ((void *)range.address == (tiny_mag_base + mag_index)->mag_last_free_rgn) {
3103							mag_last_free = (tiny_mag_base + mag_index)->mag_last_free;
3104							if (mag_last_free) {
3105								mag_last_free_ptr = (uintptr_t) mag_last_free & ~(TINY_QUANTUM - 1);
3106								mag_last_free_msize = (uintptr_t) mag_last_free & (TINY_QUANTUM - 1);
3107							}
3108						}
3109					}
3110				}
3111
3112				block_header = (uint32_t *)(mapped_region + TINY_METADATA_START + sizeof(region_trailer_t));
3113				in_use = TINY_INUSE_FOR_HEADER(block_header);
3114				block_index = 0;
3115				block_limit = NUM_TINY_BLOCKS;
3116				if (region == tiny_mag_ptr->mag_last_region) {
3117					block_index += TINY_MSIZE_FOR_BYTES(tiny_mag_ptr->mag_bytes_free_at_start);
3118					block_limit -= TINY_MSIZE_FOR_BYTES(tiny_mag_ptr->mag_bytes_free_at_end);
3119				}
3120
3121				while (block_index < block_limit) {
3122					vm_size_t block_offset = TINY_BYTES_FOR_MSIZE(block_index);
3123					is_free = !BITARRAY_BIT(in_use, block_index);
3124					if (is_free) {
3125						mapped_ptr = mapped_region + block_offset;
3126
3127						// mapped_region, the address at which 'range' in 'task' has been
3128						// mapped into our process, is not necessarily aligned to
3129						// TINY_BLOCKS_ALIGN.
3130						//
						// Since the code in get_tiny_free_size() assumes the pointer came
						// from a properly aligned tiny region, and mapped_region is not
						// necessarily aligned, we do the size calculation directly here.
3134						// If the next bit is set in the header bitmap, then the size is one
3135						// quantum.  Otherwise, read the size field.
3136						if (!BITARRAY_BIT(block_header, (block_index+1)))
3137							msize = TINY_FREE_SIZE(mapped_ptr);
3138						else
3139							msize = 1;
3140
3141					} else if (range.address + block_offset != mag_last_free_ptr) {
3142						msize = 1;
3143						bit = block_index + 1;
3144						while (! BITARRAY_BIT(block_header, bit)) {
3145							bit++;
3146							msize ++;
3147						}
3148						buffer[count].address = range.address + block_offset;
3149						buffer[count].size = TINY_BYTES_FOR_MSIZE(msize);
3150						count++;
3151						if (count >= MAX_RECORDER_BUFFER) {
3152							recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
3153							count = 0;
3154						}
3155					} else {
3156						// Block is not free but it matches mag_last_free_ptr so even
3157						// though it is not marked free in the bitmap, we treat it as if
3158						// it is and move on
3159						msize = mag_last_free_msize;
3160					}
3161
3162					if (!msize)
						return KERN_FAILURE; // Something's amiss. Avoid looping at this block_index.
3164
3165					block_index += msize;
3166				}
3167				if (count) {
3168					recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
3169					count = 0;
3170				}
3171			}
3172		}
3173	}
3174	return 0;
3175}
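
/*
 * Editorial sketch (not part of the original source): tiny_in_use_enumerator()
 * is typically driven by an out-of-process inspection tool through the zone
 * introspection interface.  A recorder callback compatible with the
 * vm_range_recorder_t usage above might look like this (the printing body is
 * purely illustrative):
 *
 *	static void
 *	show_range(task_t task, void *context, unsigned type, vm_range_t *ranges, unsigned count)
 *	{
 *		unsigned i;
 *		for (i = 0; i < count; i++)
 *			printf("in-use tiny block %p (%lu bytes)\n",
 *			       (void *)ranges[i].address, (unsigned long)ranges[i].size);
 *	}
 *
 * Note that in-use ranges are batched: the recorder may be invoked several
 * times per region, each call delivering at most MAX_RECORDER_BUFFER entries.
 */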
3176
3177static void *
3178tiny_malloc_from_free_list(szone_t *szone, magazine_t *tiny_mag_ptr, mag_index_t mag_index, msize_t msize)
3179{
3180	free_list_t		*ptr;
3181	msize_t		this_msize;
3182	grain_t		slot = msize - 1;
3183	free_list_t		**free_list = tiny_mag_ptr->mag_free_list;
3184	free_list_t		**the_slot = free_list + slot;
3185	free_list_t		*next;
3186	free_list_t		**limit;
3187#if defined(__LP64__)
3188	uint64_t		bitmap;
3189#else
3190	uint32_t		bitmap;
3191#endif
3192	msize_t		leftover_msize;
3193	free_list_t		*leftover_ptr;
3194
3195	// Assumes we've locked the region
3196	CHECK_MAGAZINE_PTR_LOCKED(szone, tiny_mag_ptr, __PRETTY_FUNCTION__);
3197
3198	// Look for an exact match by checking the freelist for this msize.
3199	//
3200	ptr = *the_slot;
3201	if (ptr) {
3202		next = free_list_unchecksum_ptr(szone, &ptr->next);
3203		if (next) {
3204			next->previous = ptr->previous;
3205		} else {
3206			BITMAPV_CLR(tiny_mag_ptr->mag_bitmap, slot);
3207		}
3208		*the_slot = next;
3209		this_msize = msize;
3210#if DEBUG_MALLOC
3211		if (LOG(szone, ptr)) {
3212			malloc_printf("in tiny_malloc_from_free_list(), exact match ptr=%p, this_msize=%d\n", ptr, this_msize);
3213		}
3214#endif
3215		goto return_tiny_alloc;
3216	}
3217
3218	// Mask off the bits representing slots holding free blocks smaller than the
3219	// size we need.  If there are no larger free blocks, try allocating from
3220	// the free space at the end of the tiny region.
3221#if defined(__LP64__)
3222	bitmap = ((uint64_t *)(tiny_mag_ptr->mag_bitmap))[0] & ~ ((1ULL << slot) - 1);
3223#else
3224	bitmap = tiny_mag_ptr->mag_bitmap[0] & ~ ((1 << slot) - 1);
3225#endif
3226	if (!bitmap)
3227		goto try_tiny_malloc_from_end;
3228
3229	slot = BITMAPV_CTZ(bitmap);
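	// Editorial worked example of the mask-and-ctz selection above (purely
	// illustrative numbers): for msize == 4 the starting slot is 3.  If only
	// slots 1 and 6 are populated, the bitmap word is 0b01000010; masking with
	// ~((1 << 3) - 1) leaves 0b01000000, and the count-trailing-zeros step
	// yields slot 6, i.e. the smallest free block that is still big enough
	// (7 quanta).  The 3-quantum surplus is split off below in
	// add_leftover_and_proceed.
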
3230	limit = free_list + NUM_TINY_SLOTS - 1;
3231	free_list += slot;
3232
3233	if (free_list < limit) {
3234		ptr = *free_list;
3235		if (ptr) {
3236			next = free_list_unchecksum_ptr(szone, &ptr->next);
3237			*free_list = next;
3238			if (next) {
3239				next->previous = ptr->previous;
3240			} else {
3241				BITMAPV_CLR(tiny_mag_ptr->mag_bitmap, slot);
3242			}
3243			this_msize = get_tiny_free_size(ptr);
3244			goto add_leftover_and_proceed;
3245		}
3246#if DEBUG_MALLOC
3247		malloc_printf("in tiny_malloc_from_free_list(), mag_bitmap out of sync, slot=%d\n",slot);
3248#endif
3249	}
3250
3251	// We are now looking at the last slot, which contains blocks equal to, or
3252	// due to coalescing of free blocks, larger than (NUM_TINY_SLOTS - 1) * tiny quantum size.
3253	// If the last freelist is not empty, and the head contains a block that is
3254	// larger than our request, then the remainder is put back on the free list.
3255	ptr = *limit;
3256	if (ptr) {
3257		this_msize = get_tiny_free_size(ptr);
3258		next = free_list_unchecksum_ptr(szone, &ptr->next);
3259		if (this_msize - msize >= NUM_TINY_SLOTS) {
3260			// the leftover will go back to the free list, so we optimize by
3261			// modifying the free list rather than a pop and push of the head
3262			leftover_msize = this_msize - msize;
3263			leftover_ptr = (free_list_t *)((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize));
3264			*limit = leftover_ptr;
3265			if (next) {
3266				next->previous.u = free_list_checksum_ptr(szone, leftover_ptr);
3267			}
3268			leftover_ptr->previous = ptr->previous;
3269			leftover_ptr->next = ptr->next;
3270			set_tiny_meta_header_free(leftover_ptr, leftover_msize);
3271#if DEBUG_MALLOC
3272			if (LOG(szone,ptr)) {
3273				malloc_printf("in tiny_malloc_from_free_list(), last slot ptr=%p, msize=%d this_msize=%d\n",
3274							  ptr, msize, this_msize);
3275			}
3276#endif
3277			this_msize = msize;
3278			goto return_tiny_alloc;
3279		}
3280		if (next) {
3281			next->previous = ptr->previous;
3282		}
3283		*limit = next;
3284		goto add_leftover_and_proceed;
3285		/* NOTREACHED */
3286	}
3287
3288try_tiny_malloc_from_end:
3289	// Let's see if we can use tiny_mag_ptr->mag_bytes_free_at_end
3290	if (tiny_mag_ptr->mag_bytes_free_at_end >= TINY_BYTES_FOR_MSIZE(msize)) {
3291		ptr = (free_list_t *)((uintptr_t)TINY_REGION_END(tiny_mag_ptr->mag_last_region) -
3292							  tiny_mag_ptr->mag_bytes_free_at_end);
3293		tiny_mag_ptr->mag_bytes_free_at_end -= TINY_BYTES_FOR_MSIZE(msize);
3294		if (tiny_mag_ptr->mag_bytes_free_at_end) {
3295			// let's add an in use block after ptr to serve as boundary
3296			set_tiny_meta_header_in_use_1((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize));
3297		}
3298		this_msize = msize;
3299#if DEBUG_MALLOC
3300		if (LOG(szone, ptr)) {
3301			malloc_printf("in tiny_malloc_from_free_list(), from end ptr=%p, msize=%d\n", ptr, msize);
3302		}
3303#endif
3304		goto return_tiny_alloc;
3305	}
3306#if ASLR_INTERNAL
3307	// Try from start if nothing left at end
3308	if (tiny_mag_ptr->mag_bytes_free_at_start >= TINY_BYTES_FOR_MSIZE(msize)) {
3309		ptr = (free_list_t *)(TINY_REGION_ADDRESS(tiny_mag_ptr->mag_last_region) +
3310							  tiny_mag_ptr->mag_bytes_free_at_start - TINY_BYTES_FOR_MSIZE(msize));
3311		tiny_mag_ptr->mag_bytes_free_at_start -= TINY_BYTES_FOR_MSIZE(msize);
3312		if (tiny_mag_ptr->mag_bytes_free_at_start) {
3313			// let's add an in use block before ptr to serve as boundary
3314			set_tiny_meta_header_in_use_1((unsigned char *)ptr - TINY_QUANTUM);
3315		}
3316		this_msize = msize;
3317#if DEBUG_MALLOC
3318		if (LOG(szone, ptr)) {
3319			malloc_printf("in tiny_malloc_from_free_list(), from start ptr=%p, msize=%d\n", ptr, msize);
3320		}
3321#endif
3322		goto return_tiny_alloc;
3323	}
3324#endif
3325	return NULL;
3326
3327add_leftover_and_proceed:
3328	if (!this_msize || (this_msize > msize)) {
3329		leftover_msize = this_msize - msize;
3330		leftover_ptr = (free_list_t *)((unsigned char *)ptr + TINY_BYTES_FOR_MSIZE(msize));
3331#if DEBUG_MALLOC
3332		if (LOG(szone,ptr)) {
3333			malloc_printf("in tiny_malloc_from_free_list(), adding leftover ptr=%p, this_msize=%d\n", ptr, this_msize);
3334		}
3335#endif
3336		tiny_free_list_add_ptr(szone, tiny_mag_ptr, leftover_ptr, leftover_msize);
3337		this_msize = msize;
3338	}
3339
3340return_tiny_alloc:
3341	tiny_mag_ptr->mag_num_objects++;
3342	tiny_mag_ptr->mag_num_bytes_in_objects += TINY_BYTES_FOR_MSIZE(this_msize);
3343
3344	// Update this region's bytes in use count
3345	region_trailer_t *node = REGION_TRAILER_FOR_TINY_REGION(TINY_REGION_FOR_PTR(ptr));
3346	size_t bytes_used = node->bytes_used + TINY_BYTES_FOR_MSIZE(this_msize);
3347	node->bytes_used = bytes_used;
3348
3349	// Emptiness discriminant
3350	if (bytes_used < DENSITY_THRESHOLD(TINY_REGION_PAYLOAD_BYTES)) {
3351		/* After this allocation the region is still sparse, so it must have been even more so before
3352		 the allocation. That implies the region is already correctly marked. Do nothing. */
3353	} else {
3354		/* Region has crossed threshold from sparsity to density. Mark it not "suitable" on the
3355		 recirculation candidates list. */
3356		node->recirc_suitable = FALSE;
3357	}
3358#if DEBUG_MALLOC
3359	if (LOG(szone,ptr)) {
3360		malloc_printf("in tiny_malloc_from_free_list(), ptr=%p, this_msize=%d, msize=%d\n", ptr, this_msize, msize);
3361	}
3362#endif
3363	if (this_msize > 1)
3364		set_tiny_meta_header_in_use(ptr, this_msize);
3365	else
3366		set_tiny_meta_header_in_use_1(ptr);
3367	return ptr;
3368}
3369#undef DENSITY_THRESHOLD
3370#undef K
3371
3372static INLINE void *
3373tiny_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_requested)
3374{
3375	void	*ptr;
3376	mag_index_t	mag_index = mag_get_thread_index(szone);
3377	magazine_t	*tiny_mag_ptr = &(szone->tiny_magazines[mag_index]);
3378
3379#if DEBUG_MALLOC
3380	if (DEPOT_MAGAZINE_INDEX == mag_index) {
3381		szone_error(szone, 1, "malloc called for magazine index -1", NULL, NULL);
3382		return(NULL);
3383	}
3384
3385	if (!msize) {
3386		szone_error(szone, 1, "invariant broken (!msize) in allocation (region)", NULL, NULL);
3387		return(NULL);
3388	}
3389#endif
3390
3391	SZONE_MAGAZINE_PTR_LOCK(szone, tiny_mag_ptr);
3392
3393#if TINY_CACHE
3394	ptr = tiny_mag_ptr->mag_last_free;
3395
3396	if ((((uintptr_t)ptr) & (TINY_QUANTUM - 1)) == msize) {
3397		// we have a winner
3398		tiny_mag_ptr->mag_last_free = NULL;
3399		tiny_mag_ptr->mag_last_free_rgn = NULL;
3400		SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3401		CHECK(szone, __PRETTY_FUNCTION__);
3402		ptr = (void *)((uintptr_t)ptr & ~ (TINY_QUANTUM - 1));
3403		if (cleared_requested) {
3404			memset(ptr, 0, TINY_BYTES_FOR_MSIZE(msize));
3405		}
3406#if DEBUG_MALLOC
3407		if (LOG(szone,ptr)) {
3408			malloc_printf("in tiny_malloc_should_clear(), tiny cache ptr=%p, msize=%d\n", ptr, msize);
3409		}
3410#endif
3411		return ptr;
3412	}
3413#endif /* TINY_CACHE */
3414
3415	while (1) {
3416		ptr = tiny_malloc_from_free_list(szone, tiny_mag_ptr, mag_index, msize);
3417		if (ptr) {
3418			SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3419			CHECK(szone, __PRETTY_FUNCTION__);
3420			if (cleared_requested) {
3421				memset(ptr, 0, TINY_BYTES_FOR_MSIZE(msize));
3422			}
3423			return ptr;
3424		}
3425
3426		if (tiny_get_region_from_depot(szone, tiny_mag_ptr, mag_index, msize)) {
3427			ptr = tiny_malloc_from_free_list(szone, tiny_mag_ptr, mag_index, msize);
3428			if (ptr) {
3429				SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3430				CHECK(szone, __PRETTY_FUNCTION__);
3431				if (cleared_requested) {
3432					memset(ptr, 0, TINY_BYTES_FOR_MSIZE(msize));
3433				}
3434				return ptr;
3435			}
3436		}
3437
3438		// The magazine is exhausted. A new region (heap) must be allocated to satisfy this call to malloc().
3439		// The allocation, an mmap() system call, will be performed outside the magazine spin locks by the first
3440		// thread that suffers the exhaustion. That thread sets "alloc_underway" and enters a critical section.
3441		// Threads arriving here later are excluded from the critical section, yield the CPU, and then retry the
3442		// allocation. After some time the magazine is resupplied, the original thread leaves with its allocation,
		// and retrying threads succeed in the code just above.
3444		if (!tiny_mag_ptr->alloc_underway) {
3445			void *fresh_region;
3446
3447			// time to create a new region (do this outside the magazine lock)
3448			tiny_mag_ptr->alloc_underway = TRUE;
3449			OSMemoryBarrier();
3450			SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3451			fresh_region = allocate_pages_securely(szone, TINY_REGION_SIZE, TINY_BLOCKS_ALIGN, VM_MEMORY_MALLOC_TINY);
3452			SZONE_MAGAZINE_PTR_LOCK(szone, tiny_mag_ptr);
3453
3454			MAGMALLOC_ALLOCREGION((void *)szone, (int)mag_index, fresh_region, TINY_REGION_SIZE); // DTrace USDT Probe
3455
3456			if (!fresh_region) { // out of memory!
3457				tiny_mag_ptr->alloc_underway = FALSE;
3458				OSMemoryBarrier();
3459				SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3460				return NULL;
3461			}
3462
3463			ptr = tiny_malloc_from_region_no_lock(szone, tiny_mag_ptr, mag_index, msize, fresh_region);
3464
3465			// we don't clear because this freshly allocated space is pristine
3466			tiny_mag_ptr->alloc_underway = FALSE;
3467			OSMemoryBarrier();
3468			SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3469			CHECK(szone, __PRETTY_FUNCTION__);
3470			return ptr;
3471		} else {
3472			SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3473			yield();
3474			SZONE_MAGAZINE_PTR_LOCK(szone, tiny_mag_ptr);
3475		}
3476	}
3477	/* NOTREACHED */
3478}
3479
3480static NOINLINE void
3481free_tiny_botch(szone_t *szone, free_list_t *ptr)
3482{
3483	mag_index_t mag_index = MAGAZINE_INDEX_FOR_TINY_REGION(TINY_REGION_FOR_PTR(ptr));
3484	magazine_t	*tiny_mag_ptr = &(szone->tiny_magazines[mag_index]);
3485	SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3486	szone_error(szone, 1, "double free", ptr, NULL);
3487}
3488
3489static INLINE void
3490free_tiny(szone_t *szone, void *ptr, region_t tiny_region, size_t known_size)
3491{
3492	msize_t	msize;
3493	boolean_t	is_free;
3494	mag_index_t mag_index = MAGAZINE_INDEX_FOR_TINY_REGION(tiny_region);
3495	magazine_t	*tiny_mag_ptr = &(szone->tiny_magazines[mag_index]);
3496
3497	// ptr is known to be in tiny_region
3498	if (known_size) {
3499		msize = TINY_MSIZE_FOR_BYTES(known_size + TINY_QUANTUM - 1);
3500	} else {
3501		msize = get_tiny_meta_header(ptr, &is_free);
3502		if (is_free) {
3503			free_tiny_botch(szone, ptr);
3504			return;
3505		}
3506	}
3507#if DEBUG_MALLOC
3508	if (!msize) {
3509		malloc_printf("*** free_tiny() block in use is too large: %p\n", ptr);
3510		return;
3511	}
3512#endif
3513
3514	SZONE_MAGAZINE_PTR_LOCK(szone, tiny_mag_ptr);
3515
3516#if TINY_CACHE
3517	// Depot does not participate in TINY_CACHE since it can't be directly malloc()'d
3518	if (DEPOT_MAGAZINE_INDEX != mag_index) {
		if (msize < TINY_QUANTUM) { // check that msize fits in the pointer's low alignment bits (see note below)
3520			void *ptr2 = tiny_mag_ptr->mag_last_free; // Might be NULL
3521			region_t rgn2 = tiny_mag_ptr->mag_last_free_rgn;
3522
3523			/* check that we don't already have this pointer in the cache */
3524			if (ptr == (void *)((uintptr_t)ptr2 & ~ (TINY_QUANTUM - 1))) {
3525				free_tiny_botch(szone, ptr);
3526				return;
3527			}
3528
3529			if ((szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) && msize)
3530				memset(ptr, SCRABBLE_BYTE, TINY_BYTES_FOR_MSIZE(msize));
3531
3532			tiny_mag_ptr->mag_last_free = (void *)(((uintptr_t)ptr) | msize);
3533			tiny_mag_ptr->mag_last_free_rgn = tiny_region;
3534
3535			if (!ptr2) {
3536				SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3537				CHECK(szone, __PRETTY_FUNCTION__);
3538				return;
3539			}
3540
3541			msize = (uintptr_t)ptr2 & (TINY_QUANTUM - 1);
3542			ptr = (void *)(((uintptr_t)ptr2) & ~(TINY_QUANTUM - 1));
3543			tiny_region = rgn2;
3544		}
3545	}
3546#endif /* TINY_CACHE */
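
	/*
	 * Editorial note on the mag_last_free encoding used above (and decoded in
	 * tiny_malloc_should_clear() and the enumerator): tiny blocks are
	 * TINY_QUANTUM-aligned, so a block pointer's low bits are always zero and
	 * can carry the msize.  Assuming TINY_QUANTUM == 16, the packing is:
	 *
	 *	uintptr_t packed = (uintptr_t)ptr | msize;	// requires msize < 16
	 *	void     *block  = (void *)(packed & ~(uintptr_t)(TINY_QUANTUM - 1));
	 *	msize_t   size   = (msize_t)(packed & (TINY_QUANTUM - 1));
	 */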
3547
	// In the time it took to acquire the lock, the region may have migrated
	// from one magazine to another; i.e. trailer->mag_index is volatile.
	// In that case the magazine lock we obtained (namely magazines[mag_index].mag_lock)
	// is stale. If so, keep on tryin' ...
3552	region_trailer_t *trailer = REGION_TRAILER_FOR_TINY_REGION(tiny_region);
3553	mag_index_t refreshed_index;
3554
3555	while (mag_index != (refreshed_index = trailer->mag_index)) { // Note assignment
3556
3557		SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3558
3559		mag_index = refreshed_index;
3560		tiny_mag_ptr = &(szone->tiny_magazines[mag_index]);
3561		SZONE_MAGAZINE_PTR_LOCK(szone, tiny_mag_ptr);
3562	}
3563
3564	if (tiny_free_no_lock(szone, tiny_mag_ptr, mag_index, tiny_region, ptr, msize))
3565		SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3566
3567	CHECK(szone, __PRETTY_FUNCTION__);
3568}
3569
3570static void
3571print_tiny_free_list(szone_t *szone)
3572{
3573	free_list_t	*ptr;
3574	_SIMPLE_STRING b = _simple_salloc();
3575	mag_index_t mag_index;
3576
3577	if (b) {
3578		_simple_sappend(b, "tiny free sizes:\n");
3579		for (mag_index = -1; mag_index < szone->num_tiny_magazines; mag_index++) {
3580			grain_t	slot = 0;
3581			_simple_sprintf(b,"\tMagazine %d: ", mag_index);
3582			while (slot < NUM_TINY_SLOTS) {
3583				ptr = szone->tiny_magazines[mag_index].mag_free_list[slot];
3584				if (ptr) {
3585					_simple_sprintf(b, "%s%y[%d]; ", (slot == NUM_TINY_SLOTS-1) ? ">=" : "",
3586									(slot+1)*TINY_QUANTUM, free_list_count(szone, ptr));
3587				}
3588				slot++;
3589			}
3590			_simple_sappend(b,"\n");
3591		}
3592		_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b));
3593		_simple_sfree(b);
3594	}
3595}
3596
3597static void
3598print_tiny_region(boolean_t verbose, region_t region, size_t bytes_at_start, size_t bytes_at_end)
3599{
3600	unsigned		counts[1024];
3601	unsigned		in_use = 0;
3602	uintptr_t		start = (uintptr_t)TINY_REGION_ADDRESS(region);
	uintptr_t		current = start + bytes_at_start;
3604	uintptr_t		limit =  (uintptr_t)TINY_REGION_END(region) - bytes_at_end;
3605	boolean_t		is_free;
3606	msize_t		msize;
3607	unsigned		ci;
3608	_SIMPLE_STRING	b;
3609	uintptr_t		pgTot = 0;
3610
3611	if (region == HASHRING_REGION_DEALLOCATED) {
3612		if ((b = _simple_salloc()) != NULL) {
3613			_simple_sprintf(b, "Tiny region [unknown address] was returned to the OS\n");
3614			_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b));
3615			_simple_sfree(b);
3616		}
3617		return;
3618	}
3619
3620	memset(counts, 0, sizeof(counts));
3621	while (current < limit) {
3622		msize = get_tiny_meta_header((void *)current, &is_free);
		if (is_free && !msize && (current == start)) {
3624			// first block is all free
3625			uintptr_t pgLo = round_page_quanta(start + sizeof(free_list_t) + sizeof(msize_t));
3626			uintptr_t pgHi = trunc_page_quanta(start + TINY_REGION_SIZE - sizeof(msize_t));
3627
3628			if (pgLo < pgHi) {
3629				pgTot += (pgHi - pgLo);
3630			}
3631			break;
3632		}
3633		if (!msize) {
3634			malloc_printf("*** error with %p: msize=%d\n", (void *)current, (unsigned)msize);
3635			break;
3636		}
3637		if (!is_free) {
3638			// block in use
3639			if (msize > NUM_TINY_SLOTS)
3640				malloc_printf("*** error at %p msize for in_use is %d\n", (void *)current, msize);
3641			if (msize < 1024)
3642				counts[msize]++;
3643			in_use++;
3644		} else {
3645			uintptr_t pgLo = round_page_quanta(current + sizeof(free_list_t) + sizeof(msize_t));
3646			uintptr_t pgHi = trunc_page_quanta(current + TINY_BYTES_FOR_MSIZE(msize) - sizeof(msize_t));
3647
3648			if (pgLo < pgHi) {
3649				pgTot += (pgHi - pgLo);
3650			}
3651		}
3652		current += TINY_BYTES_FOR_MSIZE(msize);
3653	}
3654	if ((b = _simple_salloc()) != NULL) {
3655		_simple_sprintf(b, "Tiny region [%p-%p, %y] \t", (void *)start, TINY_REGION_END(region), (int)TINY_REGION_SIZE);
3656		_simple_sprintf(b, "Magazine=%d \t", MAGAZINE_INDEX_FOR_TINY_REGION(region));
3657		_simple_sprintf(b, "Allocations in use=%d \t Bytes in use=%ly \t", in_use, BYTES_USED_FOR_TINY_REGION(region));
3658		if (bytes_at_end || bytes_at_start)
3659			_simple_sprintf(b, "Untouched=%ly ", bytes_at_end + bytes_at_start);
3660		if (DEPOT_MAGAZINE_INDEX == MAGAZINE_INDEX_FOR_TINY_REGION(region)) {
3661			_simple_sprintf(b, "Advised MADV_FREE=%ly", pgTot);
3662		} else {
3663			_simple_sprintf(b, "Fragments subject to reclamation=%ly", pgTot);
3664		}
3665		if (verbose && in_use) {
3666			_simple_sappend(b, "\n\tSizes in use: ");
3667			for (ci = 0; ci < 1024; ci++)
3668				if (counts[ci])
3669					_simple_sprintf(b, "%d[%d] ", TINY_BYTES_FOR_MSIZE(ci), counts[ci]);
3670		}
3671		_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b));
3672		_simple_sfree(b);
3673	}
3674}
3675
3676static boolean_t
3677tiny_free_list_check(szone_t *szone, grain_t slot)
3678{
3679	mag_index_t mag_index;
3680
3681	for (mag_index = -1; mag_index < szone->num_tiny_magazines; mag_index++) {
3682		magazine_t	*tiny_mag_ptr = &(szone->tiny_magazines[mag_index]);
3683		SZONE_MAGAZINE_PTR_LOCK(szone, tiny_mag_ptr);
3684
3685		unsigned	count = 0;
3686		free_list_t	*ptr = szone->tiny_magazines[mag_index].mag_free_list[slot];
3687		boolean_t	is_free;
3688		free_list_t	*previous = NULL;
3689
3690		while (ptr) {
3691			is_free = tiny_meta_header_is_free(ptr);
3692			if (! is_free) {
3693				malloc_printf("*** in-use ptr in free list slot=%d count=%d ptr=%p\n", slot, count, ptr);
3694				SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3695				return 0;
3696			}
3697			if (((uintptr_t)ptr) & (TINY_QUANTUM - 1)) {
3698				malloc_printf("*** unaligned ptr in free list slot=%d  count=%d ptr=%p\n", slot, count, ptr);
3699				SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3700				return 0;
3701			}
3702			if (!tiny_region_for_ptr_no_lock(szone, ptr)) {
3703				malloc_printf("*** ptr not in szone slot=%d  count=%d ptr=%p\n", slot, count, ptr);
3704				SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3705				return 0;
3706			}
3707			if (free_list_unchecksum_ptr(szone, &ptr->previous) != previous) {
3708				malloc_printf("*** previous incorrectly set slot=%d  count=%d ptr=%p\n", slot, count, ptr);
3709				SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3710				return 0;
3711			}
3712			previous = ptr;
3713			ptr = free_list_unchecksum_ptr(szone, &ptr->next);
3714			count++;
3715		}
3716
3717		SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
3718	}
3719	return 1;
3720}
3721
3722/*********************	SMALL FREE LIST UTILITIES	************************/
3723
3724/*
3725 * Mark a block as free.  Only the first quantum of a block is marked thusly,
3726 * the remainder are marked "middle".
3727 */
3728static INLINE void
3729small_meta_header_set_is_free(msize_t *meta_headers, unsigned index, msize_t msize)
3730{
3731	meta_headers[index] = msize | SMALL_IS_FREE;
3732}
3733
3734/*
3735 * Mark a block as in use.  Only the first quantum of a block is marked thusly,
3736 * the remainder are marked "middle".
3737 */
3738static INLINE void
3739small_meta_header_set_in_use(msize_t *meta_headers, msize_t index, msize_t msize)
3740{
3741	meta_headers[index] = msize;
3742}
3743
3744/*
3745 * Mark a quantum as being the second or later in a block.
3746 */
3747static INLINE void
3748small_meta_header_set_middle(msize_t *meta_headers, msize_t index)
3749{
3750	meta_headers[index] = 0;
3751}
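
/*
 * Editorial example of the encoding the three helpers above maintain
 * (hypothetical index): a free block of 3 quanta whose first quantum has meta
 * index 10 is recorded as
 *
 *	meta_headers[10] = 3 | SMALL_IS_FREE;	// head quantum: size plus free flag
 *	meta_headers[11] = 0;			// "middle"
 *	meta_headers[12] = 0;			// "middle"
 *
 * The same block in use differs only in the head entry, which becomes plain 3.
 */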
3752
3753/*
3754 * Adds an item to the proper free list, and also marks the meta-header of the
3755 * block properly.
3756 * Assumes szone has been locked
3757 */
3758static void
3759small_free_list_add_ptr(szone_t *szone, magazine_t *small_mag_ptr, void *ptr, msize_t msize)
3760{
3761	grain_t	slot = (msize <= szone->num_small_slots) ? msize - 1 : szone->num_small_slots - 1;
3762	free_list_t	*free_ptr = ptr;
3763	free_list_t	*free_head = small_mag_ptr->mag_free_list[slot];
3764	void	*follower;
3765
3766#if DEBUG_MALLOC
3767	if (LOG(szone,ptr)) {
3768		malloc_printf("in %s, ptr=%p, msize=%d\n", __FUNCTION__, ptr, msize);
3769	}
3770	if (((uintptr_t)ptr) & (SMALL_QUANTUM - 1)) {
3771		szone_error(szone, 1, "small_free_list_add_ptr: Unaligned ptr", ptr, NULL);
3772	}
3773#endif
3774	small_meta_header_set_is_free(SMALL_META_HEADER_FOR_PTR(ptr), SMALL_META_INDEX_FOR_PTR(ptr), msize);
3775
3776	if (free_head) {
3777#if DEBUG_MALLOC
3778		if (free_list_unchecksum_ptr(szone, &free_head->previous)) {
3779			szone_error(szone, 1, "small_free_list_add_ptr: Internal invariant broken (free_head->previous)", ptr,
3780						"ptr=%p slot=%d free_head=%p previous=%p\n", ptr, slot, (void *)free_head, free_head->previous.p);
3781		}
3782		if (!SMALL_PTR_IS_FREE(free_head)) {
3783			szone_error(szone, 1, "small_free_list_add_ptr: Internal invariant broken (free_head is not a free pointer)", ptr,
3784						"ptr=%p slot=%d free_head=%p\n", ptr, slot, (void *)free_head);
3785		}
3786#endif
3787		free_head->previous.u = free_list_checksum_ptr(szone, free_ptr);
3788	} else {
3789		BITMAPN_SET(small_mag_ptr->mag_bitmap, slot);
3790	}
3791	free_ptr->previous.u = free_list_checksum_ptr(szone, NULL);
3792	free_ptr->next.u = free_list_checksum_ptr(szone, free_head);
3793
3794	small_mag_ptr->mag_free_list[slot] = free_ptr;
3795
3796	// Store msize at the end of the block denoted by "ptr" (i.e. at a negative offset from "follower")
3797	follower = (void *)((uintptr_t)ptr + SMALL_BYTES_FOR_MSIZE(msize));
3798	SMALL_PREVIOUS_MSIZE(follower) = msize;
3799}
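
/*
 * Editorial note: writing msize again at the tail of the freed block (via
 * SMALL_PREVIOUS_MSIZE(follower) above) is what makes backward coalescing a
 * constant-time operation.  When the block that follows is later freed, a
 * sketch of the lookup is:
 *
 *	msize_t prev_msize = SMALL_PREVIOUS_MSIZE(ptr);		// read the trailer just before ptr
 *	void   *prev_block = (void *)((uintptr_t)ptr - SMALL_BYTES_FOR_MSIZE(prev_msize));
 *	// if prev_block's meta header reads prev_msize | SMALL_IS_FREE, merge with it
 *
 * small_finalize_region() (below) uses exactly this trailer when it coalesces
 * the last block with its predecessor.
 */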
3800
3801/*
3802 * Removes the item pointed to by ptr in the proper free list.
3803 * Assumes szone has been locked
3804 */
3805static void
3806small_free_list_remove_ptr(szone_t *szone, magazine_t *small_mag_ptr, void *ptr, msize_t msize)
3807{
3808	grain_t	slot = (msize <= szone->num_small_slots) ? msize - 1 : szone->num_small_slots - 1;
3809	free_list_t	*free_ptr = ptr, *next, *previous;
3810
3811	next = free_list_unchecksum_ptr(szone, &free_ptr->next);
3812	previous = free_list_unchecksum_ptr(szone, &free_ptr->previous);
3813
3814#if DEBUG_MALLOC
3815	if (LOG(szone,ptr)) {
3816		malloc_printf("In %s, ptr=%p, msize=%d\n", __FUNCTION__, ptr, msize);
3817	}
3818#endif
3819
3820	if (!previous) {
3821		// The block to remove is the head of the free list
3822#if DEBUG_MALLOC
3823		if (small_mag_ptr->mag_free_list[slot] != ptr) {
3824			szone_error(szone, 1, "small_free_list_remove_ptr: Internal invariant broken (small_mag_ptr->mag_free_list[slot])", ptr,
3825						"ptr=%p slot=%d msize=%d small_mag_ptr->mag_free_list[slot]=%p\n",
3826						ptr, slot, msize, (void *)small_mag_ptr->mag_free_list[slot]);
3827			return;
3828		}
3829#endif
3830		small_mag_ptr->mag_free_list[slot] = next;
3831		if (!next) BITMAPN_CLR(small_mag_ptr->mag_bitmap, slot);
3832	} else {
3833		// We know free_ptr is already checksummed, so we don't need to do it
3834		// again.
3835		previous->next = free_ptr->next;
3836	}
3837	if (next) {
3838		// We know free_ptr is already checksummed, so we don't need to do it
3839		// again.
3840		next->previous = free_ptr->previous;
3841	}
3842}
3843
3844/*
3845 * small_region_for_ptr_no_lock - Returns the small region containing the pointer,
3846 * or NULL if not found.
3847 */
3848static INLINE region_t
3849small_region_for_ptr_no_lock(szone_t *szone, const void *ptr)
3850{
3851	rgnhdl_t r = hash_lookup_region_no_lock(szone->small_region_generation->hashed_regions,
3852											szone->small_region_generation->num_regions_allocated,
3853											szone->small_region_generation->num_regions_allocated_shift,
3854											SMALL_REGION_FOR_PTR(ptr));
3855	return r ? *r : r;
3856}
3857
3858static void
3859small_finalize_region(szone_t *szone, magazine_t *small_mag_ptr) {
3860	void      *last_block, *previous_block;
3861	msize_t   last_msize, previous_msize, last_index;
3862
3863	// It is possible that the block prior to the last block in the region has
3864	// been free'd, but was not coalesced with the free bytes at the end of the
3865	// block, since we treat the bytes at the end of the region as "in use" in
3866	// the meta headers. Attempt to coalesce the last block with the previous
3867	// block, so we don't violate the "no consecutive free blocks" invariant.
3868	//
3869	// FIXME: If we could calculate the previous small free size in the same
3870	//        manner as tiny_previous_preceding_free, it would eliminate the
3871	//        index & previous msize checks, which are a guard against reading
3872	//        bogus data out of in-use or written-on-freed memory.
3873	//
3874	// FIXME: Need to investigate how much work would be required to increase
3875	//        'mag_bytes_free_at_end' when freeing the preceding block, rather
3876	//        than performing this workaround.
3877	//
3878	if (small_mag_ptr->mag_bytes_free_at_end) {
3879		last_block = SMALL_REGION_END(small_mag_ptr->mag_last_region) - small_mag_ptr->mag_bytes_free_at_end;
3880		last_msize = SMALL_MSIZE_FOR_BYTES(small_mag_ptr->mag_bytes_free_at_end);
3881
3882		last_index = SMALL_META_INDEX_FOR_PTR(last_block);
3883		previous_msize = SMALL_PREVIOUS_MSIZE(last_block);
3884
3885		if (last_index && (previous_msize <= last_index)) {
3886			previous_block = (void *)((uintptr_t)last_block - SMALL_BYTES_FOR_MSIZE(previous_msize));
3887			if (*SMALL_METADATA_FOR_PTR(previous_block) == (previous_msize | SMALL_IS_FREE)) {
3888				msize_t *meta_headers = SMALL_META_HEADER_FOR_PTR(last_block);
3889
3890				small_meta_header_set_middle(meta_headers, last_index);
3891				small_free_list_remove_ptr(szone, small_mag_ptr, previous_block, previous_msize);
3892				last_block = (void *)((uintptr_t)last_block - SMALL_BYTES_FOR_MSIZE(previous_msize));
3893				last_msize += previous_msize;
3894			}
3895		}
3896
3897		// splice last_block into the free list
3898		small_free_list_add_ptr(szone, small_mag_ptr, last_block, last_msize);
3899		small_mag_ptr->mag_bytes_free_at_end = 0;
3900	}
3901
3902#if ASLR_INTERNAL
3903	if (small_mag_ptr->mag_bytes_free_at_start) {
3904		last_block = SMALL_REGION_ADDRESS(small_mag_ptr->mag_last_region);
3905		last_msize = SMALL_MSIZE_FOR_BYTES(small_mag_ptr->mag_bytes_free_at_start);
3906
3907		void *next_block = (void *) ((uintptr_t)last_block + small_mag_ptr->mag_bytes_free_at_start);
3908		if (SMALL_PTR_IS_FREE(next_block)) {
3909			msize_t next_msize = SMALL_PTR_SIZE(next_block);
3910
3911			small_meta_header_set_middle(SMALL_META_HEADER_FOR_PTR(next_block), SMALL_META_INDEX_FOR_PTR(next_block));
3912			small_free_list_remove_ptr(szone, small_mag_ptr, next_block, next_msize);
3913			last_msize += next_msize;
3914		}
3915
3916		// splice last_block into the free list
3917		small_free_list_add_ptr(szone, small_mag_ptr, last_block, last_msize);
3918		small_mag_ptr->mag_bytes_free_at_start = 0;
3919	}
3920#endif
3921
3922	// TODO: Will we ever need to coalesce the blocks at the beginning and end when we finalize?
3923
3924	small_mag_ptr->mag_last_region = NULL;
3925}
3926
3927static int
3928small_free_detach_region(szone_t *szone, magazine_t *small_mag_ptr, region_t r) {
3929	unsigned char	*ptr = SMALL_REGION_ADDRESS(r);
3930	msize_t		*meta_headers = SMALL_META_HEADER_FOR_PTR(ptr);
3931	uintptr_t		start = (uintptr_t)SMALL_REGION_ADDRESS(r);
3932	uintptr_t		current = start;
3933	uintptr_t		limit =  (uintptr_t)SMALL_REGION_END(r);
3934	int			total_alloc = 0;
3935
3936	while (current < limit) {
3937		unsigned	index = SMALL_META_INDEX_FOR_PTR(current);
3938		msize_t		msize_and_free = meta_headers[index];
3939		boolean_t	is_free = msize_and_free & SMALL_IS_FREE;
3940		msize_t		msize = msize_and_free & ~ SMALL_IS_FREE;
3941
3942		if (!msize) {
3943#if DEBUG_MALLOC
3944			malloc_printf("*** small_free_detach_region error with %p: msize=%d is_free =%d\n",
3945						  (void *)current, msize, is_free);
3946#endif
3947			break;
3948		}
3949		if (is_free) {
3950			small_free_list_remove_ptr(szone, small_mag_ptr, (void *)current, msize);
3951		} else {
3952			total_alloc++;
3953		}
3954		current += SMALL_BYTES_FOR_MSIZE(msize);
3955	}
3956	return total_alloc;
3957}
3958
3959static size_t
3960small_free_reattach_region(szone_t *szone, magazine_t *small_mag_ptr, region_t r) {
3961	unsigned char	*ptr = SMALL_REGION_ADDRESS(r);
3962	msize_t		*meta_headers = SMALL_META_HEADER_FOR_PTR(ptr);
3963	uintptr_t		start = (uintptr_t)SMALL_REGION_ADDRESS(r);
3964	uintptr_t		current = start;
3965	uintptr_t		limit =  (uintptr_t)SMALL_REGION_END(r);
3966	size_t		total_alloc = 0;
3967
3968	while (current < limit) {
3969		unsigned	index = SMALL_META_INDEX_FOR_PTR(current);
3970		msize_t		msize_and_free = meta_headers[index];
3971		boolean_t	is_free = msize_and_free & SMALL_IS_FREE;
3972		msize_t		msize = msize_and_free & ~ SMALL_IS_FREE;
3973
3974		if (!msize) {
3975#if DEBUG_MALLOC
3976			malloc_printf("*** small_free_reattach_region error with %p: msize=%d is_free =%d\n",
3977						  (void *)current, msize, is_free);
3978#endif
3979			break;
3980		}
3981		if (is_free) {
3982			small_free_list_add_ptr(szone, small_mag_ptr, (void *)current, msize);
3983		} else {
3984			total_alloc += SMALL_BYTES_FOR_MSIZE(msize);
3985		}
3986		current += SMALL_BYTES_FOR_MSIZE(msize);
3987	}
3988	return total_alloc;
3989}
3990
3991typedef struct {
3992	uint16_t pnum, size;
3993} small_pg_pair_t;
3994
3995static void NOINLINE /* want private stack frame for automatic array */
3996small_free_scan_madvise_free(szone_t *szone, magazine_t *depot_ptr, region_t r) {
3997	uintptr_t	start = (uintptr_t)SMALL_REGION_ADDRESS(r);
3998	uintptr_t	current = start;
3999	uintptr_t	limit =  (uintptr_t)SMALL_REGION_END(r);
4000	msize_t	*meta_headers = SMALL_META_HEADER_FOR_PTR(start);
	small_pg_pair_t advisory[((SMALL_REGION_PAYLOAD_BYTES + vm_page_quanta_size - 1) >> vm_page_quanta_shift) >> 1]; // 4096 bytes stack allocated
4002	int		advisories = 0;
4003
4004	// Scan the metadata identifying blocks which span one or more pages. Mark the pages MADV_FREE taking care to preserve free list
4005	// management data.
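	// Editorial gloss on the page arithmetic in this loop: the head of a free
	// block still holds its free_list_t links and its tail holds the
	// previous-msize trailer, so each advisory range is rounded *inward* past
	// both (round_page_quanta at the low end, trunc_page_quanta at the high
	// end).  Only free blocks that still cover at least one whole page after
	// that shrinking contribute an entry to advisory[].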
4006	while (current < limit) {
4007		unsigned	index = SMALL_META_INDEX_FOR_PTR(current);
4008		msize_t		msize_and_free = meta_headers[index];
4009		boolean_t	is_free = msize_and_free & SMALL_IS_FREE;
4010		msize_t		msize = msize_and_free & ~ SMALL_IS_FREE;
4011
4012		if (is_free && !msize && (current == start)) {
4013#if DEBUG_MALLOC
4014			// first block is all free
4015			malloc_printf("*** small_free_scan_madvise_free first block is all free! %p: msize=%d is_free =%d\n",
4016						  (void *)current, msize, is_free);
4017#endif
4018			uintptr_t pgLo = round_page_quanta(start + sizeof(free_list_t) + sizeof(msize_t));
4019			uintptr_t pgHi = trunc_page_quanta(start + SMALL_REGION_SIZE - sizeof(msize_t));
4020
4021			if (pgLo < pgHi) {
4022				advisory[advisories].pnum = (pgLo - start) >> vm_page_quanta_shift;
4023				advisory[advisories].size = (pgHi - pgLo) >> vm_page_quanta_shift;
4024				advisories++;
4025			}
4026			break;
4027		}
4028		if (!msize) {
4029#if DEBUG_MALLOC
4030			malloc_printf("*** small_free_scan_madvise_free error with %p: msize=%d is_free =%d\n",
4031						  (void *)current, msize, is_free);
4032#endif
4033			break;
4034		}
4035		if (is_free) {
4036			uintptr_t pgLo = round_page_quanta(current + sizeof(free_list_t) + sizeof(msize_t));
4037			uintptr_t pgHi = trunc_page_quanta(current + SMALL_BYTES_FOR_MSIZE(msize) - sizeof(msize_t));
4038
4039			if (pgLo < pgHi) {
4040				advisory[advisories].pnum = (pgLo - start) >> vm_page_quanta_shift;
4041				advisory[advisories].size = (pgHi - pgLo) >> vm_page_quanta_shift;
4042				advisories++;
4043			}
4044		}
4045		current += SMALL_BYTES_FOR_MSIZE(msize);
4046	}
4047
4048	if (advisories > 0) {
4049		int i;
4050
4051		OSAtomicIncrement32Barrier(&(REGION_TRAILER_FOR_SMALL_REGION(r)->pinned_to_depot));
4052		SZONE_MAGAZINE_PTR_UNLOCK(szone, depot_ptr);
4053		for (i = 0; i < advisories; ++i) {
4054			uintptr_t addr = (advisory[i].pnum << vm_page_quanta_shift) + start;
4055			size_t size = advisory[i].size << vm_page_quanta_shift;
4056
4057			madvise_free_range(szone, r, addr, addr + size, NULL);
4058		}
4059		SZONE_MAGAZINE_PTR_LOCK(szone, depot_ptr);
4060		OSAtomicDecrement32Barrier(&(REGION_TRAILER_FOR_SMALL_REGION(r)->pinned_to_depot));
4061	}
4062}
4063
4064static region_t
4065small_free_try_depot_unmap_no_lock(szone_t *szone, magazine_t *depot_ptr, region_trailer_t *node)
4066{
4067	if (0 < node->bytes_used ||
4068		0 < node->pinned_to_depot ||
4069		depot_ptr->recirculation_entries < (szone->num_small_magazines * 2)) {
4070		return NULL;
4071	}
4072
4073	// disconnect first node from Depot
4074	recirc_list_extract(szone, depot_ptr, node);
4075
4076	// Iterate the region pulling its free entries off the (locked) Depot's free list
4077	region_t sparse_region = SMALL_REGION_FOR_PTR(node);
4078	int objects_in_use = small_free_detach_region(szone, depot_ptr, sparse_region);
4079
4080	if (0 == objects_in_use) {
4081		// Invalidate the hash table entry for this region with HASHRING_REGION_DEALLOCATED.
4082		// Using HASHRING_REGION_DEALLOCATED preserves the collision chain, using HASHRING_OPEN_ENTRY (0) would not.
4083		rgnhdl_t pSlot = hash_lookup_region_no_lock(szone->small_region_generation->hashed_regions,
4084													szone->small_region_generation->num_regions_allocated,
4085													szone->small_region_generation->num_regions_allocated_shift, sparse_region);
4086		if (NULL == pSlot) {
4087			szone_error(szone, 1, "small_free_try_depot_unmap_no_lock hash lookup failed:", NULL, "%p\n", sparse_region);
4088			return NULL;
4089		}
4090		*pSlot = HASHRING_REGION_DEALLOCATED;
4091		depot_ptr->num_bytes_in_magazine -= SMALL_REGION_PAYLOAD_BYTES;
4092		__sync_fetch_and_add( &(szone->num_small_regions_dealloc), 1); // Atomically increment num_small_regions_dealloc
4093
4094		// Caller will transfer ownership of the region back to the OS with no locks held
4095		MAGMALLOC_DEALLOCREGION((void *)szone, (void *)sparse_region, SMALL_REGION_SIZE); // DTrace USDT Probe
4096		return sparse_region;
4097
4098	} else {
4099		szone_error(szone, 1, "small_free_try_depot_unmap_no_lock objects_in_use not zero:", NULL, "%d\n", objects_in_use);
4100		return NULL;
4101	}
4102}
4103
4104static boolean_t
4105small_free_do_recirc_to_depot(szone_t *szone, magazine_t *small_mag_ptr, mag_index_t mag_index)
4106{
	// The entire magazine crossed the "emptiness threshold". Transfer a region
	// from this magazine to the Depot. Choose a region that has itself crossed the emptiness threshold
	// (i.e. is at least fraction "f" empty). Such a region will be marked "suitable" on the recirculation list.
4110	region_trailer_t *node = small_mag_ptr->firstNode;
4111
4112	while (node && !node->recirc_suitable) {
4113		node = node->next;
4114	}
4115
4116	if (NULL == node) {
4117#if DEBUG_MALLOC
4118		malloc_printf("*** small_free_do_recirc_to_depot end of list\n");
4119#endif
4120		return TRUE; // Caller must SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
4121	}
4122
4123	region_t sparse_region = SMALL_REGION_FOR_PTR(node);
4124
	// Deal with unclaimed memory -- mag_bytes_free_at_end or mag_bytes_free_at_start
4126	if (sparse_region == small_mag_ptr->mag_last_region && (small_mag_ptr->mag_bytes_free_at_end || small_mag_ptr->mag_bytes_free_at_start)) {
4127		small_finalize_region(szone, small_mag_ptr);
4128	}
4129
4130	// disconnect "suitable" node from magazine
4131	recirc_list_extract(szone, small_mag_ptr, node);
4132
4133	// Iterate the region pulling its free entries off its (locked) magazine's free list
4134	int objects_in_use = small_free_detach_region(szone, small_mag_ptr, sparse_region);
4135	magazine_t *depot_ptr = &(szone->small_magazines[DEPOT_MAGAZINE_INDEX]);
4136
4137	// hand over the region to the (locked) Depot
4138	SZONE_MAGAZINE_PTR_LOCK(szone,depot_ptr);
4139	// this will cause small_free_list_add_ptr called by small_free_reattach_region to use
	// the depot as its target magazine, rather than the magazine formerly associated with sparse_region
4141	MAGAZINE_INDEX_FOR_SMALL_REGION(sparse_region) = DEPOT_MAGAZINE_INDEX;
4142	node->pinned_to_depot = 0;
4143
4144	// Iterate the region putting its free entries on Depot's free list
4145	size_t bytes_inplay = small_free_reattach_region(szone, depot_ptr, sparse_region);
4146
4147	small_mag_ptr->mag_num_bytes_in_objects -= bytes_inplay;
4148	small_mag_ptr->num_bytes_in_magazine -= SMALL_REGION_PAYLOAD_BYTES;
4149	small_mag_ptr->mag_num_objects -= objects_in_use;
4150
4151	SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr); // Unlock the originating magazine
4152
4153	depot_ptr->mag_num_bytes_in_objects += bytes_inplay;
4154	depot_ptr->num_bytes_in_magazine += SMALL_REGION_PAYLOAD_BYTES;
4155	depot_ptr->mag_num_objects += objects_in_use;
4156
4157	// connect to Depot as last node
4158	recirc_list_splice_last(szone, depot_ptr, node);
4159
4160	MAGMALLOC_RECIRCREGION((void *)szone, (int)mag_index, (void *)sparse_region, SMALL_REGION_SIZE,
4161						   (int)BYTES_USED_FOR_SMALL_REGION(sparse_region)); // DTrace USDT Probe
4162
4163	// Mark free'd dirty pages with MADV_FREE to reduce memory pressure
4164	small_free_scan_madvise_free(szone, depot_ptr, sparse_region);
4165
4166	// If the region is entirely empty vm_deallocate() it outside the depot lock
4167	region_t r_dealloc = small_free_try_depot_unmap_no_lock(szone, depot_ptr, node);
4168	SZONE_MAGAZINE_PTR_UNLOCK(szone,depot_ptr);
4169	if (r_dealloc)
4170		deallocate_pages(szone, r_dealloc, SMALL_REGION_SIZE, 0);
4171	return FALSE; // Caller need not unlock the originating magazine
4172}
4173
4174static region_t
4175small_find_msize_region(szone_t *szone, magazine_t *small_mag_ptr, mag_index_t mag_index, msize_t msize)
4176{
4177	free_list_t		*ptr;
4178	grain_t		slot = (msize <= szone->num_small_slots) ? msize - 1 : szone->num_small_slots - 1;
4179	free_list_t		**free_list = small_mag_ptr->mag_free_list;
4180	free_list_t		**the_slot = free_list + slot;
4181	free_list_t		**limit;
4182	unsigned		bitmap;
4183
4184	// Assumes we've locked the magazine
4185	CHECK_MAGAZINE_PTR_LOCKED(szone, small_mag_ptr, __PRETTY_FUNCTION__);
4186
4187	// Look for an exact match by checking the freelist for this msize.
4188	ptr = *the_slot;
4189	if (ptr)
4190		return SMALL_REGION_FOR_PTR(ptr);
4191
4192	// Mask off the bits representing slots holding free blocks smaller than
4193	// the size we need.
4194	if (szone->is_largemem) {
4195		// BITMAPN_CTZ implementation
4196		unsigned idx = slot >> 5;
4197		bitmap = 0;
4198		unsigned mask = ~ ((1 << (slot & 31)) - 1);
4199		for ( ; idx < SMALL_BITMAP_WORDS; ++idx ) {
4200			bitmap = small_mag_ptr->mag_bitmap[idx] & mask;
4201			if (bitmap != 0)
4202				break;
4203			mask = ~0U;
4204		}
4205		// Check for fallthrough: No bits set in bitmap
4206		if ((bitmap == 0) && (idx == SMALL_BITMAP_WORDS))
4207			return NULL;
4208
4209		// Start looking at the first set bit, plus 32 bits for every word of
4210		// zeroes or entries that were too small.
4211		slot = BITMAP32_CTZ((&bitmap)) + (idx * 32);
4212	} else {
4213		bitmap = small_mag_ptr->mag_bitmap[0] & ~ ((1 << slot) - 1);
4214		if (!bitmap)
4215			return NULL;
4216
4217		slot = BITMAP32_CTZ((&bitmap));
4218	}
4219	limit = free_list + szone->num_small_slots - 1;
4220	free_list += slot;
4221
4222	if (free_list < limit) {
4223		ptr = *free_list;
4224		if (ptr)
4225			return SMALL_REGION_FOR_PTR(ptr);
4226		else {
4227			/* Shouldn't happen. Fall through to look at last slot. */
4228#if DEBUG_MALLOC
4229			malloc_printf("in small_find_msize_region(), mag_bitmap out of sync, slot=%d\n", slot);
4230#endif
4231		}
4232	}
4233
4234	// We are now looking at the last slot, which contains blocks equal to, or
4235	// due to coalescing of free blocks, larger than (num_small_slots - 1) * (small quantum size).
4236	ptr = *limit;
4237	if (ptr)
4238		return SMALL_REGION_FOR_PTR(ptr);
4239
4240	return NULL;
4241}
4242
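/*
 * small_get_region_from_depot: try to satisfy a request for msize quanta by adopting a
 * region from the Depot. On success the region's free entries are moved onto the calling
 * magazine's free list, MAGAZINE_INDEX_FOR_SMALL_REGION is switched to mag_index, the
 * byte and object counts of both magazines are adjusted, and 1 is returned; 0 otherwise.
 */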
4243static boolean_t
4244small_get_region_from_depot(szone_t *szone, magazine_t *small_mag_ptr, mag_index_t mag_index, msize_t msize)
4245{
4246	magazine_t *depot_ptr = &(szone->small_magazines[DEPOT_MAGAZINE_INDEX]);
4247
4248	/* FIXME: Would Uniprocessor benefit from recirc and MADV_FREE? */
4249	if (szone->num_small_magazines == 1) // Uniprocessor, single magazine, so no recirculation necessary
4250		return 0;
4251
4252#if DEBUG_MALLOC
4253	if (DEPOT_MAGAZINE_INDEX == mag_index) {
4254		szone_error(szone, 1, "small_get_region_from_depot called for magazine index -1", NULL, NULL);
4255		return 0;
4256	}
4257#endif
4258
4259	SZONE_MAGAZINE_PTR_LOCK(szone,depot_ptr);
4260
4261	// Appropriate a Depot'd region that can satisfy requested msize.
4262	region_trailer_t *node;
4263	region_t sparse_region;
4264
4265	while (1) {
4266		sparse_region = small_find_msize_region(szone, depot_ptr, DEPOT_MAGAZINE_INDEX, msize);
4267		if (NULL == sparse_region) { // Depot empty?
4268			SZONE_MAGAZINE_PTR_UNLOCK(szone,depot_ptr);
4269			return 0;
4270		}
4271
4272		node = REGION_TRAILER_FOR_SMALL_REGION(sparse_region);
4273		if (0 >= node->pinned_to_depot)
4274			break;
4275
4276		SZONE_MAGAZINE_PTR_UNLOCK(szone,depot_ptr);
4277		yield();
4278		SZONE_MAGAZINE_PTR_LOCK(szone,depot_ptr);
4279	}
4280
4281	// disconnect node from Depot
4282	recirc_list_extract(szone, depot_ptr, node);
4283
4284	// Iterate the region pulling its free entries off the (locked) Depot's free list
4285	int objects_in_use = small_free_detach_region(szone, depot_ptr, sparse_region);
4286
4287	// Transfer ownership of the region
4288	MAGAZINE_INDEX_FOR_SMALL_REGION(sparse_region) = mag_index;
4289	node->pinned_to_depot = 0;
4290
4291	// Iterate the region putting its free entries on its new (locked) magazine's free list
4292	size_t bytes_inplay = small_free_reattach_region(szone, small_mag_ptr, sparse_region);
4293
4294	depot_ptr->mag_num_bytes_in_objects -= bytes_inplay;
4295	depot_ptr->num_bytes_in_magazine -= SMALL_REGION_PAYLOAD_BYTES;
4296	depot_ptr->mag_num_objects -= objects_in_use;
4297
4298	small_mag_ptr->mag_num_bytes_in_objects += bytes_inplay;
4299	small_mag_ptr->num_bytes_in_magazine += SMALL_REGION_PAYLOAD_BYTES;
4300	small_mag_ptr->mag_num_objects += objects_in_use;
4301
4302	// connect to magazine as first node
4303	recirc_list_splice_first(szone, small_mag_ptr, node);
4304
4305	SZONE_MAGAZINE_PTR_UNLOCK(szone,depot_ptr);
4306
4307	// madvise() outside the Depot lock
4308	(void)madvise_reuse_range(szone, sparse_region, sparse_region, sparse_region+SMALL_REGION_PAYLOAD_BYTES);
4309
4310	MAGMALLOC_DEPOTREGION((void *)szone, (int)mag_index, (void *)sparse_region, SMALL_REGION_SIZE,
4311						  (int)BYTES_USED_FOR_SMALL_REGION(sparse_region)); // DTrace USDT Probe
4312
4313	return 1;
4314}
4315
4316#define K 1.5 // headroom measured in number of 8MB regions
4317#define DENSITY_THRESHOLD(a) \
4318	((a) - ((a) >> 2)) // "Emptiness" f = 0.25, so "Density" is (1 - f)*a. Generally: ((a) - ((a) >> -log2(f)))
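/*
 * A sketch of the arithmetic (the constants come from the definitions above): with f = 0.25,
 * DENSITY_THRESHOLD(a) = a - a/4 = 0.75 * a, so a region whose bytes_used falls below
 * 0.75 * SMALL_REGION_PAYLOAD_BYTES is at least 25% empty and becomes "suitable" for
 * recirculation. The magazine-level trigger in small_free_no_lock() fires when
 * (a - u) > K * SMALL_REGION_PAYLOAD_BYTES (written there as 3/2 of a region) and
 * u < DENSITY_THRESHOLD(a), i.e. at least one and a half regions of slack and at least
 * a quarter of the magazine's bytes idle.
 */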
4319
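/*
 * small_free_no_lock: return the block at ptr (msize quanta) to the given (locked) magazine.
 * The block is coalesced with any free neighbors, scribbled if requested, placed on the free
 * list, and the region/magazine accounting is updated; the routine may then recirculate the
 * region to the Depot or madvise its freed pages. Returns TRUE if the caller must still
 * unlock small_mag_ptr, FALSE if the lock was already dropped here.
 */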
4320static INLINE boolean_t
4321small_free_no_lock(szone_t *szone, magazine_t *small_mag_ptr, mag_index_t mag_index, region_t region, void *ptr, msize_t msize)
4322{
4323	msize_t		*meta_headers = SMALL_META_HEADER_FOR_PTR(ptr);
4324	unsigned		index = SMALL_META_INDEX_FOR_PTR(ptr);
4325	void		*original_ptr = ptr;
4326	size_t		original_size = SMALL_BYTES_FOR_MSIZE(msize);
4327	unsigned char	*next_block = ((unsigned char *)ptr + original_size);
4328	msize_t		next_index = index + msize;
4329	msize_t		previous_msize, next_msize;
4330	void		*previous;
4331
4332#if DEBUG_MALLOC
4333	if (LOG(szone,ptr)) {
4334		malloc_printf("in small_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize);
4335	}
4336	if (!msize) {
4337		szone_error(szone, 1, "trying to free small block that is too small", ptr,
4338					"in small_free_no_lock(), ptr=%p, msize=%d\n", ptr, msize);
4339	}
4340#endif
4341
4342	// We try to coalesce this block with the preceding one
4343	if (index && (SMALL_PREVIOUS_MSIZE(ptr) <= index)) {
4344		previous_msize = SMALL_PREVIOUS_MSIZE(ptr);
4345		if (meta_headers[index - previous_msize] == (previous_msize | SMALL_IS_FREE)) {
4346			previous = (void *)((uintptr_t)ptr - SMALL_BYTES_FOR_MSIZE(previous_msize));
4347			// previous is really to be coalesced
4348#if DEBUG_MALLOC
4349			if (LOG(szone, ptr) || LOG(szone,previous)) {
4350				malloc_printf("in small_free_no_lock(), coalesced backwards for %p previous=%p\n", ptr, previous);
4351			}
4352#endif
4353			small_free_list_remove_ptr(szone, small_mag_ptr, previous, previous_msize);
4354			small_meta_header_set_middle(meta_headers, index);
4355			ptr = previous;
4356			msize += previous_msize;
4357			index -= previous_msize;
4358		}
4359	}
4360	// We try to coalesce with the next block
4361	if ((next_block < SMALL_REGION_END(region)) && (meta_headers[next_index] & SMALL_IS_FREE)) {
4362		// next block is free, we coalesce
4363		next_msize = meta_headers[next_index] & ~ SMALL_IS_FREE;
4364#if DEBUG_MALLOC
4365		if (LOG(szone,ptr))
4366			malloc_printf("In small_free_no_lock(), for ptr=%p, msize=%d coalesced next block=%p next_msize=%d\n",
4367						  ptr, msize, next_block, next_msize);
4368#endif
4369		small_free_list_remove_ptr(szone, small_mag_ptr, next_block, next_msize);
4370		small_meta_header_set_middle(meta_headers, next_index);
4371		msize += next_msize;
4372	}
4373	if (szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) {
4374		if (!msize) {
4375			szone_error(szone, 1, "incorrect size information - block header was damaged", ptr, NULL);
4376		} else {
4377			memset(ptr, SCRABBLE_BYTE, SMALL_BYTES_FOR_MSIZE(msize));
4378		}
4379	}
4380	small_free_list_add_ptr(szone, small_mag_ptr, ptr, msize);
4381	small_mag_ptr->mag_num_objects--;
4382	// we use original_size and not msize to avoid double counting the coalesced blocks
4383	small_mag_ptr->mag_num_bytes_in_objects -= original_size;
4384
4385	// Update this region's bytes in use count
4386	region_trailer_t *node = REGION_TRAILER_FOR_SMALL_REGION(region);
4387	size_t bytes_used = node->bytes_used - original_size;
4388	node->bytes_used = bytes_used;
4389
4390#if !TARGET_OS_EMBEDDED // Always madvise for embedded platforms
4391	/* FIXME: Would Uniprocessor benefit from recirc and MADV_FREE? */
4392	if (szone->num_small_magazines == 1) { // Uniprocessor, single magazine, so no recirculation necessary
4393		/* NOTHING */
4394	} else if (DEPOT_MAGAZINE_INDEX != mag_index) {
4395		// Emptiness discriminant
4396		if (bytes_used < DENSITY_THRESHOLD(SMALL_REGION_PAYLOAD_BYTES)) {
4397			/* Region has crossed threshold from density to sparsity. Mark it "suitable" on the
4398			 recirculation candidates list. */
4399			node->recirc_suitable = TRUE;
4400		} else {
4401			/* After this free, we've found the region is still dense, so it must have been even more so before
4402			 the free. That implies the region is already correctly marked. Do nothing. */
4403		}
4404
4405		// Has the entire magazine crossed the "emptiness threshold"? If so, transfer a region
4406		// from this magazine to the Depot. Choose a region that itself has crossed the emptiness threshold
4407		// (i.e., is at least fraction "f" empty). Such a region will be marked "suitable" on the recirculation list.
4408
4409		size_t a = small_mag_ptr->num_bytes_in_magazine; // Total bytes allocated to this magazine
4410		size_t u = small_mag_ptr->mag_num_bytes_in_objects; // In use (malloc'd) from this magazine
4411
4412		if (a - u > ((3 * SMALL_REGION_PAYLOAD_BYTES) / 2) && u < DENSITY_THRESHOLD(a)) {
4413			return small_free_do_recirc_to_depot(szone, small_mag_ptr, mag_index);
4414		}
4415
4416	} else {
4417#endif
4418		// Freed to Depot. N.B. Lock on small_magazines[DEPOT_MAGAZINE_INDEX] is already held
4419		// Calculate the first page in the coalesced block that would be safe to mark MADV_FREE
4420		size_t free_header_size = sizeof(free_list_t) + sizeof(msize_t);
4421		uintptr_t safe_ptr = (uintptr_t)ptr + free_header_size;
4422		uintptr_t round_safe = round_page_quanta(safe_ptr);
4423
4424		// Calculate the last page in the coalesced block that would be safe to mark MADV_FREE
4425		size_t free_tail_size = sizeof(msize_t);
4426		uintptr_t safe_extent = (uintptr_t)ptr + SMALL_BYTES_FOR_MSIZE(msize) - free_tail_size;
4427		uintptr_t trunc_extent = trunc_page_quanta(safe_extent);
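		// Illustration with hypothetical numbers (assuming 4 KB page quanta): for a coalesced
		// free block spanning [0x1000200, 0x1005E00), round_safe lands at 0x1001000 and
		// trunc_extent at 0x1005000, so only whole pages strictly inside the block -- clear of
		// the free-list header at the front and the trailing msize -- are considered below.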
4428
4429		// The newly freed block may complete a span of bytes that cover one or more pages. Mark the span with MADV_FREE.
4430		if (round_safe < trunc_extent) { // Coalesced area covers a page (perhaps many)
4431			// Extend the freed block by the free region header and tail sizes to include pages
4432			// we may have coalesced that no longer host free region tails and headers.
4433			// This may extend over in-use ranges, but the MIN/MAX clamping below will fix that up.
4434			uintptr_t lo = trunc_page_quanta((uintptr_t)original_ptr - free_tail_size);
4435			uintptr_t hi = round_page_quanta((uintptr_t)original_ptr + original_size + free_header_size);
4436
4437			uintptr_t free_lo = MAX(round_safe, lo);
4438			uintptr_t free_hi = MIN(trunc_extent, hi);
4439
4440			if (free_lo < free_hi) {
4441				small_free_list_remove_ptr(szone, small_mag_ptr, ptr, msize);
4442				small_meta_header_set_in_use(meta_headers, index, msize);
4443
4444				OSAtomicIncrement32Barrier(&(node->pinned_to_depot));
4445				SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
4446				madvise_free_range(szone, region, free_lo, free_hi, &szone->last_small_advise);
4447				SZONE_MAGAZINE_PTR_LOCK(szone, small_mag_ptr);
4448				OSAtomicDecrement32Barrier(&(node->pinned_to_depot));
4449
4450				small_meta_header_set_is_free(meta_headers, index, msize);
4451				small_free_list_add_ptr(szone, small_mag_ptr, ptr, msize);
4452			}
4453		}
4454
4455#if !TARGET_OS_EMBEDDED
4456		if (0 < bytes_used || 0 < node->pinned_to_depot) {
4457			/* Depot'd region is still live. Leave it in place on the Depot's recirculation list
4458			 so as to avoid thrashing between the Depot's free list and a magazine's free list
4459			 with detach_region/reattach_region */
4460		} else {
4461			/* Depot'd region is just now empty. Consider return to OS. */
4462			region_t r_dealloc = small_free_try_depot_unmap_no_lock(szone, small_mag_ptr, node);
4463			SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
4464			if (r_dealloc)
4465				deallocate_pages(szone, r_dealloc, SMALL_REGION_SIZE, 0);
4466			return FALSE; // Caller need not unlock
4467		}
4468	}
4469#endif
4470
4471	return TRUE; // Caller must do SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr)
4472}
4473
4474// Allocates from the last region or a freshly allocated region
4475static void *
4476small_malloc_from_region_no_lock(szone_t *szone, magazine_t *small_mag_ptr, mag_index_t mag_index,
4477								 msize_t msize, void *aligned_address)
4478{
4479	void	*ptr;
4480
4481	// Before anything we transform the mag_bytes_free_at_end or mag_bytes_free_at_start - if any - to a regular free block
4482	/* FIXME: last_block needs to be coalesced with previous entry if free, <rdar://5462322> */
4483	if (small_mag_ptr->mag_bytes_free_at_end || small_mag_ptr->mag_bytes_free_at_start)
4484		small_finalize_region(szone, small_mag_ptr);
4485
4486	// This is the only place in smallville that (infrequently) takes the small_regions_lock.
4487	// Only one thread at a time should be permitted to assess the density of the hash
4488	// ring and adjust if needed.
4489	// Only one thread at a time should be permitted to insert its new region on
4490	// the hash ring.
4491	// It is safe for all other threads to read the hash ring (hashed_regions) and
4492	// the associated sizes (num_regions_allocated and num_small_regions).
4493
4494	_malloc_lock_lock(&szone->small_regions_lock);
4495	// Check to see if the hash ring of small regions needs to grow.  Try to
4496	// avoid the hash ring becoming too dense.
4497	if (szone->small_region_generation->num_regions_allocated < (2 * szone->num_small_regions)) {
4498		region_t *new_regions;
4499		size_t new_size;
4500		size_t new_shift = szone->small_region_generation->num_regions_allocated_shift; // In/Out parameter
4501		new_regions = hash_regions_grow_no_lock(szone, szone->small_region_generation->hashed_regions,
4502												szone->small_region_generation->num_regions_allocated,
4503												&new_shift,
4504												&new_size);
4505		// Do not deallocate the current hashed_regions allocation since someone
4506		// may be iterating it.  Instead, just leak it.
4507
4508		// Prepare to advance to the "next generation" of the hash ring.
4509		szone->small_region_generation->nextgen->hashed_regions = new_regions;
4510		szone->small_region_generation->nextgen->num_regions_allocated = new_size;
4511		szone->small_region_generation->nextgen->num_regions_allocated_shift = new_shift;
4512
4513		// Throw the switch to atomically advance to the next generation.
4514		szone->small_region_generation = szone->small_region_generation->nextgen;
4515		// Ensure everyone sees the advance.
4516		OSMemoryBarrier();
4517	}
4518	// Tag the region at "aligned_address" as belonging to us,
4519	// and so put it under the protection of the magazine lock we are holding.
4520	// Do this before advertising "aligned_address" on the hash ring(!)
4521	MAGAZINE_INDEX_FOR_SMALL_REGION(aligned_address) = mag_index;
4522
4523	// Insert the new region into the hash ring, and update malloc statistics
4524	hash_region_insert_no_lock(szone->small_region_generation->hashed_regions,
4525							   szone->small_region_generation->num_regions_allocated,
4526							   szone->small_region_generation->num_regions_allocated_shift,
4527							   aligned_address);
4528
4529	szone->num_small_regions++;
4530
4531	_malloc_lock_unlock(&szone->small_regions_lock);
4532
4533	small_mag_ptr->mag_last_region = aligned_address;
4534	BYTES_USED_FOR_SMALL_REGION(aligned_address) = SMALL_BYTES_FOR_MSIZE(msize);
4535#if ASLR_INTERNAL
4536	int offset_msize = malloc_entropy[1] & SMALL_ENTROPY_MASK;
4537#if DEBUG_MALLOC
4538	if (getenv("MallocASLRForce")) offset_msize = strtol(getenv("MallocASLRForce"), NULL, 0) & SMALL_ENTROPY_MASK;
4539	if (getenv("MallocASLRPrint")) malloc_printf("Region: %p offset: %d\n", aligned_address, offset_msize);
4540#endif
4541#else
4542	int offset_msize = 0;
4543#endif
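	// With ASLR_INTERNAL, offset_msize shifts the first allocation a random number of quanta
	// into the fresh region; the skipped quanta become mag_bytes_free_at_start below, so the
	// entropy costs only bookkeeping, not memory.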
4544	ptr = (void *)((uintptr_t) aligned_address + SMALL_BYTES_FOR_MSIZE(offset_msize));
4545	small_meta_header_set_in_use(SMALL_META_HEADER_FOR_PTR(ptr), offset_msize, msize);
4546	small_mag_ptr->mag_num_objects++;
4547	small_mag_ptr->mag_num_bytes_in_objects += SMALL_BYTES_FOR_MSIZE(msize);
4548	small_mag_ptr->num_bytes_in_magazine += SMALL_REGION_PAYLOAD_BYTES;
4549
4550	// add a big free block at the end
4551	small_meta_header_set_in_use(SMALL_META_HEADER_FOR_PTR(ptr), offset_msize + msize, NUM_SMALL_BLOCKS - msize - offset_msize);
4552	small_mag_ptr->mag_bytes_free_at_end = SMALL_BYTES_FOR_MSIZE(NUM_SMALL_BLOCKS - msize - offset_msize);
4553
4554#if ASLR_INTERNAL
4555	// add a big free block at the start
4556	small_mag_ptr->mag_bytes_free_at_start = SMALL_BYTES_FOR_MSIZE(offset_msize);
4557	if (offset_msize) {
4558		small_meta_header_set_in_use(SMALL_META_HEADER_FOR_PTR(ptr), 0, offset_msize);
4559	}
4560#else
4561	small_mag_ptr->mag_bytes_free_at_start = 0;
4562#endif
4563
4564	// connect to magazine as last node
4565	recirc_list_splice_last(szone, small_mag_ptr, REGION_TRAILER_FOR_SMALL_REGION(aligned_address));
4566
4567	return ptr;
4568}
4569
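/*
 * small_try_shrink_in_place: shrink a block to new_good_size by splitting it in two.
 * The front (new_msize quanta) stays with the caller; the tail is marked as a separate
 * in-use block and handed to szone_free(). Returns the original pointer.
 */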
4570static INLINE void *
4571small_try_shrink_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_good_size)
4572{
4573	msize_t new_msize = SMALL_MSIZE_FOR_BYTES(new_good_size);
4574	msize_t mshrinkage = SMALL_MSIZE_FOR_BYTES(old_size) - new_msize;
4575
4576	if (mshrinkage) {
4577		void *q = (void *)((uintptr_t)ptr + SMALL_BYTES_FOR_MSIZE(new_msize));
4578		magazine_t *small_mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->small_magazines,
4579																	 REGION_TRAILER_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(ptr)),
4580																	 MAGAZINE_INDEX_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(ptr)));
4581
4582		// Mark q as block header and in-use, thus creating two blocks.
4583		small_meta_header_set_in_use(SMALL_META_HEADER_FOR_PTR(ptr), SMALL_META_INDEX_FOR_PTR(ptr), new_msize);
4584		small_meta_header_set_in_use(SMALL_META_HEADER_FOR_PTR(q), SMALL_META_INDEX_FOR_PTR(q), mshrinkage);
4585		small_mag_ptr->mag_num_objects++;
4586
4587		SZONE_MAGAZINE_PTR_UNLOCK(szone,small_mag_ptr);
4588		szone_free(szone, q); // avoid inlining free_small(szone, q, ...);
4589	}
4590
4591	return ptr;
4592}
4593
4594static INLINE boolean_t
4595small_try_realloc_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size)
4596{
4597	// returns 1 on success
4598	msize_t	*meta_headers = SMALL_META_HEADER_FOR_PTR(ptr);
4599	unsigned	index;
4600	msize_t	old_msize, new_msize;
4601	unsigned	next_index;
4602	void	*next_block;
4603	msize_t	next_msize_and_free;
4604	boolean_t	is_free;
4605	msize_t	next_msize, leftover_msize;
4606	void	*leftover;
4607
4608	index = SMALL_META_INDEX_FOR_PTR(ptr);
4609	old_msize = SMALL_MSIZE_FOR_BYTES(old_size);
4610	new_msize = SMALL_MSIZE_FOR_BYTES(new_size + SMALL_QUANTUM - 1);
4611	next_index = index + old_msize;
4612
4613	if (next_index >= NUM_SMALL_BLOCKS) {
4614		return 0;
4615	}
4616	next_block = (char *)ptr + old_size;
4617
4618#if DEBUG_MALLOC
4619	if ((uintptr_t)next_block & (SMALL_QUANTUM - 1)) {
4620		szone_error(szone, 1, "internal invariant broken in realloc(next_block)", next_block, NULL);
4621	}
4622	if (meta_headers[index] != old_msize)
4623		malloc_printf("*** small_try_realloc_in_place incorrect old %d %d\n",
4624					  meta_headers[index], old_msize);
4625#endif
4626
4627	magazine_t	*small_mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->small_magazines,
4628																  REGION_TRAILER_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(ptr)),
4629																  MAGAZINE_INDEX_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(ptr)));
4630
4631	/*
4632	 * Look for a free block immediately afterwards.  If it's large enough, we can consume (part of)
4633	 * it.
4634	 */
4635	next_msize_and_free = meta_headers[next_index];
4636	is_free = next_msize_and_free & SMALL_IS_FREE;
4637	if (!is_free) {
4638		SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
4639		return 0; // next_block is in use
4640	}
4641	next_msize = next_msize_and_free & ~ SMALL_IS_FREE;
4642	if (old_msize + next_msize < new_msize) {
4643		SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
4644		return 0; // even with next block, not enough
4645	}
4646	/*
4647	 * The following block is big enough; pull it from its freelist and chop off enough to satisfy
4648	 * our needs.
4649	 */
4650	small_free_list_remove_ptr(szone, small_mag_ptr, next_block, next_msize);
4651	small_meta_header_set_middle(meta_headers, next_index);
4652	leftover_msize = old_msize + next_msize - new_msize;
4653	if (leftover_msize) {
4654		/* there's some left, so put the remainder back */
4655		leftover = (unsigned char *)ptr + SMALL_BYTES_FOR_MSIZE(new_msize);
4656
4657		small_free_list_add_ptr(szone, small_mag_ptr, leftover, leftover_msize);
4658	}
4659#if DEBUG_MALLOC
4660	if (SMALL_BYTES_FOR_MSIZE(new_msize) > szone->large_threshold) {
4661		malloc_printf("*** realloc in place for %p exceeded msize=%d\n", ptr, new_msize);
4662	}
4663#endif
4664	small_meta_header_set_in_use(meta_headers, index, new_msize);
4665#if DEBUG_MALLOC
4666	if (LOG(szone,ptr)) {
4667		malloc_printf("in small_try_realloc_in_place(), ptr=%p, msize=%d\n", ptr, *SMALL_METADATA_FOR_PTR(ptr));
4668	}
4669#endif
4670	small_mag_ptr->mag_num_bytes_in_objects += SMALL_BYTES_FOR_MSIZE(new_msize - old_msize);
4671
4672	// Update this region's bytes in use count
4673	region_trailer_t *node = REGION_TRAILER_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(ptr));
4674	size_t bytes_used = node->bytes_used + SMALL_BYTES_FOR_MSIZE(new_msize - old_msize);
4675	node->bytes_used = bytes_used;
4676
4677	// Emptiness discriminant
4678	if (bytes_used < DENSITY_THRESHOLD(SMALL_REGION_PAYLOAD_BYTES)) {
4679		/* After this reallocation the region is still sparse, so it must have been even more so before
4680		 the reallocation. That implies the region is already correctly marked. Do nothing. */
4681	} else {
4682		/* Region has crossed threshold from sparsity to density. Mark it not "suitable" on the
4683		 recirculation candidates list. */
4684		node->recirc_suitable = FALSE;
4685	}
4686
4687	SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
4688	CHECK(szone, __PRETTY_FUNCTION__);
4689	return 1;
4690}
4691
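/*
 * small_check_region: walk every block in the region (skipping the free-at-start/free-at-end
 * slop of the magazine's last region) and verify the metadata invariants: non-zero msizes,
 * no two adjacent free blocks, sane free-list links, and a matching end-of-block msize marker.
 * Returns 1 if the region is consistent, 0 otherwise.
 */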
4692static boolean_t
4693small_check_region(szone_t *szone, region_t region)
4694{
4695	unsigned char	*ptr = SMALL_REGION_ADDRESS(region);
4696	msize_t		*meta_headers = SMALL_META_HEADER_FOR_PTR(ptr);
4697	unsigned char	*region_end = SMALL_REGION_END(region);
4698	msize_t		prev_free = 0;
4699	unsigned		index;
4700	msize_t		msize_and_free;
4701	msize_t		msize;
4702	free_list_t		*free_head;
4703	void		*previous, *next;
4704	msize_t		*follower;
4705	mag_index_t		mag_index = MAGAZINE_INDEX_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(ptr));
4706	magazine_t		*small_mag_ptr = &(szone->small_magazines[mag_index]);
4707
4708	// Assumes locked
4709	CHECK_MAGAZINE_PTR_LOCKED(szone, small_mag_ptr, __PRETTY_FUNCTION__);
4710
4711	if (region == small_mag_ptr->mag_last_region) {
4712		ptr += small_mag_ptr->mag_bytes_free_at_start;
4713		region_end -= small_mag_ptr->mag_bytes_free_at_end;
4714	}
4715
4716	while (ptr < region_end) {
4717		index = SMALL_META_INDEX_FOR_PTR(ptr);
4718		msize_and_free = meta_headers[index];
4719		if (!(msize_and_free & SMALL_IS_FREE)) {
4720			// block is in use
4721			msize = msize_and_free;
4722			if (!msize) {
4723				malloc_printf("*** invariant broken: null msize ptr=%p num_small_regions=%d end=%p\n",
4724							  ptr, szone->num_small_regions, region_end);
4725				return 0;
4726			}
4727#if !RELAXED_INVARIANT_CHECKS
4728			if (SMALL_BYTES_FOR_MSIZE(msize) > szone->large_threshold) {
4729				malloc_printf("*** invariant broken for %p this small msize=%d - size is too large\n",
4730							  ptr, msize_and_free);
4731				return 0;
4732			}
4733#endif // RELAXED_INVARIANT_CHECKS
4734			ptr += SMALL_BYTES_FOR_MSIZE(msize);
4735			prev_free = 0;
4736		} else {
4737			// free pointer
4738			msize = msize_and_free & ~ SMALL_IS_FREE;
4739			free_head = (free_list_t *)ptr;
4740			follower = (msize_t *)FOLLOWING_SMALL_PTR(ptr, msize);
4741			if (!msize) {
4742				malloc_printf("*** invariant broken for free block %p this msize=%d\n", ptr, msize);
4743				return 0;
4744			}
4745#if !RELAXED_INVARIANT_CHECKS
4746			if (prev_free) {
4747				malloc_printf("*** invariant broken for %p (2 free in a row)\n", ptr);
4748				return 0;
4749			}
4750#endif
4751			previous = free_list_unchecksum_ptr(szone, &free_head->previous);
4752			next = free_list_unchecksum_ptr(szone, &free_head->next);
4753			if (previous && !SMALL_PTR_IS_FREE(previous)) {
4754				malloc_printf("*** invariant broken for %p (previous %p is not a free pointer)\n",
4755							  ptr, free_head->previous);
4756				return 0;
4757			}
4758			if (next && !SMALL_PTR_IS_FREE(next)) {
4759				malloc_printf("*** invariant broken for %p (next is not a free pointer)\n", ptr);
4760				return 0;
4761			}
4762			if (SMALL_PREVIOUS_MSIZE(follower) != msize) {
4763				malloc_printf("*** invariant broken for small free %p followed by %p in region [%p-%p] "
4764							  "(end marker incorrect) should be %d; in fact %d\n",
4765							  ptr, follower, SMALL_REGION_ADDRESS(region), region_end, msize, SMALL_PREVIOUS_MSIZE(follower));
4766				return 0;
4767			}
4768			ptr = (unsigned char *)follower;
4769			prev_free = SMALL_IS_FREE;
4770		}
4771	}
4772	return 1;
4773}
4774
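/*
 * small_in_use_enumerator: introspection support. The region hash ring, the magazines, and
 * each region's metadata are mapped from the target task via `reader`, and admin ranges,
 * pointer ranges, and individual in-use blocks are reported through `recorder` according to
 * type_mask. A block parked in a magazine's mag_last_free cache is not reported as in use.
 */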
4775static kern_return_t
4776small_in_use_enumerator(task_t task, void *context, unsigned type_mask, szone_t *szone,
4777						memory_reader_t reader, vm_range_recorder_t recorder)
4778{
4779	size_t		num_regions;
4780	size_t		index;
4781	region_t		*regions;
4782	vm_range_t		buffer[MAX_RECORDER_BUFFER];
4783	unsigned		count = 0;
4784	kern_return_t	err;
4785	region_t		region;
4786	vm_range_t		range;
4787	vm_range_t		admin_range;
4788	vm_range_t		ptr_range;
4789	unsigned char	*mapped_region;
4790	msize_t		*block_header;
4791	unsigned		block_index;
4792	unsigned		block_limit;
4793	msize_t		msize_and_free;
4794	msize_t		msize;
4795	magazine_t          *small_mag_base = NULL;
4796
4797	region_hash_generation_t *srg_ptr;
4798	err = reader(task, (vm_address_t)szone->small_region_generation, sizeof(region_hash_generation_t), (void **)&srg_ptr);
4799	if (err) return err;
4800
4801	num_regions = srg_ptr->num_regions_allocated;
4802	err = reader(task, (vm_address_t)srg_ptr->hashed_regions, sizeof(region_t) * num_regions, (void **)&regions);
4803	if (err) return err;
4804
4805	if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) {
4806		// Map in all active magazines. Do this outside the iteration over regions.
4807		err = reader(task, (vm_address_t)(szone->small_magazines),
4808					 szone->num_small_magazines*sizeof(magazine_t),(void **)&small_mag_base);
4809		if (err) return err;
4810	}
4811
4812	for (index = 0; index < num_regions; ++index) {
4813		region = regions[index];
4814		if (HASHRING_OPEN_ENTRY != region && HASHRING_REGION_DEALLOCATED != region) {
4815			range.address = (vm_address_t)SMALL_REGION_ADDRESS(region);
4816			range.size = SMALL_REGION_SIZE;
4817			if (type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) {
4818				admin_range.address = range.address + SMALL_METADATA_START;
4819				admin_range.size = SMALL_METADATA_SIZE;
4820				recorder(task, context, MALLOC_ADMIN_REGION_RANGE_TYPE, &admin_range, 1);
4821			}
4822			if (type_mask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE)) {
4823				ptr_range.address = range.address;
4824				ptr_range.size = NUM_SMALL_BLOCKS * SMALL_QUANTUM;
4825				recorder(task, context, MALLOC_PTR_REGION_RANGE_TYPE, &ptr_range, 1);
4826			}
4827			if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) {
4828				void 		*mag_last_free;
4829				vm_address_t	mag_last_free_ptr = 0;
4830				msize_t		mag_last_free_msize = 0;
4831
4832				err = reader(task, range.address, range.size, (void **)&mapped_region);
4833				if (err)
4834					return err;
4835
4836				mag_index_t mag_index = MAGAZINE_INDEX_FOR_SMALL_REGION(mapped_region);
4837				magazine_t *small_mag_ptr = small_mag_base + mag_index;
4838
4839				if (DEPOT_MAGAZINE_INDEX != mag_index) {
4840					mag_last_free = small_mag_ptr->mag_last_free;
4841					if (mag_last_free) {
4842						mag_last_free_ptr = (uintptr_t) mag_last_free & ~(SMALL_QUANTUM - 1);
4843						mag_last_free_msize = (uintptr_t) mag_last_free & (SMALL_QUANTUM - 1);
4844					}
4845				} else {
4846					for (mag_index = 0; mag_index < szone->num_small_magazines; mag_index++) {
4847						if ((void *)range.address == (small_mag_base + mag_index)->mag_last_free_rgn) {
4848							mag_last_free = (small_mag_base + mag_index)->mag_last_free;
4849							if (mag_last_free) {
4850								mag_last_free_ptr = (uintptr_t) mag_last_free & ~(SMALL_QUANTUM - 1);
4851								mag_last_free_msize = (uintptr_t) mag_last_free & (SMALL_QUANTUM - 1);
4852							}
4853						}
4854					}
4855				}
4856
4857				block_header = (msize_t *)(mapped_region + SMALL_METADATA_START + sizeof(region_trailer_t));
4858				block_index = 0;
4859				block_limit = NUM_SMALL_BLOCKS;
4860				if (region == small_mag_ptr->mag_last_region) {
4861					block_index += SMALL_MSIZE_FOR_BYTES(small_mag_ptr->mag_bytes_free_at_start);
4862					block_limit -= SMALL_MSIZE_FOR_BYTES(small_mag_ptr->mag_bytes_free_at_end);
4863				}
4864				while (block_index < block_limit) {
4865					msize_and_free = block_header[block_index];
4866					msize = msize_and_free & ~ SMALL_IS_FREE;
4867					if (! (msize_and_free & SMALL_IS_FREE) &&
4868						range.address + SMALL_BYTES_FOR_MSIZE(block_index) != mag_last_free_ptr) {
4869						// Block in use
4870						buffer[count].address = range.address + SMALL_BYTES_FOR_MSIZE(block_index);
4871						buffer[count].size = SMALL_BYTES_FOR_MSIZE(msize);
4872						count++;
4873						if (count >= MAX_RECORDER_BUFFER) {
4874							recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
4875							count = 0;
4876						}
4877					}
4878
4879					if (!msize)
4880						return KERN_FAILURE; // Something's amiss. Avoid looping at this block_index.
4881
4882					block_index += msize;
4883				}
4884				if (count) {
4885					recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
4886					count = 0;
4887				}
4888			}
4889		}
4890	}
4891	return 0;
4892}
4893
4894static void *
4895small_malloc_from_free_list(szone_t *szone, magazine_t *small_mag_ptr, mag_index_t mag_index, msize_t msize)
4896{
4897	free_list_t		*ptr;
4898	msize_t		this_msize;
4899	grain_t		slot = (msize <= szone->num_small_slots) ? msize - 1 : szone->num_small_slots - 1;
4900	free_list_t		**free_list = small_mag_ptr->mag_free_list;
4901	free_list_t		**the_slot = free_list + slot;
4902	free_list_t		*next;
4903	free_list_t		**limit;
4904	unsigned		bitmap;
4905	msize_t		leftover_msize;
4906	free_list_t		*leftover_ptr;
4907
4908	// Assumes we've locked the region
4909	CHECK_MAGAZINE_PTR_LOCKED(szone, small_mag_ptr, __PRETTY_FUNCTION__);
4910
4911	// Look for an exact match by checking the freelist for this msize.
4912	//
4913	ptr = *the_slot;
4914	if (ptr) {
4915		next = free_list_unchecksum_ptr(szone, &ptr->next);
4916		if (next) {
4917			next->previous = ptr->previous;
4918		} else {
4919			BITMAPN_CLR(small_mag_ptr->mag_bitmap, slot);
4920		}
4921		*the_slot = next;
4922		this_msize = msize;
4923		goto return_small_alloc;
4924	}
4925
4926	// Mask off the bits representing slots holding free blocks smaller than
4927	// the size we need.  If there are no larger free blocks, try allocating
4928	// from the free space at the end of the small region.
4929	if (szone->is_largemem) {
4930		// BITMAPN_CTZ implementation
4931		unsigned idx = slot >> 5;
4932		bitmap = 0;
4933		unsigned mask = ~ ((1 << (slot & 31)) - 1);
4934		for ( ; idx < SMALL_BITMAP_WORDS; ++idx ) {
4935			bitmap = small_mag_ptr->mag_bitmap[idx] & mask;
4936			if (bitmap != 0)
4937				break;
4938			mask = ~0U;
4939		}
4940		// Check for fallthrough: No bits set in bitmap
4941		if ((bitmap == 0) && (idx == SMALL_BITMAP_WORDS))
4942			goto try_small_from_end;
4943
4944		// Start looking at the first set bit, plus 32 bits for every word of
4945		// zeroes or entries that were too small.
4946		slot = BITMAP32_CTZ((&bitmap)) + (idx * 32);
4947	} else {
4948		bitmap = small_mag_ptr->mag_bitmap[0] & ~ ((1 << slot) - 1);
4949		if (!bitmap)
4950			goto try_small_from_end;
4951
4952		slot = BITMAP32_CTZ((&bitmap));
4953	}
4954	// The last slot (index num_small_slots - 1) holds blocks of (num_small_slots - 1) or more quanta; it is handled separately after this scan.
4955	limit = free_list + szone->num_small_slots - 1;
4956	free_list += slot;
4957
4958	if (free_list < limit) {
4959		ptr = *free_list;
4960		if (ptr) {
4961
4962			next = free_list_unchecksum_ptr(szone, &ptr->next);
4963			*free_list = next;
4964			if (next) {
4965				next->previous = ptr->previous;
4966			} else {
4967				BITMAPN_CLR(small_mag_ptr->mag_bitmap, slot);
4968			}
4969			this_msize = SMALL_PTR_SIZE(ptr);
4970			goto add_leftover_and_proceed;
4971		}
4972#if DEBUG_MALLOC
4973		malloc_printf("in small_malloc_from_free_list(), mag_bitmap out of sync, slot=%d\n",slot);
4974#endif
4975	}
4976
4977	// We are now looking at the last slot, which contains blocks equal to, or
4978	// due to coalescing of free blocks, larger than (num_small_slots - 1) * (small quantum size).
4979	// If the last freelist is not empty, and the head contains a block that is
4980	// larger than our request, then the remainder is put back on the free list.
4981	//
4982	ptr = *limit;
4983	if (ptr) {
4984		this_msize = SMALL_PTR_SIZE(ptr);
4985		next = free_list_unchecksum_ptr(szone, &ptr->next);
4986		if (this_msize - msize >= szone->num_small_slots) {
4987			// the leftover will go back to the free list, so we optimize by
4988			// modifying the free list rather than a pop and push of the head
4989			leftover_msize = this_msize - msize;
4990			leftover_ptr = (free_list_t *)((unsigned char *)ptr + SMALL_BYTES_FOR_MSIZE(msize));
4991			*limit = leftover_ptr;
4992			if (next) {
4993				next->previous.u = free_list_checksum_ptr(szone, leftover_ptr);
4994			}
4995			leftover_ptr->previous = ptr->previous;
4996			leftover_ptr->next = ptr->next;
4997			small_meta_header_set_is_free(SMALL_META_HEADER_FOR_PTR(leftover_ptr),
4998										  SMALL_META_INDEX_FOR_PTR(leftover_ptr), leftover_msize);
4999			// Store msize at the end of the block denoted by "leftover_ptr" (i.e. at a negative offset from follower)
5000			SMALL_PREVIOUS_MSIZE(FOLLOWING_SMALL_PTR(leftover_ptr, leftover_msize)) = leftover_msize; // Access is safe
5001#if DEBUG_MALLOC
5002			if (LOG(szone,ptr)) {
5003				malloc_printf("in small_malloc_from_free_list(), last slot ptr=%p, msize=%d this_msize=%d\n", ptr, msize, this_msize);
5004			}
5005#endif
5006			this_msize = msize;
5007			goto return_small_alloc;
5008		}
5009		if (next) {
5010			next->previous = ptr->previous;
5011		}
5012		*limit = next;
5013		goto add_leftover_and_proceed;
5014	}
5015
5016try_small_from_end:
5017	// Let's see if we can use small_mag_ptr->mag_bytes_free_at_end
5018	if (small_mag_ptr->mag_bytes_free_at_end >= SMALL_BYTES_FOR_MSIZE(msize)) {
5019		ptr = (free_list_t *)(SMALL_REGION_END(small_mag_ptr->mag_last_region) -
5020							  small_mag_ptr->mag_bytes_free_at_end);
5021		small_mag_ptr->mag_bytes_free_at_end -= SMALL_BYTES_FOR_MSIZE(msize);
5022		if (small_mag_ptr->mag_bytes_free_at_end) {
5023			// let's mark this block as in use to serve as boundary
5024			small_meta_header_set_in_use(SMALL_META_HEADER_FOR_PTR(ptr),
5025										 SMALL_META_INDEX_FOR_PTR((unsigned char *)ptr + SMALL_BYTES_FOR_MSIZE(msize)),
5026										 SMALL_MSIZE_FOR_BYTES(small_mag_ptr->mag_bytes_free_at_end));
5027		}
5028		this_msize = msize;
5029		goto return_small_alloc;
5030	}
5031#if ASLR_INTERNAL
5032	// Try from start if nothing left at end
5033	if (small_mag_ptr->mag_bytes_free_at_start >= SMALL_BYTES_FOR_MSIZE(msize)) {
5034		ptr = (free_list_t *)(SMALL_REGION_ADDRESS(small_mag_ptr->mag_last_region) +
5035							  small_mag_ptr->mag_bytes_free_at_start - SMALL_BYTES_FOR_MSIZE(msize));
5036		small_mag_ptr->mag_bytes_free_at_start -= SMALL_BYTES_FOR_MSIZE(msize);
5037		if (small_mag_ptr->mag_bytes_free_at_start) {
5038			// let's mark this block as in use to serve as boundary
5039			small_meta_header_set_in_use(SMALL_META_HEADER_FOR_PTR(ptr), 0, SMALL_MSIZE_FOR_BYTES(small_mag_ptr->mag_bytes_free_at_start));
5040		}
5041		this_msize = msize;
5042		goto return_small_alloc;
5043	}
5044#endif
5045	return NULL;
5046
5047add_leftover_and_proceed:
5048	if (this_msize > msize) {
5049		leftover_msize = this_msize - msize;
5050		leftover_ptr = (free_list_t *)((unsigned char *)ptr + SMALL_BYTES_FOR_MSIZE(msize));
5051#if DEBUG_MALLOC
5052		if (LOG(szone,ptr)) {
5053			malloc_printf("in small_malloc_from_free_list(), adding leftover ptr=%p, this_msize=%d\n", ptr, this_msize);
5054		}
5055#endif
5056		small_free_list_add_ptr(szone, small_mag_ptr, leftover_ptr, leftover_msize);
5057		this_msize = msize;
5058	}
5059
5060return_small_alloc:
5061	small_mag_ptr->mag_num_objects++;
5062	small_mag_ptr->mag_num_bytes_in_objects += SMALL_BYTES_FOR_MSIZE(this_msize);
5063
5064	// Update this region's bytes in use count
5065	region_trailer_t *node = REGION_TRAILER_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(ptr));
5066	size_t bytes_used = node->bytes_used + SMALL_BYTES_FOR_MSIZE(this_msize);
5067	node->bytes_used = bytes_used;
5068
5069	// Emptiness discriminant
5070	if (bytes_used < DENSITY_THRESHOLD(SMALL_REGION_PAYLOAD_BYTES)) {
5071		/* After this allocation the region is still sparse, so it must have been even more so before
5072		 the allocation. That implies the region is already correctly marked. Do nothing. */
5073	} else {
5074		/* Region has crossed threshold from sparsity to density. Mark it not "suitable" on the
5075		 recirculation candidates list. */
5076		node->recirc_suitable = FALSE;
5077	}
5078#if DEBUG_MALLOC
5079	if (LOG(szone,ptr)) {
5080		malloc_printf("in small_malloc_from_free_list(), ptr=%p, this_msize=%d, msize=%d\n", ptr, this_msize, msize);
5081	}
5082#endif
5083	small_meta_header_set_in_use(SMALL_META_HEADER_FOR_PTR(ptr), SMALL_META_INDEX_FOR_PTR(ptr), this_msize);
5084	return ptr;
5085}
5086#undef DENSITY_THRESHOLD
5087#undef K
5088
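/*
 * small_malloc_should_clear: the small allocation path proper. Order of attack:
 * (1) the per-magazine one-entry mag_last_free cache, (2) the magazine's free lists,
 * (3) a region adopted from the Depot, and finally (4) a brand-new region allocated outside
 * the magazine lock, serialized by alloc_underway so only one thread maps fresh memory.
 * The result is zeroed only when cleared_requested and the memory isn't a pristine new region.
 */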
5089static INLINE void *
5090small_malloc_should_clear(szone_t *szone, msize_t msize, boolean_t cleared_requested)
5091{
5092	void	*ptr;
5093	mag_index_t	mag_index = mag_get_thread_index(szone);
5094	magazine_t	*small_mag_ptr = &(szone->small_magazines[mag_index]);
5095
5096	SZONE_MAGAZINE_PTR_LOCK(szone, small_mag_ptr);
5097
5098#if SMALL_CACHE
5099	ptr = (void *)small_mag_ptr->mag_last_free;
5100
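	// mag_last_free packs the cached block's msize into the low bits of its SMALL_QUANTUM-aligned
	// address (see free_small() below), so a single mask-and-compare both detects a cache hit and
	// confirms the size matches.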
5101	if ((((uintptr_t)ptr) & (SMALL_QUANTUM - 1)) == msize) {
5102		// we have a winner
5103		small_mag_ptr->mag_last_free = NULL;
5104		small_mag_ptr->mag_last_free_rgn = NULL;
5105		SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5106		CHECK(szone, __PRETTY_FUNCTION__);
5107		ptr = (void *)((uintptr_t)ptr & ~ (SMALL_QUANTUM - 1));
5108		if (cleared_requested) {
5109			memset(ptr, 0, SMALL_BYTES_FOR_MSIZE(msize));
5110		}
5111		return ptr;
5112	}
5113#endif /* SMALL_CACHE */
5114
5115	while(1) {
5116		ptr = small_malloc_from_free_list(szone, small_mag_ptr, mag_index, msize);
5117		if (ptr) {
5118			SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5119			CHECK(szone, __PRETTY_FUNCTION__);
5120			if (cleared_requested) {
5121				memset(ptr, 0, SMALL_BYTES_FOR_MSIZE(msize));
5122			}
5123			return ptr;
5124		}
5125
5126		if (small_get_region_from_depot(szone, small_mag_ptr, mag_index, msize)) {
5127			ptr = small_malloc_from_free_list(szone, small_mag_ptr, mag_index, msize);
5128			if (ptr) {
5129				SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5130				CHECK(szone, __PRETTY_FUNCTION__);
5131				if (cleared_requested) {
5132					memset(ptr, 0, SMALL_BYTES_FOR_MSIZE(msize));
5133				}
5134				return ptr;
5135			}
5136		}
5137
5138		// The magazine is exhausted. A new region (heap) must be allocated to satisfy this call to malloc().
5139		// The allocation, an mmap() system call, will be performed outside the magazine spin locks by the first
5140		// thread that suffers the exhaustion. That thread sets "alloc_underway" and enters a critical section.
5141		// Threads arriving here later are excluded from the critical section, yield the CPU, and then retry the
5142		// allocation. After some time the magazine is resupplied, the original thread leaves with its allocation,
5143		// and retry-ing threads succeed in the code just above.
5144		if (!small_mag_ptr->alloc_underway) {
5145			void *fresh_region;
5146
5147			// time to create a new region (do this outside the magazine lock)
5148			small_mag_ptr->alloc_underway = TRUE;
5149			OSMemoryBarrier();
5150			SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5151			fresh_region = allocate_pages_securely(szone, SMALL_REGION_SIZE, SMALL_BLOCKS_ALIGN, VM_MEMORY_MALLOC_SMALL);
5152			SZONE_MAGAZINE_PTR_LOCK(szone, small_mag_ptr);
5153
5154			MAGMALLOC_ALLOCREGION((void *)szone, (int)mag_index, fresh_region, SMALL_REGION_SIZE); // DTrace USDT Probe
5155
5156			if (!fresh_region) { // out of memory!
5157				small_mag_ptr->alloc_underway = FALSE;
5158				OSMemoryBarrier();
5159				SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5160				return NULL;
5161			}
5162
5163			ptr = small_malloc_from_region_no_lock(szone, small_mag_ptr, mag_index, msize, fresh_region);
5164
5165			// we don't clear because this freshly allocated space is pristine
5166			small_mag_ptr->alloc_underway = FALSE;
5167			OSMemoryBarrier();
5168			SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5169			CHECK(szone, __PRETTY_FUNCTION__);
5170			return ptr;
5171		} else {
5172			SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5173			yield();
5174			SZONE_MAGAZINE_PTR_LOCK(szone, small_mag_ptr);
5175		}
5176	}
5177	/* NOTREACHED */
5178}
5179
5180static NOINLINE void
5181free_small_botch(szone_t *szone, free_list_t *ptr)
5182{
5183	mag_index_t	mag_index = MAGAZINE_INDEX_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(ptr));
5184	magazine_t	*small_mag_ptr = &(szone->small_magazines[mag_index]);
5185	SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5186	szone_error(szone, 1, "double free", ptr, NULL);
5187}
5188
5189static INLINE void
5190free_small(szone_t *szone, void *ptr, region_t small_region, size_t known_size)
5191{
5192	msize_t	msize;
5193	mag_index_t	mag_index = MAGAZINE_INDEX_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(ptr));
5194	magazine_t	*small_mag_ptr = &(szone->small_magazines[mag_index]);
5195
5196	// ptr is known to be in small_region
5197	if (known_size) {
5198		msize = SMALL_MSIZE_FOR_BYTES(known_size + SMALL_QUANTUM - 1);
5199	} else {
5200		msize = SMALL_PTR_SIZE(ptr);
5201		if (SMALL_PTR_IS_FREE(ptr)) {
5202			free_small_botch(szone, ptr);
5203			return;
5204		}
5205	}
5206
5207	SZONE_MAGAZINE_PTR_LOCK(szone, small_mag_ptr);
5208
5209#if SMALL_CACHE
5210	// Depot does not participate in SMALL_CACHE since it can't be directly malloc()'d
5211	if (DEPOT_MAGAZINE_INDEX != mag_index) {
5212
5213		void *ptr2 = small_mag_ptr->mag_last_free; // Might be NULL
5214		region_t rgn2 = small_mag_ptr->mag_last_free_rgn;
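		// The cache holds at most one block per magazine: the incoming ptr is parked in
		// mag_last_free below, and the block it displaces (ptr2, if any) is the one that
		// actually gets freed on this call.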
5215
5216		/* check that we don't already have this pointer in the cache */
5217		if (ptr == (void *)((uintptr_t)ptr2 & ~ (SMALL_QUANTUM - 1))) {
5218			free_small_botch(szone, ptr);
5219			return;
5220		}
5221
5222		if ((szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) && msize)
5223			memset(ptr, SCRABBLE_BYTE, SMALL_BYTES_FOR_MSIZE(msize));
5224
5225		small_mag_ptr->mag_last_free = (void *)(((uintptr_t)ptr) | msize);
5226		small_mag_ptr->mag_last_free_rgn = small_region;
5227
5228		if (!ptr2) {
5229			SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5230			CHECK(szone, __PRETTY_FUNCTION__);
5231			return;
5232		}
5233
5234		msize = (uintptr_t)ptr2 & (SMALL_QUANTUM - 1);
5235		ptr = (void *)(((uintptr_t)ptr2) & ~(SMALL_QUANTUM - 1));
5236		small_region = rgn2;
5237	}
5238#endif /* SMALL_CACHE */
5239
5240	// Now in the time it took to acquire the lock, the region may have migrated
5241	// from one magazine to another. I.e. trailer->mag_index is volatile.
5242	// In which case the magazine lock we obtained (namely magazines[mag_index].mag_lock)
5243	// is stale. If so, keep on tryin' ...
5244	region_trailer_t *trailer = REGION_TRAILER_FOR_SMALL_REGION(small_region);
5245	mag_index_t refreshed_index;
5246
5247	while (mag_index != (refreshed_index = trailer->mag_index)) { // Note assignment
5248
5249		SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5250
5251		mag_index = refreshed_index;
5252		small_mag_ptr = &(szone->small_magazines[mag_index]);
5253		SZONE_MAGAZINE_PTR_LOCK(szone, small_mag_ptr);
5254	}
5255
5256	if (small_free_no_lock(szone, small_mag_ptr, mag_index, small_region, ptr, msize))
5257		SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5258
5259	CHECK(szone, __PRETTY_FUNCTION__);
5260}
5261
5262static void
5263print_small_free_list(szone_t *szone)
5264{
5265	free_list_t		*ptr;
5266	_SIMPLE_STRING	b = _simple_salloc();
5267	mag_index_t mag_index;
5268
5269	if (b) {
5270		_simple_sappend(b, "small free sizes:\n");
5271		for (mag_index = -1; mag_index < szone->num_small_magazines; mag_index++) {
5272			grain_t	slot = 0;
5273			_simple_sprintf(b,"\tMagazine %d: ", mag_index);
5274			while (slot < szone->num_small_slots) {
5275				ptr = szone->small_magazines[mag_index].mag_free_list[slot];
5276				if (ptr) {
5277					_simple_sprintf(b, "%s%y[%d]; ", (slot == szone->num_small_slots-1) ? ">=" : "",
5278									(slot + 1) * SMALL_QUANTUM, free_list_count(szone, ptr));
5279				}
5280				slot++;
5281			}
5282			_simple_sappend(b,"\n");
5283		}
5284		_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b));
5285		_simple_sfree(b);
5286	}
5287}
5288
5289static void
5290print_small_region(szone_t *szone, boolean_t verbose, region_t region, size_t bytes_at_start, size_t bytes_at_end)
5291{
5292	unsigned		counts[1024];
5293	unsigned		in_use = 0;
5294	uintptr_t		start = (uintptr_t)SMALL_REGION_ADDRESS(region);
5295	uintptr_t		current = start + bytes_at_start;
5296	uintptr_t		limit = (uintptr_t)SMALL_REGION_END(region) - bytes_at_end;
5297	msize_t		msize_and_free;
5298	msize_t		msize;
5299	unsigned		ci;
5300	_SIMPLE_STRING	b;
5301	uintptr_t		pgTot = 0;
5302
5303	if (region == HASHRING_REGION_DEALLOCATED) {
5304		if ((b = _simple_salloc()) != NULL) {
5305			_simple_sprintf(b, "Small region [unknown address] was returned to the OS\n");
5306			_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b));
5307			_simple_sfree(b);
5308		}
5309		return;
5310	}
5311
5312	memset(counts, 0, sizeof(counts));
5313	while (current < limit) {
5314		msize_and_free = *SMALL_METADATA_FOR_PTR(current);
5315		msize = msize_and_free & ~ SMALL_IS_FREE;
5316		if (!msize) {
5317			malloc_printf("*** error with %p: msize=%d\n", (void *)current, (unsigned)msize);
5318			break;
5319		}
5320		if (!(msize_and_free & SMALL_IS_FREE)) {
5321			// block in use
5322			if (msize < 1024)
5323				counts[msize]++;
5324			in_use++;
5325		} else {
5326			uintptr_t pgLo = round_page_quanta(current + sizeof(free_list_t) + sizeof(msize_t));
5327			uintptr_t pgHi = trunc_page_quanta(current + SMALL_BYTES_FOR_MSIZE(msize) - sizeof(msize_t));
5328
5329			if (pgLo < pgHi) {
5330				pgTot += (pgHi - pgLo);
5331			}
5332		}
5333		current += SMALL_BYTES_FOR_MSIZE(msize);
5334	}
5335	if ((b = _simple_salloc()) != NULL) {
5336		_simple_sprintf(b, "Small region [%p-%p, %y] \t", (void *)start, SMALL_REGION_END(region), (int)SMALL_REGION_SIZE);
5337		_simple_sprintf(b, "Magazine=%d \t", MAGAZINE_INDEX_FOR_SMALL_REGION(region));
5338		_simple_sprintf(b, "Allocations in use=%d \t Bytes in use=%ly \t", in_use, BYTES_USED_FOR_SMALL_REGION(region));
5339		if (bytes_at_end || bytes_at_start)
5340			_simple_sprintf(b, "Untouched=%ly ", bytes_at_end + bytes_at_start);
5341		if (DEPOT_MAGAZINE_INDEX == MAGAZINE_INDEX_FOR_SMALL_REGION(region)) {
5342			_simple_sprintf(b, "Advised MADV_FREE=%ly", pgTot);
5343		} else {
5344			_simple_sprintf(b, "Fragments subject to reclamation=%ly", pgTot);
5345		}
5346		if (verbose && in_use) {
5347			_simple_sappend(b, "\n\tSizes in use: ");
5348			for (ci = 0; ci < 1024; ci++)
5349				if (counts[ci])
5350					_simple_sprintf(b, "%d[%d] ", SMALL_BYTES_FOR_MSIZE(ci), counts[ci]);
5351		}
5352		_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b));
5353		_simple_sfree(b);
5354	}
5355}
5356
5357static boolean_t
5358small_free_list_check(szone_t *szone, grain_t slot)
5359{
5360	mag_index_t mag_index;
5361
5362	for (mag_index = -1; mag_index < szone->num_small_magazines; mag_index++) {
5363		magazine_t	*small_mag_ptr = &(szone->small_magazines[mag_index]);
5364		SZONE_MAGAZINE_PTR_LOCK(szone, small_mag_ptr);
5365
5366		unsigned	count = 0;
5367		free_list_t	*ptr = szone->small_magazines[mag_index].mag_free_list[slot];
5368		msize_t		msize_and_free;
5369		free_list_t	*previous = NULL;
5370
5371		while (ptr) {
5372			msize_and_free = *SMALL_METADATA_FOR_PTR(ptr);
5373			if (!(msize_and_free & SMALL_IS_FREE)) {
5374				malloc_printf("*** in-use ptr in free list slot=%d count=%d ptr=%p\n", slot, count, ptr);
5375				SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5376				return 0;
5377			}
5378			if (((uintptr_t)ptr) & (SMALL_QUANTUM - 1)) {
5379				malloc_printf("*** unaligned ptr in free list slot=%d  count=%d ptr=%p\n", slot, count, ptr);
5380				SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5381				return 0;
5382			}
5383			if (!small_region_for_ptr_no_lock(szone, ptr)) {
5384				malloc_printf("*** ptr not in szone slot=%d  count=%d ptr=%p\n", slot, count, ptr);
5385				SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5386				return 0;
5387			}
5388			if (free_list_unchecksum_ptr(szone, &ptr->previous) != previous) {
5389				malloc_printf("*** previous incorrectly set slot=%d  count=%d ptr=%p\n", slot, count, ptr);
5390				SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5391				return 0;
5392			}
5393			previous = ptr;
5394			ptr = free_list_unchecksum_ptr(szone, &ptr->next);
5395			count++;
5396		}
5397
5398		SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
5399	}
5400	return 1;
5401}
5402
5403/*******************************************************************************
5404 * Large allocator implementation
5405 ******************************************************************************/
5406#pragma mark large allocator
5407
5408#if DEBUG_MALLOC
5409
5410static void
5411large_debug_print(szone_t *szone)
5412{
5413	unsigned		index;
5414	large_entry_t	*range;
5415	_SIMPLE_STRING	b = _simple_salloc();
5416
5417	if (b) {
5418		for (index = 0, range = szone->large_entries; index < szone->num_large_entries; index++, range++)
5419			if (range->address)
5420				_simple_sprintf(b, "%d: %p(%y);  ", index, range->address, range->size);
5421
5422		_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "%s\n", _simple_string(b));
5423		_simple_sfree(b);
5424	}
5425}
5426#endif
5427
5428/*
5429 * Scan the hash ring looking for an entry for the given pointer.
5430 */
5431static large_entry_t *
5432large_entry_for_pointer_no_lock(szone_t *szone, const void *ptr)
5433{
5434	// result only valid with lock held
5435	unsigned		num_large_entries = szone->num_large_entries;
5436	unsigned		hash_index;
5437	unsigned		index;
5438	large_entry_t	*range;
5439
5440	if (!num_large_entries)
5441		return NULL;
5442
5443	hash_index = ((uintptr_t)ptr >> vm_page_quanta_shift) % num_large_entries;
5444	index = hash_index;
5445
5446	do {
5447		range = szone->large_entries + index;
5448		if (range->address == (vm_address_t)ptr)
5449			return range;
5450		if (0 == range->address)
5451			return NULL; // end of chain
5452		index++;
5453		if (index == num_large_entries)
5454			index = 0;
5455	} while (index != hash_index);
5456
5457	return NULL;
5458}
5459
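/*
 * large_entry_insert_no_lock: insert `range` into the hash table using open addressing with
 * linear probing. The caller guarantees at least one empty slot, so the probe always terminates.
 */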
5460static void
5461large_entry_insert_no_lock(szone_t *szone, large_entry_t range)
5462{
5463	unsigned		num_large_entries = szone->num_large_entries;
5464	unsigned		hash_index = (((uintptr_t)(range.address)) >> vm_page_quanta_shift) % num_large_entries;
5465	unsigned		index = hash_index;
5466	large_entry_t	*entry;
5467
5468	// assert(szone->num_large_objects_in_use < szone->num_large_entries); /* must be called with room to spare */
5469
5470	do {
5471		entry = szone->large_entries + index;
5472		if (0 == entry->address) {
5473			*entry = range;
5474			return; // end of chain
5475		}
5476		index++;
5477		if (index == num_large_entries)
5478			index = 0;
5479	} while (index != hash_index);
5480
5481	// assert(0); /* must not fallthrough! */
5482}
5483
5484// FIXME: can't we simply swap the (now empty) entry with the last entry on the collision chain for this hash slot?
5485static INLINE void
5486large_entries_rehash_after_entry_no_lock(szone_t *szone, large_entry_t *entry)
5487{
5488	unsigned		num_large_entries = szone->num_large_entries;
5489	unsigned		hash_index = entry - szone->large_entries;
5490	unsigned		index = hash_index;
5491	large_entry_t	range;
5492
5493	// assert(entry->address == 0) /* caller must have cleared *entry */
5494
5495	do {
5496		index++;
5497		if (index == num_large_entries)
5498			index = 0;
5499		range = szone->large_entries[index];
5500		if (0 == range.address)
5501			return;
5502		szone->large_entries[index].address = (vm_address_t)0;
5503		szone->large_entries[index].size = 0;
5504		szone->large_entries[index].did_madvise_reusable = FALSE;
5505		large_entry_insert_no_lock(szone, range); // this will reinsert in the
5506		// proper place
5507	} while (index != hash_index);
5508
5509	// assert(0); /* since entry->address == 0, must not fallthrough! */
5510}
5511
5512// FIXME: num should probably be a size_t, since you can theoretically allocate
5513// more than 2^32-1 large_threshold objects in 64 bit.
5514static INLINE large_entry_t *
5515large_entries_alloc_no_lock(szone_t *szone, unsigned num)
5516{
5517	size_t	size = num * sizeof(large_entry_t);
5518
5519	// Note that we allocate memory (via a system call) under a spin lock
5520	// That is certainly evil; however, it's very rare in the lifetime of a process
5521	// The alternative would slow down the normal case
5522	return allocate_pages(szone, round_page_quanta(size), 0, 0, VM_MEMORY_MALLOC_LARGE);
5523}
5524
5525static void
5526large_entries_free_no_lock(szone_t *szone, large_entry_t *entries, unsigned num, vm_range_t *range_to_deallocate)
5527{
5528	size_t	size = num * sizeof(large_entry_t);
5529
5530	range_to_deallocate->address = (vm_address_t)entries;
5531	range_to_deallocate->size = round_page_quanta(size);
5532}
5533
5534static large_entry_t *
5535large_entries_grow_no_lock(szone_t *szone, vm_range_t *range_to_deallocate)
5536{
5537	// sets range_to_deallocate
5538	unsigned		old_num_entries = szone->num_large_entries;
5539	large_entry_t	*old_entries = szone->large_entries;
5540	// always an odd number for good hashing
5541	unsigned		new_num_entries = (old_num_entries) ? old_num_entries * 2 + 1 :
5542	((vm_page_quanta_size / sizeof(large_entry_t)) - 1);
5543	large_entry_t	*new_entries = large_entries_alloc_no_lock(szone, new_num_entries);
5544	unsigned		index = old_num_entries;
5545	large_entry_t	oldRange;
5546
5547	// if the allocation of new entries failed, bail
5548	if (new_entries == NULL)
5549		return NULL;
5550
5551	szone->num_large_entries = new_num_entries;
5552	szone->large_entries = new_entries;
5553
5554	/* rehash entries into the new list */
5555	while (index--) {
5556		oldRange = old_entries[index];
5557		if (oldRange.address) {
5558			large_entry_insert_no_lock(szone, oldRange);
5559		}
5560	}
5561
5562	if (old_entries) {
5563		large_entries_free_no_lock(szone, old_entries, old_num_entries, range_to_deallocate);
5564	} else {
5565		range_to_deallocate->address = (vm_address_t)0;
5566		range_to_deallocate->size = 0;
5567	}
5568
5569	return new_entries;
5570}
5571
5572// frees the specific entry in the size table
5573// returns a range to truly deallocate
5574static vm_range_t
5575large_entry_free_no_lock(szone_t *szone, large_entry_t *entry)
5576{
5577	vm_range_t		range;
5578
5579	range.address = entry->address;
5580	range.size = entry->size;
5581
5582	if (szone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) {
5583		protect((void *)range.address, range.size, PROT_READ | PROT_WRITE, szone->debug_flags);
5584		range.address -= vm_page_quanta_size;
5585		range.size += 2 * vm_page_quanta_size;
5586	}
5587
5588	entry->address = 0;
5589	entry->size = 0;
5590	entry->did_madvise_reusable = FALSE;
5591	large_entries_rehash_after_entry_no_lock(szone, entry);
5592
5593#if DEBUG_MALLOC
5594	if (large_entry_for_pointer_no_lock(szone, (void *)range.address)) {
5595		malloc_printf("*** freed entry %p still in use; num_large_entries=%d\n",
5596					  range.address, szone->num_large_entries);
5597		large_debug_print(szone);
5598		szone_sleep();
5599	}
5600#endif
5601	return range;
5602}
5603
5604static NOINLINE kern_return_t
5605large_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t large_entries_address,
5606						unsigned num_entries, memory_reader_t reader, vm_range_recorder_t recorder)
5607{
5608	unsigned		index = 0;
5609	vm_range_t		buffer[MAX_RECORDER_BUFFER];
5610	unsigned		count = 0;
5611	large_entry_t	*entries;
5612	kern_return_t	err;
5613	vm_range_t		range;
5614	large_entry_t	entry;
5615
5616	err = reader(task, large_entries_address, sizeof(large_entry_t) * num_entries, (void **)&entries);
5617	if (err)
5618		return err;
5619
5620	index = num_entries;
5621	if (type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) {
5622		range.address = large_entries_address;
5623		range.size = round_page_quanta(num_entries * sizeof(large_entry_t));
5624		recorder(task, context, MALLOC_ADMIN_REGION_RANGE_TYPE, &range, 1);
5625	}
5626	if (type_mask & (MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE)) {
5627		while (index--) {
5628			entry = entries[index];
5629			if (entry.address) {
5630				range.address = entry.address;
5631				range.size = entry.size;
5632				buffer[count++] = range;
5633				if (count >= MAX_RECORDER_BUFFER) {
5634					recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE,
5635							 buffer, count);
5636					count = 0;
5637				}
5638			}
5639		}
5640	}
5641	if (count) {
5642		recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE,
5643				 buffer, count);
5644	}
5645	return 0;
5646}
5647
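// large_malloc() first tries to recycle a block from the death-row cache (best fit,
// capped at 50% internal waste, honoring any alignment request, where "alignment" is
// expressed as a power-of-two shift); failing that, it maps fresh pages with
// allocate_pages().  num_kernel_pages is in units of vm_page_quanta_size.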
5648static void *
5649large_malloc(szone_t *szone, size_t num_kernel_pages, unsigned char alignment,
5650			 boolean_t cleared_requested)
5651{
5652	void		*addr;
5653	vm_range_t		range_to_deallocate;
5654	size_t		size;
5655	large_entry_t	large_entry;
5656
5657	if (!num_kernel_pages)
5658		num_kernel_pages = 1; // minimal allocation size for this szone
5659	size = (size_t)num_kernel_pages << vm_page_quanta_shift;
5660	range_to_deallocate.size = 0;
5661	range_to_deallocate.address = 0;
5662
5663#if LARGE_CACHE
5664	if (size < LARGE_CACHE_SIZE_ENTRY_LIMIT) { // Look for a large_entry_t on the death-row cache?
5665		SZONE_LOCK(szone);
5666
5667		int i, best = -1, idx = szone->large_entry_cache_newest, stop_idx = szone->large_entry_cache_oldest;
5668		size_t best_size = SIZE_T_MAX;
5669
5670		while (1) { // Scan large_entry_cache for best fit, starting with most recent entry
5671			size_t this_size = szone->large_entry_cache[idx].size;
5672			addr = (void *)szone->large_entry_cache[idx].address;
5673
5674			if (0 == alignment || 0 == (((uintptr_t) addr) & (((uintptr_t) 1 << alignment) - 1))) {
5675				if (size == this_size) { // size match!
5676					best = idx;
5677					best_size = this_size;
5678					break;
5679				}
5680
5681				if (size <= this_size && this_size < best_size) { // improved fit?
5682					best = idx;
5683					best_size = this_size;
5684				}
5685			}
5686
5687			if (idx == stop_idx) // exhausted live ring?
5688				break;
5689
5690			if (idx)
5691				idx--; // bump idx down
5692			else
5693				idx = LARGE_ENTRY_CACHE_SIZE - 1; // wrap idx
5694		}
5695
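		// The test below accepts a cached block only if its excess over the request is
		// smaller than the request itself, i.e. at most half of the vended block can be
		// wasted.  For example, a 3-page request may be satisfied by a cached 4- or
		// 5-page block (1 or 2 pages wasted), but not by a 6-page block.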
5696		if (best > -1 && (best_size - size) < size) { // limit fragmentation to 50%
5697			addr = (void *)szone->large_entry_cache[best].address;
5698			boolean_t was_madvised_reusable = szone->large_entry_cache[best].did_madvise_reusable;
5699
5700			// Compact live ring to fill entry now vacated at large_entry_cache[best]
5701			// while preserving time-order
5702			if (szone->large_entry_cache_oldest < szone->large_entry_cache_newest) {
5703
5704				// Ring hasn't wrapped. Fill in from right.
5705				for (i = best; i < szone->large_entry_cache_newest; ++i)
5706					szone->large_entry_cache[i] = szone->large_entry_cache[i + 1];
5707
5708				szone->large_entry_cache_newest--; // Pull in right endpoint.
5709
5710			} else if (szone->large_entry_cache_newest < szone->large_entry_cache_oldest) {
5711
5712				// Ring has wrapped. Arrange to fill in from the contiguous side.
5713				if (best <= szone->large_entry_cache_newest) {
5714					// Fill from right.
5715					for (i = best; i < szone->large_entry_cache_newest; ++i)
5716						szone->large_entry_cache[i] = szone->large_entry_cache[i + 1];
5717
5718					if (0 < szone->large_entry_cache_newest)
5719						szone->large_entry_cache_newest--;
5720					else
5721						szone->large_entry_cache_newest = LARGE_ENTRY_CACHE_SIZE - 1;
5722				} else {
5723					// Fill from left.
5724					for ( i = best; i > szone->large_entry_cache_oldest; --i)
5725						szone->large_entry_cache[i] = szone->large_entry_cache[i - 1];
5726
5727					if (szone->large_entry_cache_oldest < LARGE_ENTRY_CACHE_SIZE - 1)
5728						szone->large_entry_cache_oldest++;
5729					else
5730						szone->large_entry_cache_oldest = 0;
5731				}
5732
5733			} else {
5734				// By trichotomy, large_entry_cache_newest == large_entry_cache_oldest.
5735				// That implies best == large_entry_cache_newest == large_entry_cache_oldest
5736				// and the ring is now empty.
5737				szone->large_entry_cache[best].address = 0;
5738				szone->large_entry_cache[best].size = 0;
5739				szone->large_entry_cache[best].did_madvise_reusable = FALSE;
5740			}
5741
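			// Grow the hash table of large entries before it becomes more than roughly one
			// quarter full: with the (in_use + 1) * 4 > num_large_entries trigger, a
			// 1023-entry table, for example, is grown when the 256th object is about to be
			// inserted.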
5742			if ((szone->num_large_objects_in_use + 1) * 4 > szone->num_large_entries) {
5743				// density of hash table too high; grow table
5744				// we do that under lock to avoid a race
5745				large_entry_t *entries = large_entries_grow_no_lock(szone, &range_to_deallocate);
5746				if (entries == NULL) {
5747					SZONE_UNLOCK(szone);
5748					return NULL;
5749				}
5750			}
5751
5752			large_entry.address = (vm_address_t)addr;
5753			large_entry.size = best_size;
5754			large_entry.did_madvise_reusable = FALSE;
5755			large_entry_insert_no_lock(szone, large_entry);
5756
5757			szone->num_large_objects_in_use++;
5758			szone->num_bytes_in_large_objects += best_size;
5759			if (!was_madvised_reusable)
5760				szone->large_entry_cache_reserve_bytes -= best_size;
5761
5762			szone->large_entry_cache_bytes -= best_size;
5763
5764			if (szone->flotsam_enabled && szone->large_entry_cache_bytes < SZONE_FLOTSAM_THRESHOLD_LOW) {
5765				szone->flotsam_enabled = FALSE;
5766			}
5767
5768			SZONE_UNLOCK(szone);
5769
5770			if (range_to_deallocate.size) {
5771				// we deallocate outside the lock
5772				deallocate_pages(szone, (void *)range_to_deallocate.address, range_to_deallocate.size, 0);
5773			}
5774
5775			// Perform the madvise() outside the lock.
5776			// Typically the madvise() is successful and we'll quickly return from this routine.
5777			// In the unusual case of failure, reacquire the lock to unwind.
5778#if TARGET_OS_EMBEDDED
5779			// Ok to do this madvise on embedded because we won't call MADV_FREE_REUSABLE on a large
5780			// cache block twice without MADV_FREE_REUSE in between.
5781#endif
5782			if (was_madvised_reusable && -1 == madvise(addr, size, MADV_FREE_REUSE)) {
5783				/* -1 return: VM map entry change makes this unfit for reuse. */
5784#if DEBUG_MADVISE
5785				szone_error(szone, 0, "large_malloc madvise(..., MADV_FREE_REUSE) failed",
5786							addr, "length=%d\n", size);
5787#endif
5788
5789				SZONE_LOCK(szone);
5790				szone->num_large_objects_in_use--;
5791				szone->num_bytes_in_large_objects -= large_entry.size;
5792
5793				// Re-acquire "entry"; it may have changed during the interval above in which the lock was released.
5794				large_entry_t *entry = large_entry_for_pointer_no_lock(szone, addr);
5795				if (NULL == entry) {
5796					szone_error(szone, 1, "entry for pointer being discarded from death-row vanished", addr, NULL);
5797					SZONE_UNLOCK(szone);
5798				} else {
5799
5800					range_to_deallocate = large_entry_free_no_lock(szone, entry);
5801					SZONE_UNLOCK(szone);
5802
5803					if (range_to_deallocate.size) {
5804						// we deallocate outside the lock
5805						deallocate_pages(szone, (void *)range_to_deallocate.address, range_to_deallocate.size, 0);
5806					}
5807				}
5808				/* Fall through to allocate_pages() afresh. */
5809			} else {
5810				if (cleared_requested) {
5811					memset(addr, 0, size);
5812				}
5813
5814				return addr;
5815			}
5816		} else {
5817			SZONE_UNLOCK(szone);
5818		}
5819	}
5820
5821	range_to_deallocate.size = 0;
5822	range_to_deallocate.address = 0;
5823#endif /* LARGE_CACHE */
5824
5825	addr = allocate_pages(szone, size, alignment, szone->debug_flags, VM_MEMORY_MALLOC_LARGE);
5826	if (addr == NULL) {
5827		return NULL;
5828	}
5829
5830	SZONE_LOCK(szone);
5831	if ((szone->num_large_objects_in_use + 1) * 4 > szone->num_large_entries) {
5832		// density of hash table too high; grow table
5833		// we do that under lock to avoid a race
5834		large_entry_t *entries = large_entries_grow_no_lock(szone, &range_to_deallocate);
5835		if (entries == NULL) {
5836			SZONE_UNLOCK(szone);
5837			return NULL;
5838		}
5839	}
5840
5841	large_entry.address = (vm_address_t)addr;
5842	large_entry.size = size;
5843	large_entry.did_madvise_reusable = FALSE;
5844	large_entry_insert_no_lock(szone, large_entry);
5845
5846	szone->num_large_objects_in_use++;
5847	szone->num_bytes_in_large_objects += size;
5848	SZONE_UNLOCK(szone);
5849
5850	if (range_to_deallocate.size) {
5851		// we deallocate outside the lock
5852		deallocate_pages(szone, (void *)range_to_deallocate.address, range_to_deallocate.size, 0);
5853	}
5854	return addr;
5855}
5856
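// free_large() tries to park the block on the death-row cache when it is below
// LARGE_CACHE_SIZE_ENTRY_LIMIT and madvise(..., MADV_CAN_REUSE) succeeds (a block
// already on death-row is reported as a double free); otherwise the hash entry is
// removed and the pages are deallocated outright.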
5857static NOINLINE void
5858free_large(szone_t *szone, void *ptr)
5859{
5860	// We have established ptr is page-aligned and neither tiny nor small
5861	large_entry_t	*entry;
5862	vm_range_t		vm_range_to_deallocate;
5863
5864	SZONE_LOCK(szone);
5865	entry = large_entry_for_pointer_no_lock(szone, ptr);
5866	if (entry) {
5867#if LARGE_CACHE
5868		if (entry->size < LARGE_CACHE_SIZE_ENTRY_LIMIT &&
5869			-1 != madvise((void *)(entry->address), entry->size, MADV_CAN_REUSE)) { // Put the large_entry_t on the death-row cache?
5870			int idx = szone->large_entry_cache_newest, stop_idx = szone->large_entry_cache_oldest;
			large_entry_t this_entry = *entry; // Make a local copy; "entry" may change once the lock is released.
5872			boolean_t reusable = TRUE;
5873			boolean_t should_madvise = szone->large_entry_cache_reserve_bytes + this_entry.size > szone->large_entry_cache_reserve_limit;
5874
5875			// Already freed?
5876			// [Note that repeated entries on death-row would risk vending the same entry
5877			// to two different malloc() calls later on. By checking here, the (illegal) double
5878			// free is caught, matching the behavior of the previous implementation.]
5879			while (1) { // Scan large_entry_cache starting with most recent entry
5880				if (szone->large_entry_cache[idx].address == entry->address) {
5881					szone_error(szone, 1, "pointer being freed already on death-row", ptr, NULL);
5882					SZONE_UNLOCK(szone);
5883					return;
5884				}
5885
5886				if (idx == stop_idx) // exhausted live ring?
5887					break;
5888
5889				if (idx)
5890					idx--; // bump idx down
5891				else
5892					idx = LARGE_ENTRY_CACHE_SIZE - 1; // wrap idx
5893			}
5894
5895			SZONE_UNLOCK(szone);
5896
5897			if (szone->debug_flags & SCALABLE_MALLOC_PURGEABLE) { // Are we a purgeable zone?
5898				int state = VM_PURGABLE_NONVOLATILE; // restore to default condition
5899
5900				if (KERN_SUCCESS != vm_purgable_control(mach_task_self(), this_entry.address, VM_PURGABLE_SET_STATE, &state)) {
5901					malloc_printf("*** can't vm_purgable_control(..., VM_PURGABLE_SET_STATE) for large freed block at %p\n",
5902								  this_entry.address);
5903					reusable = FALSE;
5904				}
5905			}
5906
5907			if (szone->large_legacy_reset_mprotect) { // Linked for Leopard?
5908				// Accommodate Leopard apps that (illegally) mprotect() their own guard pages on large malloc'd allocations
5909				int err = mprotect((void *)(this_entry.address), this_entry.size, PROT_READ | PROT_WRITE);
5910				if (err) {
5911					malloc_printf("*** can't reset protection for large freed block at %p\n", this_entry.address);
5912					reusable = FALSE;
5913				}
5914			}
5915
5916			// madvise(..., MADV_FREE_REUSABLE) death-row arrivals if hoarding would exceed large_entry_cache_reserve_limit
5917			if (should_madvise) {
5918				// Issue madvise to avoid paging out the dirtied free()'d pages in "entry"
5919				MAGMALLOC_MADVFREEREGION((void *)szone, (void *)0, (void *)(this_entry.address), this_entry.size); // DTrace USDT Probe
5920
5921#if TARGET_OS_EMBEDDED
5922				// Ok to do this madvise on embedded because we won't call MADV_FREE_REUSABLE on a large
5923				// cache block twice without MADV_FREE_REUSE in between.
5924#endif
5925				if (-1 == madvise((void *)(this_entry.address), this_entry.size, MADV_FREE_REUSABLE)) {
5926					/* -1 return: VM map entry change makes this unfit for reuse. */
5927#if DEBUG_MADVISE
5928					szone_error(szone, 0, "free_large madvise(..., MADV_FREE_REUSABLE) failed",
5929								(void *)this_entry.address, "length=%d\n", this_entry.size);
5930#endif
5931					reusable = FALSE;
5932				}
5933			}
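			// To summarize the hoarding policy implemented above: a dying block stays dirty on
			// death-row until the running total of un-madvised cached bytes would exceed
			// large_entry_cache_reserve_limit; past that point new arrivals are handed to the
			// kernel with MADV_FREE_REUSABLE, and did_madvise_reusable records which treatment
			// each entry received.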
5934
5935			SZONE_LOCK(szone);
5936
5937			// Re-acquire "entry"; it may have changed during the interval above in which the lock was released.
5938			entry = large_entry_for_pointer_no_lock(szone, ptr);
5939			if (NULL == entry) {
5940				szone_error(szone, 1, "entry for pointer being freed from death-row vanished", ptr, NULL);
5941				SZONE_UNLOCK(szone);
5942				return;
5943			}
5944
5945			// Add "entry" to death-row ring
5946			if (reusable) {
5947				int idx = szone->large_entry_cache_newest; // Most recently occupied
5948				vm_address_t addr;
5949				size_t adjsize;
5950
5951				if (szone->large_entry_cache_newest == szone->large_entry_cache_oldest &&
5952					0 == szone->large_entry_cache[idx].address) {
5953					// Ring is empty, idx is good as it stands
5954					addr = 0;
5955					adjsize = 0;
5956				} else {
5957					// Extend the queue to the "right" by bumping up large_entry_cache_newest
5958					if (idx == LARGE_ENTRY_CACHE_SIZE - 1)
5959						idx = 0; // Wrap index
5960					else
5961						idx++; // Bump index
5962
5963					if (idx == szone->large_entry_cache_oldest) { // Fully occupied
5964						// Drop this entry from the cache and deallocate the VM
5965						addr = szone->large_entry_cache[idx].address;
5966						adjsize = szone->large_entry_cache[idx].size;
5967						szone->large_entry_cache_bytes -= adjsize;
5968						if (!szone->large_entry_cache[idx].did_madvise_reusable)
5969							szone->large_entry_cache_reserve_bytes -= adjsize;
5970					} else {
5971						// Using an unoccupied cache slot
5972						addr = 0;
5973						adjsize = 0;
5974					}
5975				}
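				// At this point idx is the ring slot that will receive the dying entry; addr and
				// adjsize are non-zero only when the ring was full and the oldest cached entry
				// had to be evicted -- its pages are deallocated below, after the zone lock is
				// dropped.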
5976
5977				if ((szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE))
5978					memset((void *)(entry->address), should_madvise ? SCRUBBLE_BYTE : SCRABBLE_BYTE, entry->size);
5979
5980				entry->did_madvise_reusable = should_madvise; // Was madvise()'d above?
5981				if (!should_madvise) // Entered on death-row without madvise() => up the hoard total
5982					szone->large_entry_cache_reserve_bytes += entry->size;
5983
5984				szone->large_entry_cache_bytes += entry->size;
5985
5986				if (!szone->flotsam_enabled && szone->large_entry_cache_bytes > SZONE_FLOTSAM_THRESHOLD_HIGH) {
5987					szone->flotsam_enabled = TRUE;
5988				}
5989
5990				szone->large_entry_cache[idx] = *entry;
5991				szone->large_entry_cache_newest = idx;
5992
5993				szone->num_large_objects_in_use--;
5994				szone->num_bytes_in_large_objects -= entry->size;
5995
5996				(void)large_entry_free_no_lock(szone, entry);
5997
5998				if (0 == addr) {
5999					SZONE_UNLOCK(szone);
6000					return;
6001				}
6002
6003				// Fall through to drop large_entry_cache_oldest from the cache,
6004				// and then deallocate its pages.
6005
6006				// Trim the queue on the "left" by bumping up large_entry_cache_oldest
6007				if (szone->large_entry_cache_oldest == LARGE_ENTRY_CACHE_SIZE - 1)
6008					szone->large_entry_cache_oldest = 0;
6009				else
6010					szone->large_entry_cache_oldest++;
6011
6012				// we deallocate_pages, including guard pages, outside the lock
6013				SZONE_UNLOCK(szone);
6014				deallocate_pages(szone, (void *)addr, (size_t)adjsize, 0);
6015				return;
6016			} else {
6017				/* fall through to discard an allocation that is not reusable */
6018			}
6019		}
6020#endif /* LARGE_CACHE */
6021
6022		szone->num_large_objects_in_use--;
6023		szone->num_bytes_in_large_objects -= entry->size;
6024
6025		vm_range_to_deallocate = large_entry_free_no_lock(szone, entry);
6026	} else {
6027#if DEBUG_MALLOC
6028		large_debug_print(szone);
6029#endif
6030		szone_error(szone, 1, "pointer being freed was not allocated", ptr, NULL);
6031		SZONE_UNLOCK(szone);
6032		return;
6033	}
6034	SZONE_UNLOCK(szone); // we release the lock asap
6035	CHECK(szone, __PRETTY_FUNCTION__);
6036
6037	// we deallocate_pages, including guard pages, outside the lock
6038	if (vm_range_to_deallocate.address) {
6039#if DEBUG_MALLOC
6040		// FIXME: large_entry_for_pointer_no_lock() needs the lock held ...
6041		if (large_entry_for_pointer_no_lock(szone, (void *)vm_range_to_deallocate.address)) {
6042			malloc_printf("*** invariant broken: %p still in use num_large_entries=%d\n",
6043						  vm_range_to_deallocate.address, szone->num_large_entries);
6044			large_debug_print(szone);
6045			szone_sleep();
6046		}
6047#endif
6048		deallocate_pages(szone, (void *)vm_range_to_deallocate.address, (size_t)vm_range_to_deallocate.size, 0);
6049	}
6050}
6051
6052static INLINE void *
6053large_try_shrink_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_good_size)
6054{
6055	size_t shrinkage = old_size - new_good_size;
6056
6057	if (shrinkage) {
6058		SZONE_LOCK(szone);
6059		/* contract existing large entry */
6060		large_entry_t *large_entry = large_entry_for_pointer_no_lock(szone, ptr);
6061		if (!large_entry) {
6062			szone_error(szone, 1, "large entry reallocated is not properly in table", ptr, NULL);
6063			SZONE_UNLOCK(szone);
6064			return ptr;
6065		}
6066
6067		large_entry->address = (vm_address_t)ptr;
6068		large_entry->size = new_good_size;
6069		szone->num_bytes_in_large_objects -= shrinkage;
6070		SZONE_UNLOCK(szone); // we release the lock asap
6071
6072		deallocate_pages(szone, (void *)((uintptr_t)ptr + new_good_size), shrinkage, 0);
6073	}
6074	return ptr;
6075}
6076
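// Attempts to grow a large allocation in place by mapping the pages immediately after
// it: if the address ptr + old_size is not already covered by another large entry,
// vm_allocate() is asked for exactly that address (tagged VM_MEMORY_REALLOC), and on
// success the existing entry is extended to the page-rounded new_size.  Returns 1 on
// success, 0 if the neighboring address space is unavailable.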
6077static INLINE int
6078large_try_realloc_in_place(szone_t *szone, void *ptr, size_t old_size, size_t new_size)
6079{
6080	vm_address_t	addr = (vm_address_t)ptr + old_size;
6081	large_entry_t	*large_entry;
6082	kern_return_t	err;
6083
6084	SZONE_LOCK(szone);
6085	large_entry = large_entry_for_pointer_no_lock(szone, (void *)addr);
6086	SZONE_UNLOCK(szone);
6087
6088	if (large_entry) { // check if "addr = ptr + old_size" is already spoken for
6089		return 0; // large pointer already exists in table - extension is not going to work
6090	}
6091
6092	new_size = round_page_quanta(new_size);
6093	/*
6094	 * Ask for allocation at a specific address, and mark as realloc
6095	 * to request coalescing with previous realloc'ed extensions.
6096	 */
6097	err = vm_allocate(mach_task_self(), &addr, new_size - old_size, VM_MAKE_TAG(VM_MEMORY_REALLOC));
6098	if (err != KERN_SUCCESS) {
6099		return 0;
6100	}
6101
6102	SZONE_LOCK(szone);
6103	/* extend existing large entry */
6104	large_entry = large_entry_for_pointer_no_lock(szone, ptr);
6105	if (!large_entry) {
6106		szone_error(szone, 1, "large entry reallocated is not properly in table", ptr, NULL);
6107		SZONE_UNLOCK(szone);
6108		return 0; // Bail, leaking "addr"
6109	}
6110
6111	large_entry->address = (vm_address_t)ptr;
6112	large_entry->size = new_size;
6113	szone->num_bytes_in_large_objects += new_size - old_size;
6114	SZONE_UNLOCK(szone); // we release the lock asap
6115
6116	return 1;
6117}
6118
6119/*********************	Zone call backs	************************/
6120/*
6121 * Mark these NOINLINE to avoid bloating the purgeable zone call backs
6122 */
6123static NOINLINE void
6124szone_free(szone_t *szone, void *ptr)
6125{
6126	region_t	tiny_region;
6127	region_t	small_region;
6128
6129#if DEBUG_MALLOC
6130	if (LOG(szone, ptr))
6131		malloc_printf("in szone_free with %p\n", ptr);
6132#endif
6133	if (!ptr)
6134		return;
6135	/*
6136	 * Try to free to a tiny region.
6137	 */
6138	if ((uintptr_t)ptr & (TINY_QUANTUM - 1)) {
6139		szone_error(szone, 1, "Non-aligned pointer being freed", ptr, NULL);
6140		return;
6141	}
6142	if ((tiny_region = tiny_region_for_ptr_no_lock(szone, ptr)) != NULL) {
6143		if (TINY_INDEX_FOR_PTR(ptr) >= NUM_TINY_BLOCKS) {
6144			szone_error(szone, 1, "Pointer to metadata being freed", ptr, NULL);
6145			return;
6146		}
6147		free_tiny(szone, ptr, tiny_region, 0);
6148		return;
6149	}
6150
6151	/*
6152	 * Try to free to a small region.
6153	 */
6154	if ((uintptr_t)ptr & (SMALL_QUANTUM - 1)) {
6155		szone_error(szone, 1, "Non-aligned pointer being freed (2)", ptr, NULL);
6156		return;
6157	}
6158	if ((small_region = small_region_for_ptr_no_lock(szone, ptr)) != NULL) {
6159		if (SMALL_META_INDEX_FOR_PTR(ptr) >= NUM_SMALL_BLOCKS) {
6160			szone_error(szone, 1, "Pointer to metadata being freed (2)", ptr, NULL);
6161			return;
6162		}
6163		free_small(szone, ptr, small_region, 0);
6164		return;
6165	}
6166
6167	/* check that it's a legal large allocation */
6168	if ((uintptr_t)ptr & (vm_page_quanta_size - 1)) {
6169		szone_error(szone, 1, "non-page-aligned, non-allocated pointer being freed", ptr, NULL);
6170		return;
6171	}
6172	free_large(szone, ptr);
6173}
6174
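// Same as szone_free(), except that the caller supplies the allocation's size, which
// lets the tiny/small/large routing be decided from the size alone instead of probing
// the region hash tables for ptr.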
6175static NOINLINE void
6176szone_free_definite_size(szone_t *szone, void *ptr, size_t size)
6177{
6178#if DEBUG_MALLOC
6179	if (LOG(szone, ptr))
6180		malloc_printf("in szone_free_definite_size with %p\n", ptr);
6181
6182	if (0 == size) {
6183		szone_error(szone, 1, "pointer of size zero being freed", ptr, NULL);
6184		return;
6185	}
6186
6187#endif
6188	if (!ptr)
6189		return;
6190
6191	/*
6192	 * Try to free to a tiny region.
6193	 */
6194	if ((uintptr_t)ptr & (TINY_QUANTUM - 1)) {
6195		szone_error(szone, 1, "Non-aligned pointer being freed", ptr, NULL);
6196		return;
6197	}
6198	if (size <= (NUM_TINY_SLOTS - 1)*TINY_QUANTUM) {
6199		if (TINY_INDEX_FOR_PTR(ptr) >= NUM_TINY_BLOCKS) {
6200			szone_error(szone, 1, "Pointer to metadata being freed", ptr, NULL);
6201			return;
6202		}
6203		free_tiny(szone, ptr, TINY_REGION_FOR_PTR(ptr), size);
6204		return;
6205	}
6206
6207	/*
6208	 * Try to free to a small region.
6209	 */
6210	if ((uintptr_t)ptr & (SMALL_QUANTUM - 1)) {
6211		szone_error(szone, 1, "Non-aligned pointer being freed (2)", ptr, NULL);
6212		return;
6213	}
6214	if (size <= szone->large_threshold) {
6215		if (SMALL_META_INDEX_FOR_PTR(ptr) >= NUM_SMALL_BLOCKS) {
6216			szone_error(szone, 1, "Pointer to metadata being freed (2)", ptr, NULL);
6217			return;
6218		}
6219		free_small(szone, ptr, SMALL_REGION_FOR_PTR(ptr), size);
6220		return;
6221	}
6222
6223	/* check that it's a legal large allocation */
6224	if ((uintptr_t)ptr & (vm_page_quanta_size - 1)) {
6225		szone_error(szone, 1, "non-page-aligned, non-allocated pointer being freed", ptr, NULL);
6226		return;
6227	}
6228	free_large(szone, ptr);
6229}
6230
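// Common entry point for malloc and calloc: requests up to (NUM_TINY_SLOTS - 1) *
// TINY_QUANTUM bytes go to the tiny allocator, requests up to szone->large_threshold
// go to the small allocator, and anything bigger is rounded up to page quanta and
// handed to large_malloc() (a page count of zero after rounding signals that the size
// overflowed, and the allocation fails).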
6231static NOINLINE void *
6232szone_malloc_should_clear(szone_t *szone, size_t size, boolean_t cleared_requested)
6233{
6234	void	*ptr;
6235	msize_t	msize;
6236
6237	if (size <= (NUM_TINY_SLOTS - 1)*TINY_QUANTUM) {
6238		// think tiny
6239		msize = TINY_MSIZE_FOR_BYTES(size + TINY_QUANTUM - 1);
6240		if (!msize)
6241			msize = 1;
6242		ptr = tiny_malloc_should_clear(szone, msize, cleared_requested);
6243	} else if (size <= szone->large_threshold) {
6244		// think small
6245		msize = SMALL_MSIZE_FOR_BYTES(size + SMALL_QUANTUM - 1);
6246		if (!msize)
6247			msize = 1;
6248		ptr = small_malloc_should_clear(szone, msize, cleared_requested);
6249	} else {
6250		// large
6251		size_t num_kernel_pages = round_page_quanta(size) >> vm_page_quanta_shift;
6252		if (num_kernel_pages == 0)	/* Overflowed */
6253			ptr = 0;
6254		else
6255			ptr = large_malloc(szone, num_kernel_pages, 0, cleared_requested);
6256	}
6257#if DEBUG_MALLOC
6258	if (LOG(szone, ptr))
6259		malloc_printf("szone_malloc returned %p\n", ptr);
6260#endif
6261	/*
6262	 * If requested, scribble on allocated memory.
6263	 */
6264	if ((szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) && ptr && !cleared_requested && size)
6265		memset(ptr, SCRIBBLE_BYTE, szone_size(szone, ptr));
6266
6267	return ptr;
6268}
6269
6270static NOINLINE void *
6271szone_malloc(szone_t *szone, size_t size) {
6272	return szone_malloc_should_clear(szone, size, 0);
6273}
6274
6275static NOINLINE void *
6276szone_calloc(szone_t *szone, size_t num_items, size_t size)
6277{
6278	size_t total_bytes = num_items * size;
6279
6280	// Check for overflow of integer multiplication
6281	if (num_items > 1) {
6282#if __LP64__ /* size_t is uint64_t */
6283		if ((num_items | size) & 0xffffffff00000000ul) {
6284			// num_items or size equals or exceeds sqrt(2^64) == 2^32, appeal to wider arithmetic
6285			__uint128_t product = ((__uint128_t)num_items) * ((__uint128_t)size);
6286			if ((uint64_t)(product >> 64)) // compiles to test on upper register of register pair
6287				return NULL;
6288		}
6289#else /* size_t is uint32_t */
6290		if ((num_items | size) & 0xffff0000ul) {
6291			// num_items or size equals or exceeds sqrt(2^32) == 2^16, appeal to wider arithmetic
6292			uint64_t product = ((uint64_t)num_items) * ((uint64_t)size);
6293			if ((uint32_t)(product >> 32)) // compiles to test on upper register of register pair
6294				return NULL;
6295		}
6296#endif
6297	}
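	// Worked example of the check above (64-bit case): num_items == 0x100000000 and
	// size == 8 trips the high-bit test, but the 128-bit product is 0x800000000 whose
	// upper 64 bits are zero, so the request proceeds; num_items == size == 0x100000000
	// yields an upper word of 1 and returns NULL.  When num_items <= 1 no check is
	// needed, since total_bytes is then at most size.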
6298
6299	return szone_malloc_should_clear(szone, total_bytes, 1);
6300}
6301
6302static NOINLINE void *
6303szone_valloc(szone_t *szone, size_t size)
6304{
6305	void	*ptr;
6306
6307	if (size <= szone->large_threshold) {
6308		ptr = szone_memalign(szone, vm_page_quanta_size, size);
6309	} else {
6310		size_t	num_kernel_pages;
6311
6312		num_kernel_pages = round_page_quanta(size) >> vm_page_quanta_shift;
6313		ptr = large_malloc(szone, num_kernel_pages, 0, 0);
6314	}
6315
6316#if DEBUG_MALLOC
6317	if (LOG(szone, ptr))
6318		malloc_printf("szone_valloc returned %p\n", ptr);
6319#endif
6320	return ptr;
6321}
6322
6323/* Isolate PIC-base load here. */
6324static NOINLINE size_t
6325szone_size_try_large(szone_t *szone, const void *ptr)
6326{
6327	size_t		size = 0;
6328	large_entry_t	*entry;
6329
6330	SZONE_LOCK(szone);
6331	entry = large_entry_for_pointer_no_lock(szone, ptr);
6332	if (entry) {
6333		size = entry->size;
6334	}
6335	SZONE_UNLOCK(szone);
6336#if DEBUG_MALLOC
6337	if (LOG(szone, ptr)) {
6338		malloc_printf("szone_size for %p returned %d\n", ptr, (unsigned)size);
6339	}
6340#endif
6341	return size;
6342}
6343
6344static NOINLINE size_t
6345szone_size(szone_t *szone, const void *ptr)
6346{
6347	boolean_t		is_free;
6348	msize_t		msize, msize_and_free;
6349
6350	if (!ptr)
6351		return 0;
6352#if DEBUG_MALLOC
6353	if (LOG(szone, ptr)) {
6354		malloc_printf("in szone_size for %p (szone=%p)\n", ptr, szone);
6355	}
6356#endif
6357
6358	/*
6359	 * Look for it in a tiny region.
6360	 */
6361	if ((uintptr_t)ptr & (TINY_QUANTUM - 1))
6362		return 0;
6363	if (tiny_region_for_ptr_no_lock(szone, ptr)) {
6364		if (TINY_INDEX_FOR_PTR(ptr) >= NUM_TINY_BLOCKS)
6365			return 0;
6366		msize = get_tiny_meta_header(ptr, &is_free);
6367		if (is_free)
6368			return 0;
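		// A block parked in a magazine's one-slot mag_last_free cache is logically free
		// even though its metadata still shows it in use, so report a size of 0 for it too.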
6369#if TINY_CACHE
6370		{
6371			mag_index_t mag_index = MAGAZINE_INDEX_FOR_TINY_REGION(TINY_REGION_FOR_PTR(ptr));
6372			if (DEPOT_MAGAZINE_INDEX != mag_index) {
6373				magazine_t	*tiny_mag_ptr = &(szone->tiny_magazines[mag_index]);
6374
6375				if (msize < TINY_QUANTUM && ptr == (void *)((uintptr_t)(tiny_mag_ptr->mag_last_free) & ~ (TINY_QUANTUM - 1)))
6376					return 0;
6377			} else {
6378				for (mag_index = 0; mag_index < szone->num_tiny_magazines; mag_index++) {
6379					magazine_t	*tiny_mag_ptr = &(szone->tiny_magazines[mag_index]);
6380
6381					if (msize < TINY_QUANTUM && ptr == (void *)((uintptr_t)(tiny_mag_ptr->mag_last_free) & ~ (TINY_QUANTUM - 1)))
6382						return 0;
6383				}
6384			}
6385		}
6386#endif
6387		return TINY_BYTES_FOR_MSIZE(msize);
6388	}
6389
6390	/*
6391	 * Look for it in a small region.
6392	 */
6393	if ((uintptr_t)ptr & (SMALL_QUANTUM - 1))
6394		return 0;
6395	if (small_region_for_ptr_no_lock(szone, ptr)) {
6396		if (SMALL_META_INDEX_FOR_PTR(ptr) >= NUM_SMALL_BLOCKS)
6397			return 0;
6398		msize_and_free = *SMALL_METADATA_FOR_PTR(ptr);
6399		if (msize_and_free & SMALL_IS_FREE)
6400			return 0;
6401#if SMALL_CACHE
6402		{
6403			mag_index_t	mag_index = MAGAZINE_INDEX_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(ptr));
6404			if (DEPOT_MAGAZINE_INDEX != mag_index) {
6405				magazine_t	*small_mag_ptr = &(szone->small_magazines[mag_index]);
6406
6407				if (ptr == (void *)((uintptr_t)(small_mag_ptr->mag_last_free) & ~ (SMALL_QUANTUM - 1)))
6408					return 0;
6409			} else {
6410				for (mag_index = 0; mag_index < szone->num_small_magazines; mag_index++) {
6411					magazine_t	*small_mag_ptr = &(szone->small_magazines[mag_index]);
6412
6413					if (ptr == (void *)((uintptr_t)(small_mag_ptr->mag_last_free) & ~ (SMALL_QUANTUM - 1)))
6414						return 0;
6415				}
6416			}
6417		}
6418#endif
6419		return SMALL_BYTES_FOR_MSIZE(msize_and_free);
6420	}
6421
6422	/*
6423	 * If not page-aligned, it cannot have come from a large allocation.
6424	 */
6425	if ((uintptr_t)ptr & (vm_page_quanta_size - 1))
6426		return 0;
6427
6428	/*
6429	 * Look for it in a large entry.
6430	 */
6431	return szone_size_try_large(szone, ptr);
6432}
6433
6434static NOINLINE void *
6435szone_realloc(szone_t *szone, void *ptr, size_t new_size)
6436{
6437	size_t	old_size, new_good_size, valid_size;
6438	void	*new_ptr;
6439
6440#if DEBUG_MALLOC
6441	if (LOG(szone, ptr)) {
6442		malloc_printf("in szone_realloc for %p, %d\n", ptr, (unsigned)new_size);
6443	}
6444#endif
6445	if (NULL == ptr) {
6446		// If ptr is a null pointer, realloc() shall be equivalent to malloc() for the specified size.
6447		return szone_malloc(szone, new_size);
6448	} else if (0 == new_size) {
6449		// If size is 0 and ptr is not a null pointer, the object pointed to is freed.
6450		szone_free(szone, ptr);
6451		// If size is 0, either a null pointer or a unique pointer that can be successfully passed
6452		// to free() shall be returned.
6453		return szone_malloc(szone, 1);
6454	}
6455
6456	old_size = szone_size(szone, ptr);
6457	if (!old_size) {
6458		szone_error(szone, 1, "pointer being reallocated was not allocated", ptr, NULL);
6459		return NULL;
6460	}
6461
6462	new_good_size = szone_good_size(szone, new_size);
6463	if (new_good_size == old_size) { // Existing allocation is best fit ever?
6464		return ptr;
6465	}
6466
6467	/*
6468	 * If the new size suits the tiny allocator and the pointer being resized
6469	 * belongs to a tiny region, try to reallocate in-place.
6470	 */
6471	if (new_good_size <= (NUM_TINY_SLOTS - 1) * TINY_QUANTUM) {
6472		if (old_size <= (NUM_TINY_SLOTS - 1) * TINY_QUANTUM) {
6473			if (new_good_size <= (old_size >> 1)) {
6474				/*
6475				 * Serious shrinkage (more than half). free() the excess.
6476				 */
6477				return tiny_try_shrink_in_place(szone, ptr, old_size, new_good_size);
6478			} else if (new_good_size <= old_size) {
6479				/*
6480				 * new_good_size smaller than old_size but not by much (less than half).
6481				 * Avoid thrashing at the expense of some wasted storage.
6482				 */
6483				if (szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE)
6484					memset(ptr + new_size, SCRIBBLE_BYTE, old_size - new_size);
6485				return ptr;
6486			} else if (tiny_try_realloc_in_place(szone, ptr, old_size, new_good_size)) { // try to grow the allocation
6487				if (szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE)
6488					memset(ptr + old_size, SCRIBBLE_BYTE, new_good_size - old_size);
6489				return ptr;
6490			}
6491		}
6492
6493		/*
6494		 * Else if the new size suits the small allocator and the pointer being resized
6495		 * belongs to a small region, and we're not protecting the small allocations
6496		 * try to reallocate in-place.
6497		 */
6498	} else if (new_good_size <= szone->large_threshold) {
6499		if ((NUM_TINY_SLOTS - 1) * TINY_QUANTUM < old_size && old_size <= szone->large_threshold) {
6500			if (new_good_size <= (old_size >> 1)) {
6501				return small_try_shrink_in_place(szone, ptr, old_size, new_good_size);
6502			} else if (new_good_size <= old_size) {
6503				if (szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE)
6504					memset(ptr + new_size, SCRIBBLE_BYTE, old_size - new_size);
6505				return ptr;
6506			} else if (small_try_realloc_in_place(szone, ptr, old_size, new_good_size)) {
6507				if (szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE)
6508					memset(ptr + old_size, SCRIBBLE_BYTE, new_good_size - old_size);
6509				return ptr;
6510			}
6511		}
6512		/*
6513		 * Else if the allocation's a large allocation, try to reallocate in-place there.
6514		 */
6515	} else if (!(szone->debug_flags & SCALABLE_MALLOC_PURGEABLE) && // purgeable needs fresh allocation
6516			   (old_size > szone->large_threshold) &&
6517			   (new_good_size > szone->large_threshold)) {
6518		if (new_good_size <= (old_size >> 1)) {
6519			return large_try_shrink_in_place(szone, ptr, old_size, new_good_size);
6520		} else if (new_good_size <= old_size) {
6521			if (szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE)
6522				memset(ptr + new_size, SCRIBBLE_BYTE, old_size - new_size);
6523			return ptr;
6524		} else if (large_try_realloc_in_place(szone, ptr, old_size, new_good_size)) {
6525			if (szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE)
6526				memset(ptr + old_size, SCRIBBLE_BYTE, new_good_size - old_size);
6527			return ptr;
6528		}
6529	}
6530
6531	/*
6532	 * Can't reallocate in place for whatever reason; allocate a new buffer and copy.
6533	 */
6534	if (new_good_size <= (old_size >> 1)) {
6535		/* Serious shrinkage (more than half). FALL THROUGH to alloc/copy/free. */
6536	} else if (new_good_size <= old_size) {
6537		if (szone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE)
6538			memset(ptr + new_size, SCRIBBLE_BYTE, old_size - new_size);
6539		return ptr;
6540	}
6541
6542	new_ptr = szone_malloc(szone, new_size);
6543	if (new_ptr == NULL)
6544		return NULL;
6545
6546	/*
6547	 * If the allocation's large enough, try to copy using VM.  If that fails, or
6548	 * if it's too small, just copy by hand.
6549	 */
6550	valid_size = MIN(old_size, new_size);
6551	if ((valid_size < szone->vm_copy_threshold) ||
6552		vm_copy(mach_task_self(), (vm_address_t)ptr, valid_size, (vm_address_t)new_ptr))
6553		memcpy(new_ptr, ptr, valid_size);
6554	szone_free(szone, ptr);
6555
6556#if DEBUG_MALLOC
6557	if (LOG(szone, ptr)) {
6558		malloc_printf("szone_realloc returned %p for %d\n", new_ptr, (unsigned)new_size);
6559	}
6560#endif
6561	return new_ptr;
6562}
6563
6564static NOINLINE void *
6565szone_memalign(szone_t *szone, size_t alignment, size_t size)
6566{
6567	if (size == 0)
6568		size = 1; // Ensures we'll return an aligned free()-able pointer
6569
6570	if ((size + alignment) < size) // size_t arithmetic wrapped!
6571		return NULL;
6572
6573	// alignment is guaranteed to be a power of 2 at least as large as sizeof(void *), hence non-zero.
6574	// Since size + alignment didn't wrap, 0 <= size + alignment - 1 < size + alignment
6575	size_t span = size + alignment - 1;
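	// Illustrative numbers: alignment == 64 and size == 100 give span == 163; any block
	// of at least 163 bytes necessarily contains a 64-byte-aligned address with 100
	// bytes available after it, which is what the carving below relies on.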
6576
6577	if (alignment <= TINY_QUANTUM) {
6578		return szone_malloc(szone, size); // Trivially satisfied by tiny, small, or large
6579
6580	} else if (span <= (NUM_TINY_SLOTS - 1)*TINY_QUANTUM) {
6581		msize_t mspan = TINY_MSIZE_FOR_BYTES(span + TINY_QUANTUM - 1);
6582		void *p = szone_malloc(szone, span); // avoids inlining tiny_malloc_should_clear(szone, mspan, 0);
6583
6584		if (NULL == p)
6585			return NULL;
6586
6587		size_t offset = ((uintptr_t) p) & (alignment - 1); // p % alignment
6588		size_t pad = (0 == offset) ? 0 : alignment - offset; // p + pad achieves desired alignment
6589
6590		msize_t msize = TINY_MSIZE_FOR_BYTES(size + TINY_QUANTUM - 1);
6591		msize_t mpad = TINY_MSIZE_FOR_BYTES(pad + TINY_QUANTUM - 1);
6592		msize_t mwaste = mspan - msize - mpad; // excess blocks
6593
6594		if (mpad > 0) {
6595			void *q = (void *)(((uintptr_t) p) + pad);
6596
6597			// Mark q as a block header and in-use, thus creating two blocks.
6598			magazine_t	*tiny_mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->tiny_magazines,
6599																		 REGION_TRAILER_FOR_TINY_REGION(TINY_REGION_FOR_PTR(p)),
6600																		 MAGAZINE_INDEX_FOR_TINY_REGION(TINY_REGION_FOR_PTR(p)));
6601			set_tiny_meta_header_in_use(q, msize);
6602			tiny_mag_ptr->mag_num_objects++;
6603
6604			// set_tiny_meta_header_in_use() "reaffirms" the block_header on the *following* block, so
6605			// now set its in_use bit as well. But only if it's within the original allocation made above.
6606			if (mwaste > 0)
6607				BITARRAY_SET(TINY_INUSE_FOR_HEADER(TINY_BLOCK_HEADER_FOR_PTR(q)), TINY_INDEX_FOR_PTR(q) + msize);
6608			SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
6609
6610			// Give up mpad blocks beginning at p to the tiny free list
6611			// region_t r = TINY_REGION_FOR_PTR(p);
6612			szone_free(szone, p); // avoids inlining free_tiny(szone, p, &r);
6613
6614			p = q; // advance p to the desired alignment
6615		}
6616
6617		if (mwaste > 0) {
6618			void *q = (void *)(((uintptr_t) p) + TINY_BYTES_FOR_MSIZE(msize));
6619			// Mark q as block header and in-use, thus creating two blocks.
6620			magazine_t	*tiny_mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->tiny_magazines,
6621																		 REGION_TRAILER_FOR_TINY_REGION(TINY_REGION_FOR_PTR(p)),
6622																		 MAGAZINE_INDEX_FOR_TINY_REGION(TINY_REGION_FOR_PTR(p)));
6623			set_tiny_meta_header_in_use(q, mwaste);
6624			tiny_mag_ptr->mag_num_objects++;
6625			SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
6626
6627			// Give up mwaste blocks beginning at q to the tiny free list
6628			// region_t r = TINY_REGION_FOR_PTR(q);
6629			szone_free(szone, q); // avoids inlining free_tiny(szone, q, &r);
6630		}
6631
6632		return p; // p has the desired size and alignment, and can later be free()'d
6633
6634	} else if ((NUM_TINY_SLOTS - 1)*TINY_QUANTUM < size && alignment <= SMALL_QUANTUM) {
6635		return szone_malloc(szone, size); // Trivially satisfied by small or large
6636
6637	} else if (span <= szone->large_threshold) {
6638
6639		if (size <= (NUM_TINY_SLOTS - 1)*TINY_QUANTUM) {
6640			size = (NUM_TINY_SLOTS - 1)*TINY_QUANTUM + TINY_QUANTUM; // ensure block allocated by small does not have a tiny-possible size
6641			span = size + alignment - 1;
6642		}
6643
6644		msize_t mspan = SMALL_MSIZE_FOR_BYTES(span + SMALL_QUANTUM - 1);
6645		void *p = szone_malloc(szone, span); // avoid inlining small_malloc_should_clear(szone, mspan, 0);
6646
6647		if (NULL == p)
6648			return NULL;
6649
6650		size_t offset = ((uintptr_t) p) & (alignment - 1); // p % alignment
6651		size_t pad = (0 == offset) ? 0 : alignment - offset; // p + pad achieves desired alignment
6652
6653		msize_t msize = SMALL_MSIZE_FOR_BYTES(size + SMALL_QUANTUM - 1);
6654		msize_t mpad = SMALL_MSIZE_FOR_BYTES(pad + SMALL_QUANTUM - 1);
6655		msize_t mwaste = mspan - msize - mpad; // excess blocks
6656
6657		if (mpad > 0) {
6658			void *q = (void *)(((uintptr_t) p) + pad);
6659
6660			// Mark q as block header and in-use, thus creating two blocks.
6661			magazine_t	*small_mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->small_magazines,
6662																		  REGION_TRAILER_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(p)),
6663																		  MAGAZINE_INDEX_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(p)));
6664			small_meta_header_set_in_use(SMALL_META_HEADER_FOR_PTR(p), SMALL_META_INDEX_FOR_PTR(p), mpad);
6665			small_meta_header_set_in_use(SMALL_META_HEADER_FOR_PTR(q), SMALL_META_INDEX_FOR_PTR(q), msize + mwaste);
6666			small_mag_ptr->mag_num_objects++;
6667			SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
6668
6669			// Give up mpad blocks beginning at p to the small free list
6670			// region_t r = SMALL_REGION_FOR_PTR(p);
6671			szone_free(szone, p); // avoid inlining free_small(szone, p, &r);
6672
6673			p = q; // advance p to the desired alignment
6674		}
6675		if (mwaste > 0) {
6676			void *q = (void *)(((uintptr_t) p) + SMALL_BYTES_FOR_MSIZE(msize));
6677			// Mark q as block header and in-use, thus creating two blocks.
6678			magazine_t	*small_mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->small_magazines,
6679																		  REGION_TRAILER_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(p)),
6680																		  MAGAZINE_INDEX_FOR_SMALL_REGION(SMALL_REGION_FOR_PTR(p)));
6681			small_meta_header_set_in_use(SMALL_META_HEADER_FOR_PTR(p), SMALL_META_INDEX_FOR_PTR(p), msize);
6682			small_meta_header_set_in_use(SMALL_META_HEADER_FOR_PTR(q), SMALL_META_INDEX_FOR_PTR(q), mwaste);
6683			small_mag_ptr->mag_num_objects++;
6684			SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
6685
6686			// Give up mwaste blocks beginning at q to the small free list
6687			// region_t r = SMALL_REGION_FOR_PTR(q);
6688			szone_free(szone, q); // avoid inlining free_small(szone, q, &r);
6689		}
6690
6691		return p; // p has the desired size and alignment, and can later be free()'d
6692
6693	} else if (szone->large_threshold < size && alignment <= vm_page_quanta_size) {
6694		return szone_malloc(szone, size); // Trivially satisfied by large
6695
6696	} else {
6697		// ensure block allocated by large does not have a small-possible size
6698		size_t num_kernel_pages = round_page_quanta(MAX(szone->large_threshold + 1, size)) >> vm_page_quanta_shift;
6699		void *p;
6700
6701		if (num_kernel_pages == 0)	/* Overflowed */
6702			p = NULL;
6703		else
6704			p = large_malloc(szone, num_kernel_pages, MAX(vm_page_quanta_shift, __builtin_ctz(alignment)), 0);
6705
6706		return p;
6707	}
6708	/* NOTREACHED */
6709}
6710
6711// Given a size, allocates up to 'count' pointers each capable of holding that
6712// size and stores them in the 'results' array, which must be provided by the
6713// caller; returns the number of pointers actually allocated.
6714// May return zero, since this function makes only a best-effort attempt at
6715// allocating the pointers; clients should be prepared to call malloc() for any
6716// additional blocks they need.
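// Only requests that fit the tiny allocator (size <= (NUM_TINY_SLOTS - 1) * TINY_QUANTUM)
// are batched; larger sizes simply return 0.  All pointers are carved from the calling
// thread's tiny magazine free list under a single lock acquisition.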
6717static NOINLINE unsigned
6718szone_batch_malloc(szone_t *szone, size_t size, void **results, unsigned count)
6719{
6720	msize_t	msize = TINY_MSIZE_FOR_BYTES(size + TINY_QUANTUM - 1);
6721	unsigned	found = 0;
6722	mag_index_t	mag_index = mag_get_thread_index(szone);
6723	magazine_t	*tiny_mag_ptr = &(szone->tiny_magazines[mag_index]);
6724
6725	// only bother implementing this for tiny
6726	if (size > (NUM_TINY_SLOTS - 1)*TINY_QUANTUM)
6727		return 0;
6728	// make sure to return objects at least one quantum in size
6729	if (!msize)
6730		msize = 1;
6731
6732	CHECK(szone, __PRETTY_FUNCTION__);
6733
6734	// We must lock the zone now, since tiny_malloc_from_free_list assumes that
6735	// the caller has done so.
6736	SZONE_MAGAZINE_PTR_LOCK(szone, tiny_mag_ptr);
6737
6738	// with the zone locked, allocate objects from the free list until all
6739	// sufficiently large objects have been exhausted, or we have met our quota
6740	// of objects to allocate.
6741	while (found < count) {
6742		void *ptr = tiny_malloc_from_free_list(szone, tiny_mag_ptr, mag_index, msize);
6743		if (!ptr)
6744			break;
6745
6746		*results++ = ptr;
6747		found++;
6748	}
6749	SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
6750	return found;
6751}
6752
6753/* Try caching the tiny_region and checking if the next ptr hits there. */
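// Pointers are freed directly while the magazine lock for their region is held; the
// lock is dropped and re-taken only when consecutive pointers fall in different
// regions.  Anything that is not a tiny block, is already free, or points at region
// metadata is left in to_be_freed[] and handled by the ordinary szone_free() loop at
// the end.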
6754static NOINLINE void
6755szone_batch_free(szone_t *szone, void **to_be_freed, unsigned count)
6756{
6757	unsigned	cc = 0;
6758	void	*ptr;
6759	region_t	tiny_region = NULL;
6760	boolean_t	is_free;
6761	msize_t	msize;
6762	magazine_t	*tiny_mag_ptr = NULL;
6763	mag_index_t mag_index = -1;
6764
6765	// frees all the pointers in to_be_freed
6766	// note that to_be_freed may be overwritten during the process
6767	if (!count)
6768		return;
6769
6770	CHECK(szone, __PRETTY_FUNCTION__);
6771	while (cc < count) {
6772		ptr = to_be_freed[cc];
6773		if (ptr) {
6774			if (NULL == tiny_region || tiny_region != TINY_REGION_FOR_PTR(ptr)) { // region same as last iteration?
6775				if (tiny_mag_ptr) { // non-NULL iff magazine lock taken
6776					SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
6777					tiny_mag_ptr = NULL;
6778				}
6779
6780				tiny_region = tiny_region_for_ptr_no_lock(szone, ptr);
6781
6782				if (tiny_region) {
6783					tiny_mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->tiny_magazines,
6784																	REGION_TRAILER_FOR_TINY_REGION(tiny_region),
6785																	MAGAZINE_INDEX_FOR_TINY_REGION(tiny_region));
6786					mag_index = MAGAZINE_INDEX_FOR_TINY_REGION(tiny_region);
6787				}
6788			}
6789			if (tiny_region) {
6790				// this is a tiny pointer
6791				if (TINY_INDEX_FOR_PTR(ptr) >= NUM_TINY_BLOCKS)
6792					break; // pointer to metadata; let the standard free deal with it
6793				msize = get_tiny_meta_header(ptr, &is_free);
6794				if (is_free)
6795					break; // a double free; let the standard free deal with it
6796
6797				if (!tiny_free_no_lock(szone, tiny_mag_ptr, mag_index, tiny_region, ptr, msize)) {
6798					// Arrange to re-acquire magazine lock
6799					tiny_mag_ptr = NULL;
6800					tiny_region = NULL;
6801				}
6802				to_be_freed[cc] = NULL;
6803			} else {
6804				// No region in this zone claims ptr; let the standard free deal with it
6805				break;
6806			}
6807		}
6808		cc++;
6809	}
6810
6811	if (tiny_mag_ptr) {
6812		SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
6813		tiny_mag_ptr = NULL;
6814	}
6815
6816	CHECK(szone, __PRETTY_FUNCTION__);
6817	while (count--) {
6818		ptr = to_be_freed[count];
6819		if (ptr)
6820			szone_free(szone, ptr);
6821	}
6822}
6823
6824// FIXME: Suppose one of the locks is held?
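// Teardown proceeds in stages: the large death-row cache is snapshotted and drained
// outside the lock, live large allocations and the entry table are deallocated, then
// every tiny and small region still present in the hash rings, the rings themselves
// (when they outgrew the initial inline arrays), and finally the magazine arrays
// (together with their guard pages) and the szone structure itself.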
6825static void
6826szone_destroy(szone_t *szone)
6827{
6828	size_t		index;
6829	large_entry_t	*large;
6830	vm_range_t		range_to_deallocate;
6831
6832#if LARGE_CACHE
6833	SZONE_LOCK(szone);
6834
6835	/* disable any memory pressure responder */
6836	szone->flotsam_enabled = FALSE;
6837
6838	// stack-allocated copy of the death-row cache
6839	int idx = szone->large_entry_cache_oldest, idx_max = szone->large_entry_cache_newest;
6840	large_entry_t local_entry_cache[LARGE_ENTRY_CACHE_SIZE];
6841
6842	memcpy((void *)local_entry_cache, (void *)szone->large_entry_cache, sizeof(local_entry_cache));
6843
6844	szone->large_entry_cache_oldest = szone->large_entry_cache_newest = 0;
6845	szone->large_entry_cache[0].address = 0x0;
6846	szone->large_entry_cache[0].size = 0;
6847	szone->large_entry_cache_bytes = 0;
6848	szone->large_entry_cache_reserve_bytes = 0;
6849
6850	SZONE_UNLOCK(szone);
6851
6852	// deallocate the death-row cache outside the zone lock
6853	while (idx != idx_max) {
6854		deallocate_pages(szone, (void *) local_entry_cache[idx].address, local_entry_cache[idx].size, 0);
6855		if (++idx == LARGE_ENTRY_CACHE_SIZE) idx = 0;
6856	}
6857	if (0 != local_entry_cache[idx].address && 0 != local_entry_cache[idx].size) {
6858		deallocate_pages(szone, (void *) local_entry_cache[idx].address, local_entry_cache[idx].size, 0);
6859	}
6860#endif
6861
6862	/* destroy large entries */
6863	index = szone->num_large_entries;
6864	while (index--) {
6865		large = szone->large_entries + index;
6866		if (large->address) {
6867			// we deallocate_pages, including guard pages
6868			deallocate_pages(szone, (void *)(large->address), large->size, szone->debug_flags);
6869		}
6870	}
6871	large_entries_free_no_lock(szone, szone->large_entries, szone->num_large_entries, &range_to_deallocate);
6872	if (range_to_deallocate.size)
6873		deallocate_pages(szone, (void *)range_to_deallocate.address, (size_t)range_to_deallocate.size, 0);
6874
6875	/* destroy tiny regions */
6876	for (index = 0; index < szone->tiny_region_generation->num_regions_allocated; ++index)
6877		if ((HASHRING_OPEN_ENTRY != szone->tiny_region_generation->hashed_regions[index]) &&
6878			(HASHRING_REGION_DEALLOCATED != szone->tiny_region_generation->hashed_regions[index]))
6879			deallocate_pages(szone, szone->tiny_region_generation->hashed_regions[index], TINY_REGION_SIZE, 0);
6880
6881	/* destroy small regions */
6882	for (index = 0; index < szone->small_region_generation->num_regions_allocated; ++index)
6883		if ((HASHRING_OPEN_ENTRY != szone->small_region_generation->hashed_regions[index]) &&
6884			(HASHRING_REGION_DEALLOCATED != szone->small_region_generation->hashed_regions[index]))
6885			deallocate_pages(szone, szone->small_region_generation->hashed_regions[index], SMALL_REGION_SIZE, 0);
6886
6887	/* destroy region hash rings, if any */
6888	if (szone->tiny_region_generation->hashed_regions != szone->initial_tiny_regions) {
6889		size_t size = round_page_quanta(szone->tiny_region_generation->num_regions_allocated * sizeof(region_t));
6890		deallocate_pages(szone, szone->tiny_region_generation->hashed_regions, size, 0);
6891	}
6892	if (szone->small_region_generation->hashed_regions != szone->initial_small_regions) {
6893		size_t size = round_page_quanta(szone->small_region_generation->num_regions_allocated * sizeof(region_t));
6894		deallocate_pages(szone, szone->small_region_generation->hashed_regions, size, 0);
6895	}
6896
6897	/* Now destroy the separate szone region */
6898	deallocate_pages(szone, (void *)&(szone->tiny_magazines[-1]), TINY_MAGAZINE_PAGED_SIZE, SCALABLE_MALLOC_ADD_GUARD_PAGES);
6899	deallocate_pages(szone, (void *)&(szone->small_magazines[-1]), SMALL_MAGAZINE_PAGED_SIZE, SCALABLE_MALLOC_ADD_GUARD_PAGES);
6900	deallocate_pages(szone, (void *)szone, SZONE_PAGED_SIZE, 0);
6901}
6902
6903static NOINLINE size_t
6904szone_good_size(szone_t *szone, size_t size)
6905{
6906	msize_t msize;
6907
6908	// Find a good size for this tiny allocation.
6909	if (size <= (NUM_TINY_SLOTS - 1) * TINY_QUANTUM) {
6910		msize = TINY_MSIZE_FOR_BYTES(size + TINY_QUANTUM - 1);
6911		if (!msize)
6912			msize = 1;
6913		return TINY_BYTES_FOR_MSIZE(msize);
6914	}
6915
6916	// Find a good size for this small allocation.
6917	if (size <= szone->large_threshold) {
6918		msize = SMALL_MSIZE_FOR_BYTES(size + SMALL_QUANTUM - 1);
6919		if (!msize)
6920			msize = 1;
6921		return SMALL_BYTES_FOR_MSIZE(msize);
6922	}
6923
6924	// Check for integer overflow on the size, since unlike the two cases above,
6925	// there is no upper bound on allocation size at this point.
6926	if (size > round_page_quanta(size))
6927		return (size_t)(-1LL);
6928
6929#if DEBUG_MALLOC
6930	// It is not acceptable to see a size of zero here, since that means we
6931	// failed to catch a request for zero bytes in the tiny check, or the size
6932	// overflowed to zero during some arithmetic.
6933	if (size == 0)
6934		malloc_printf("szone_good_size() invariant broken %y\n", size);
6935#endif
6936	return round_page_quanta(size);
6937}
6938
6939unsigned szone_check_counter = 0;
6940unsigned szone_check_start = 0;
6941unsigned szone_check_modulo = 1;
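// These knobs throttle the (expensive) consistency checks: nothing is checked until
// szone_check_counter reaches szone_check_start, and thereafter only every
// szone_check_modulo-th call actually runs szone_check_all().  A progress line is
// logged every 10000 calls.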
6942
6943static NOINLINE boolean_t
6944szone_check_all(szone_t *szone, const char *function)
6945{
6946	size_t index;
6947
6948	/* check tiny regions - could check region count */
6949	for (index = 0; index < szone->tiny_region_generation->num_regions_allocated; ++index) {
6950		region_t tiny = szone->tiny_region_generation->hashed_regions[index];
6951
6952		if (HASHRING_REGION_DEALLOCATED == tiny)
6953			continue;
6954
6955		if (tiny) {
6956			magazine_t *tiny_mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->tiny_magazines,
6957																		REGION_TRAILER_FOR_TINY_REGION(tiny), MAGAZINE_INDEX_FOR_TINY_REGION(tiny));
6958
6959			if (!tiny_check_region(szone, tiny)) {
6960				SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
6961				szone->debug_flags &= ~ CHECK_REGIONS;
6962				szone_error(szone, 1, "check: tiny region incorrect", NULL,
6963							"*** tiny region %ld incorrect szone_check_all(%s) counter=%d\n",
6964							index, function, szone_check_counter);
6965				return 0;
6966			}
6967			SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_mag_ptr);
6968		}
6969	}
6970	/* check tiny free lists */
6971	for (index = 0; index < NUM_TINY_SLOTS; ++index) {
6972		if (!tiny_free_list_check(szone, index)) {
6973			szone->debug_flags &= ~ CHECK_REGIONS;
6974			szone_error(szone, 1, "check: tiny free list incorrect", NULL,
6975						"*** tiny free list incorrect (slot=%ld) szone_check_all(%s) counter=%d\n",
6976						index, function, szone_check_counter);
6977			return 0;
6978		}
6979	}
6980
6981	/* check small regions - could check region count */
6982	for (index = 0; index < szone->small_region_generation->num_regions_allocated; ++index) {
6983		region_t small = szone->small_region_generation->hashed_regions[index];
6984
6985		if (HASHRING_REGION_DEALLOCATED == small)
6986			continue;
6987
6988		if (small) {
6989			magazine_t *small_mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->small_magazines,
6990																		 REGION_TRAILER_FOR_SMALL_REGION(small), MAGAZINE_INDEX_FOR_SMALL_REGION(small));
6991
6992			if (!small_check_region(szone, small)) {
6993				SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
6994				szone->debug_flags &= ~ CHECK_REGIONS;
6995				szone_error(szone, 1, "check: small region incorrect", NULL,
6996							"*** small region %ld incorrect szone_check_all(%s) counter=%d\n",
6997							index, function, szone_check_counter);
6998				return 0;
6999			}
7000			SZONE_MAGAZINE_PTR_UNLOCK(szone, small_mag_ptr);
7001		}
7002	}
7003	/* check small free lists */
7004	for (index = 0; index < szone->num_small_slots; ++index) {
7005		if (!small_free_list_check(szone, index)) {
7006			szone->debug_flags &= ~ CHECK_REGIONS;
7007			szone_error(szone, 1, "check: small free list incorrect", NULL,
7008						"*** small free list incorrect (slot=%ld) szone_check_all(%s) counter=%d\n",
7009						index, function, szone_check_counter);
7010			return 0;
7011		}
7012	}
7013
7014	return 1;
7015}
7016
7017static boolean_t
7018szone_check(szone_t *szone)
7019{
7020	if ((++szone_check_counter % 10000) == 0)
7021		_malloc_printf(ASL_LEVEL_NOTICE, "at szone_check counter=%d\n", szone_check_counter);
7022
7023	if (szone_check_counter < szone_check_start)
7024		return 1;
7025
7026	if (szone_check_counter % szone_check_modulo)
7027		return 1;
7028
7029	return szone_check_all(szone, "");
7030}
7031
7032static kern_return_t
7033szone_ptr_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t zone_address,
7034							memory_reader_t reader, vm_range_recorder_t recorder)
7035{
7036	szone_t		*szone;
7037	kern_return_t	err;
7038
7039	if (!reader) reader = _szone_default_reader;
7040
7041	err = reader(task, zone_address, sizeof(szone_t), (void **)&szone);
7042	if (err) return err;
7043
7044	err = tiny_in_use_enumerator(task, context, type_mask, szone, reader, recorder);
7045	if (err) return err;
7046
7047	err = small_in_use_enumerator(task, context, type_mask, szone, reader, recorder);
7048	if (err) return err;
7049
7050	err = large_in_use_enumerator(task, context, type_mask,
7051								  (vm_address_t)szone->large_entries, szone->num_large_entries, reader, recorder);
7052	return err;
7053}
7054
7055// The following method is deprecated: use scalable_zone_statistics() instead
7056void
7057scalable_zone_info(malloc_zone_t *zone, unsigned *info_to_fill, unsigned count)
7058{
7059	szone_t	*szone = (void *)zone;
7060	unsigned	info[13];
7061
7062	// We do not take the zone lock here, to facilitate debugging.
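	// Layout of info[] (it mirrors the format strings in szone_print() below):
	//   [0]/[1]  blocks/bytes in use        [2] touched bytes      [3] bytes allocated
	//   [4]/[5]  tiny blocks/bytes          [6]/[7] small          [8]/[9] large
	//   [10]/[11] huge (deprecated, now 0)  [12] debug_flags
	// "touched" is "allocated" minus the unused mag_bytes_free_at_start/_end slack.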
7063
7064	size_t	s = 0;
7065	unsigned	t = 0;
7066	size_t	u = 0;
7067	mag_index_t mag_index;
7068
7069	for (mag_index = -1; mag_index < szone->num_tiny_magazines; mag_index++) {
7070		s += szone->tiny_magazines[mag_index].mag_bytes_free_at_start;
7071		s += szone->tiny_magazines[mag_index].mag_bytes_free_at_end;
7072		t += szone->tiny_magazines[mag_index].mag_num_objects;
7073		u += szone->tiny_magazines[mag_index].mag_num_bytes_in_objects;
7074	}
7075
7076	info[4] = t;
7077	info[5] = u;
7078
7079	for (t = 0, u = 0, mag_index = -1; mag_index < szone->num_small_magazines; mag_index++) {
7080		s += szone->small_magazines[mag_index].mag_bytes_free_at_start;
7081		s += szone->small_magazines[mag_index].mag_bytes_free_at_end;
7082		t += szone->small_magazines[mag_index].mag_num_objects;
7083		u += szone->small_magazines[mag_index].mag_num_bytes_in_objects;
7084	}
7085
7086	info[6] = t;
7087	info[7] = u;
7088
7089	info[8] = szone->num_large_objects_in_use;
7090	info[9] = szone->num_bytes_in_large_objects;
7091
7092	info[10] = 0; // DEPRECATED szone->num_huge_entries;
7093	info[11] = 0; // DEPRECATED szone->num_bytes_in_huge_objects;
7094
7095	info[12] = szone->debug_flags;
7096
7097	info[0] = info[4] + info[6] + info[8] + info[10];
7098	info[1] = info[5] + info[7] + info[9] + info[11];
7099
7100	info[3] = (szone->num_tiny_regions - szone->num_tiny_regions_dealloc) * TINY_REGION_SIZE +
7101	(szone->num_small_regions - szone->num_small_regions_dealloc) * SMALL_REGION_SIZE + info[9] + info[11];
7102
7103	info[2] = info[3] - s;
7104	memcpy(info_to_fill, info, sizeof(unsigned)*count);
7105}
7106
7107// FIXME: consistent picture requires locking!
7108static NOINLINE void
7109szone_print(szone_t *szone, boolean_t verbose)
7110{
7111	unsigned	info[13];
7112	size_t	index;
7113	region_t	region;
7114
7115	scalable_zone_info((void *)szone, info, 13);
7116	_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
7117				   "Scalable zone %p: inUse=%u(%y) touched=%y allocated=%y flags=%d\n",
7118				   szone, info[0], info[1], info[2], info[3], info[12]);
7119	_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
7120				   "\ttiny=%u(%y) small=%u(%y) large=%u(%y) huge=%u(%y)\n",
7121				   info[4], info[5], info[6], info[7], info[8], info[9], info[10], info[11]);
7122	// tiny
7123	_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
7124				   "%lu tiny regions:\n", szone->num_tiny_regions);
7125	if (szone->num_tiny_regions_dealloc)
7126		_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
7127					   "[%lu tiny regions have been vm_deallocate'd]\n", szone->num_tiny_regions_dealloc);
7128	for (index = 0; index < szone->tiny_region_generation->num_regions_allocated; ++index) {
7129		region = szone->tiny_region_generation->hashed_regions[index];
7130		if (HASHRING_OPEN_ENTRY != region && HASHRING_REGION_DEALLOCATED != region) {
7131			mag_index_t mag_index = MAGAZINE_INDEX_FOR_TINY_REGION(region);
7132			print_tiny_region(verbose, region,
7133							  (region == szone->tiny_magazines[mag_index].mag_last_region) ?
7134							  szone->tiny_magazines[mag_index].mag_bytes_free_at_start : 0,
7135							  (region == szone->tiny_magazines[mag_index].mag_last_region) ?
7136							  szone->tiny_magazines[mag_index].mag_bytes_free_at_end : 0);
7137		}
7138	}
7139	if (verbose)
7140		print_tiny_free_list(szone);
7141	// small
7142	_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
7143				   "%lu small regions:\n", szone->num_small_regions);
7144	if (szone->num_small_regions_dealloc)
7145		_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
7146					   "[%lu small regions have been vm_deallocate'd]\n", szone->num_small_regions_dealloc);
7147	for (index = 0; index < szone->small_region_generation->num_regions_allocated; ++index) {
7148		region = szone->small_region_generation->hashed_regions[index];
7149		if (HASHRING_OPEN_ENTRY != region && HASHRING_REGION_DEALLOCATED != region) {
7150			mag_index_t mag_index = MAGAZINE_INDEX_FOR_SMALL_REGION(region);
7151			print_small_region(szone, verbose, region,
7152							   (region == szone->small_magazines[mag_index].mag_last_region) ?
7153							   szone->small_magazines[mag_index].mag_bytes_free_at_start : 0,
7154							   (region == szone->small_magazines[mag_index].mag_last_region) ?
7155							   szone->small_magazines[mag_index].mag_bytes_free_at_end : 0);
7156		}
7157	}
7158	if (verbose)
7159		print_small_free_list(szone);
7160}
7161
7162static void
7163szone_log(malloc_zone_t *zone, void *log_address)
7164{
7165	szone_t	*szone = (szone_t *)zone;
7166
7167	szone->log_address = log_address;
7168}
7169
7170// <rdar://problem/18001324>
7171// When forcing the lock on the entire zone, make sure we are out of the critical section in each magazine
7172static INLINE void
7173szone_force_lock_magazine(szone_t *szone, magazine_t *mag)
7174{
7175	while (1) {
7176		SZONE_MAGAZINE_PTR_LOCK(szone, mag);
7177		if (!mag->alloc_underway)
7178			return;
7179
7180		SZONE_MAGAZINE_PTR_UNLOCK(szone, mag);
7181		yield();
7182	}
7183}
7184
7185static void
7186szone_force_lock(szone_t *szone)
7187{
7188	mag_index_t i;
7189
7190	for (i = 0; i < szone->num_tiny_magazines; ++i) {
7191		szone_force_lock_magazine(szone, &szone->tiny_magazines[i]);
7192	}
7193	szone_force_lock_magazine(szone, &szone->tiny_magazines[DEPOT_MAGAZINE_INDEX]);
7194
7195	for (i = 0; i < szone->num_small_magazines; ++i) {
7196		szone_force_lock_magazine(szone, &szone->small_magazines[i]);
7197	}
7198	szone_force_lock_magazine(szone, &szone->small_magazines[DEPOT_MAGAZINE_INDEX]);
7199
7200	SZONE_LOCK(szone);
7201}
7202
7203static void
7204szone_force_unlock(szone_t *szone)
7205{
7206	mag_index_t i;
7207
7208	SZONE_UNLOCK(szone);
7209
7210	for (i = -1; i < szone->num_small_magazines; ++i) {
7211		SZONE_MAGAZINE_PTR_UNLOCK(szone, (&(szone->small_magazines[i])));
7212	}
7213
7214	for (i = -1; i < szone->num_tiny_magazines; ++i) {
7215		SZONE_MAGAZINE_PTR_UNLOCK(szone, (&(szone->tiny_magazines[i])));
7216	}
7217}
7218
7219static boolean_t
7220szone_locked(szone_t *szone)
7221{
7222	mag_index_t i;
7223	int tookLock;
7224
7225	tookLock = SZONE_TRY_LOCK(szone);
7226	if (tookLock == 0)
7227		return 1;
7228	SZONE_UNLOCK(szone);
7229
7230	for (i = -1; i < szone->num_small_magazines; ++i) {
7231		tookLock = SZONE_MAGAZINE_PTR_TRY_LOCK(szone, (&(szone->small_magazines[i])));
7232		if (tookLock == 0)
7233			return 1;
7234		SZONE_MAGAZINE_PTR_UNLOCK(szone, (&(szone->small_magazines[i])));
7235	}
7236
7237	for (i = -1; i < szone->num_tiny_magazines; ++i) {
7238		tookLock = SZONE_MAGAZINE_PTR_TRY_LOCK(szone, (&(szone->tiny_magazines[i])));
7239		if (tookLock == 0)
7240			return 1;
7241		SZONE_MAGAZINE_PTR_UNLOCK(szone, (&(szone->tiny_magazines[i])));
7242	}
7243	return 0;
7244}
7245
7246static size_t
7247szone_pressure_relief(szone_t *szone, size_t goal)
7248{
7249	size_t total = 0;
7250
7251#if MADVISE_PRESSURE_RELIEF
7252	mag_index_t mag_index;
7253
7254	magazine_t *tiny_depot_ptr = (&szone->tiny_magazines[DEPOT_MAGAZINE_INDEX]);
7255	magazine_t *small_depot_ptr = (&szone->small_magazines[DEPOT_MAGAZINE_INDEX]);
7256
7257	for (mag_index = 0; mag_index < szone->num_tiny_magazines; mag_index++) {
7258		size_t index;
7259		for (index = 0; index < szone->tiny_region_generation->num_regions_allocated; ++index) {
7260			SZONE_LOCK(szone);
7261
7262			region_t tiny = szone->tiny_region_generation->hashed_regions[index];
7263			if (!tiny || tiny == HASHRING_REGION_DEALLOCATED) {
7264				SZONE_UNLOCK(szone);
7265				continue;
7266			}
7267
7268			magazine_t *mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->tiny_magazines, REGION_TRAILER_FOR_TINY_REGION(tiny), MAGAZINE_INDEX_FOR_TINY_REGION(tiny));
7269			SZONE_UNLOCK(szone);
7270
			/* Ordering is important here: the magazine that owns a region can change
			 * while mag_lock_zine_for_region_trailer() runs, so src_mag_index must be
			 * read only after we have obtained the lock.
			 */
7275			mag_index_t src_mag_index = MAGAZINE_INDEX_FOR_TINY_REGION(tiny);
7276
7277			/* We can (and must) ignore magazines that are already in the recirc depot. */
7278			if (src_mag_index == DEPOT_MAGAZINE_INDEX) {
7279				SZONE_MAGAZINE_PTR_UNLOCK(szone, mag_ptr);
7280				continue;
7281			}
7282
7283			if (tiny == mag_ptr->mag_last_region && (mag_ptr->mag_bytes_free_at_end || mag_ptr->mag_bytes_free_at_start)) {
7284				tiny_finalize_region(szone, mag_ptr);
7285			}
7286
7287			/* Because this region is currently in use, we can't safely madvise it while
7288			 * it's attached to the magazine. For this operation we have to remove it from
7289			 * the current mag, attach it to the depot and then madvise.
7290			 */
7291
7292			recirc_list_extract(szone, mag_ptr, REGION_TRAILER_FOR_TINY_REGION(tiny));
7293			int objects_in_use = tiny_free_detach_region(szone, mag_ptr, tiny);
7294
7295			SZONE_MAGAZINE_PTR_LOCK(szone, tiny_depot_ptr);
7296			MAGAZINE_INDEX_FOR_TINY_REGION(tiny) = DEPOT_MAGAZINE_INDEX;
7297			REGION_TRAILER_FOR_TINY_REGION(tiny)->pinned_to_depot = 0;
7298
7299			size_t bytes_inplay = tiny_free_reattach_region(szone, tiny_depot_ptr, tiny);
7300
7301			/* Fix up the metadata of the target magazine while the region is in the depot. */
7302			mag_ptr->mag_num_bytes_in_objects -= bytes_inplay;
7303			mag_ptr->num_bytes_in_magazine -= TINY_REGION_PAYLOAD_BYTES;
7304			mag_ptr->mag_num_objects -= objects_in_use;
7305
7306			/* Now we can drop the magazine lock of the source mag. */
7307			SZONE_MAGAZINE_PTR_UNLOCK(szone, mag_ptr);
7308
7309			tiny_depot_ptr->mag_num_bytes_in_objects += bytes_inplay;
7310			tiny_depot_ptr->num_bytes_in_magazine += TINY_REGION_PAYLOAD_BYTES;
			tiny_depot_ptr->mag_num_objects += objects_in_use;
7312
7313			recirc_list_splice_last(szone, tiny_depot_ptr, REGION_TRAILER_FOR_TINY_REGION(tiny));
7314
			/* Actually do the scan.  This is done while holding the depot lock; the
			 * call drops the lock around the actual madvise syscalls.
			 */
7318			tiny_free_scan_madvise_free(szone, tiny_depot_ptr, tiny);
7319
			/* Now that the region is in the recirc depot, the next allocation that needs
			 * more blocks can take one of these regions back out of the depot.
			 * Because OS X applies madvise reuse on a per-region basis, we leave as many
			 * of these regions in the depot as possible after memory pressure.
			 */
7325			SZONE_MAGAZINE_PTR_UNLOCK(szone, tiny_depot_ptr);
7326		}
7327	}
7328
7329	for (mag_index = 0; mag_index < szone->num_small_magazines; mag_index++) {
7330		size_t index;
7331		for (index = 0; index < szone->small_region_generation->num_regions_allocated; ++index) {
7332			SZONE_LOCK(szone);
7333
7334			region_t small = szone->small_region_generation->hashed_regions[index];
7335			if (!small || small == HASHRING_REGION_DEALLOCATED) {
7336				SZONE_UNLOCK(szone);
7337				continue;
7338			}
7339
7340			magazine_t *mag_ptr = mag_lock_zine_for_region_trailer(szone, szone->small_magazines, REGION_TRAILER_FOR_SMALL_REGION(small), MAGAZINE_INDEX_FOR_SMALL_REGION(small));
7341			SZONE_UNLOCK(szone);
7342
			/* Ordering is important here: the magazine that owns a region can change
			 * while mag_lock_zine_for_region_trailer() runs, so src_mag_index must be
			 * read only after we have obtained the lock.
			 */
7347			mag_index_t src_mag_index = MAGAZINE_INDEX_FOR_SMALL_REGION(small);
7348
7349			/* We can (and must) ignore magazines that are already in the recirc depot. */
7350			if (src_mag_index == DEPOT_MAGAZINE_INDEX) {
7351				SZONE_MAGAZINE_PTR_UNLOCK(szone, mag_ptr);
7352				continue;
7353			}
7354
7355			if (small == mag_ptr->mag_last_region && (mag_ptr->mag_bytes_free_at_end || mag_ptr->mag_bytes_free_at_start)) {
7356				small_finalize_region(szone, mag_ptr);
7357			}
7358
7359			/* Because this region is currently in use, we can't safely madvise it while
7360			 * it's attached to the magazine. For this operation we have to remove it from
7361			 * the current mag, attach it to the depot and then madvise.
7362			 */
7363
7364			recirc_list_extract(szone, mag_ptr, REGION_TRAILER_FOR_SMALL_REGION(small));
7365			int objects_in_use = small_free_detach_region(szone, mag_ptr, small);
7366
7367			SZONE_MAGAZINE_PTR_LOCK(szone, small_depot_ptr);
7368			MAGAZINE_INDEX_FOR_SMALL_REGION(small) = DEPOT_MAGAZINE_INDEX;
7369			REGION_TRAILER_FOR_SMALL_REGION(small)->pinned_to_depot = 0;
7370
7371			size_t bytes_inplay = small_free_reattach_region(szone, small_depot_ptr, small);
7372
7373			/* Fix up the metadata of the target magazine while the region is in the depot. */
7374			mag_ptr->mag_num_bytes_in_objects -= bytes_inplay;
7375			mag_ptr->num_bytes_in_magazine -= SMALL_REGION_PAYLOAD_BYTES;
7376			mag_ptr->mag_num_objects -= objects_in_use;
7377
7378			/* Now we can drop the magazine lock of the source mag. */
7379			SZONE_MAGAZINE_PTR_UNLOCK(szone, mag_ptr);
7380
7381			small_depot_ptr->mag_num_bytes_in_objects += bytes_inplay;
7382			small_depot_ptr->num_bytes_in_magazine += SMALL_REGION_PAYLOAD_BYTES;
			small_depot_ptr->mag_num_objects += objects_in_use;
7384
7385			recirc_list_splice_last(szone, small_depot_ptr, REGION_TRAILER_FOR_SMALL_REGION(small));
7386
			/* Actually do the scan.  This is done while holding the depot lock; the
			 * call drops the lock around the actual madvise syscalls.
			 */
7390			small_free_scan_madvise_free(szone, small_depot_ptr, small);
7391
			/* Now that the region is in the recirc depot, the next allocation that needs
			 * more blocks can take one of these regions back out of the depot.
			 * Because OS X applies madvise reuse on a per-region basis, we leave as many
			 * of these regions in the depot as possible after memory pressure.
			 */
7397			SZONE_MAGAZINE_PTR_UNLOCK(szone, small_depot_ptr);
7398		}
7399	}
7400#endif
7401
7402#if LARGE_CACHE
7403	if (szone->flotsam_enabled) {
7404		SZONE_LOCK(szone);
7405
7406		// stack allocated copy of the death-row cache
7407		int idx = szone->large_entry_cache_oldest, idx_max = szone->large_entry_cache_newest;
7408		large_entry_t local_entry_cache[LARGE_ENTRY_CACHE_SIZE];
7409
7410		memcpy((void *)local_entry_cache, (void *)szone->large_entry_cache, sizeof(local_entry_cache));
7411
7412		szone->large_entry_cache_oldest = szone->large_entry_cache_newest = 0;
7413		szone->large_entry_cache[0].address = 0x0;
7414		szone->large_entry_cache[0].size = 0;
7415		szone->large_entry_cache_bytes = 0;
7416		szone->large_entry_cache_reserve_bytes = 0;
7417
7418		szone->flotsam_enabled = FALSE;
7419
7420		SZONE_UNLOCK(szone);
7421
		// Deallocate the death-row cache outside the zone lock, accumulating the
		// freed bytes into the function-wide total returned below.
7424		while (idx != idx_max) {
7425			deallocate_pages(szone, (void *) local_entry_cache[idx].address, local_entry_cache[idx].size, 0);
7426			total += local_entry_cache[idx].size;
7427			if (++idx == LARGE_ENTRY_CACHE_SIZE) idx = 0;
7428		}
7429		if (0 != local_entry_cache[idx].address && 0 != local_entry_cache[idx].size) {
7430			deallocate_pages(szone, (void *) local_entry_cache[idx].address, local_entry_cache[idx].size, 0);
7431			total += local_entry_cache[idx].size;
7432		}
7433	}
7434#endif
7435
7436	MAGMALLOC_PRESSURERELIEF((void *)szone, goal, total); // DTrace USDT Probe
7437	return total;
7438}
7439
7440boolean_t
7441scalable_zone_statistics(malloc_zone_t *zone, malloc_statistics_t *stats, unsigned subzone)
7442{
7443	szone_t *szone = (szone_t *)zone;
7444
7445	switch (subzone) {
7446		case 0:
7447		{
7448			size_t	s = 0;
7449			unsigned	t = 0;
7450			size_t	u = 0;
7451			mag_index_t mag_index;
7452
7453			for (mag_index = -1; mag_index < szone->num_tiny_magazines; mag_index++) {
7454				s += szone->tiny_magazines[mag_index].mag_bytes_free_at_start;
7455				s += szone->tiny_magazines[mag_index].mag_bytes_free_at_end;
7456				t += szone->tiny_magazines[mag_index].mag_num_objects;
7457				u += szone->tiny_magazines[mag_index].mag_num_bytes_in_objects;
7458			}
7459
7460			stats->blocks_in_use = t;
7461			stats->size_in_use = u;
7462			stats->size_allocated = (szone->num_tiny_regions - szone->num_tiny_regions_dealloc) * TINY_REGION_SIZE;
7463			stats->max_size_in_use = stats->size_allocated - s;
7464			return 1;
7465		}
7466		case 1:
7467		{
7468			size_t	s = 0;
7469			unsigned	t = 0;
7470			size_t	u = 0;
7471			mag_index_t mag_index;
7472
7473			for (mag_index = -1; mag_index < szone->num_small_magazines; mag_index++) {
7474				s += szone->small_magazines[mag_index].mag_bytes_free_at_start;
7475				s += szone->small_magazines[mag_index].mag_bytes_free_at_end;
7476				t += szone->small_magazines[mag_index].mag_num_objects;
7477				u += szone->small_magazines[mag_index].mag_num_bytes_in_objects;
7478			}
7479
7480			stats->blocks_in_use = t;
7481			stats->size_in_use = u;
7482			stats->size_allocated = (szone->num_small_regions - szone->num_small_regions_dealloc) * SMALL_REGION_SIZE;
7483			stats->max_size_in_use = stats->size_allocated - s;
7484			return 1;
7485		}
7486		case 2:
7487			stats->blocks_in_use = szone->num_large_objects_in_use;
7488			stats->size_in_use = szone->num_bytes_in_large_objects;
7489			stats->max_size_in_use = stats->size_allocated = stats->size_in_use;
7490			return 1;
7491		case 3:
7492			stats->blocks_in_use = 0; // DEPRECATED szone->num_huge_entries;
7493			stats->size_in_use = 0; // DEPRECATED szone->num_bytes_in_huge_objects;
7494			stats->max_size_in_use = stats->size_allocated = 0;
7495			return 1;
7496	}
7497	return 0;
7498}
7499
7500static void
7501szone_statistics(szone_t *szone, malloc_statistics_t *stats)
7502{
7503	size_t	large;
7504
7505	size_t	s = 0;
7506	unsigned	t = 0;
7507	size_t	u = 0;
7508	mag_index_t mag_index;
7509
7510	for (mag_index = -1; mag_index < szone->num_tiny_magazines; mag_index++) {
7511		s += szone->tiny_magazines[mag_index].mag_bytes_free_at_start;
7512		s += szone->tiny_magazines[mag_index].mag_bytes_free_at_end;
7513		t += szone->tiny_magazines[mag_index].mag_num_objects;
7514		u += szone->tiny_magazines[mag_index].mag_num_bytes_in_objects;
7515	}
7516
7517	for (mag_index = -1; mag_index < szone->num_small_magazines; mag_index++) {
7518		s += szone->small_magazines[mag_index].mag_bytes_free_at_start;
7519		s += szone->small_magazines[mag_index].mag_bytes_free_at_end;
7520		t += szone->small_magazines[mag_index].mag_num_objects;
7521		u += szone->small_magazines[mag_index].mag_num_bytes_in_objects;
7522	}
7523
7524	large = szone->num_bytes_in_large_objects + 0; // DEPRECATED szone->num_bytes_in_huge_objects;
7525
7526	stats->blocks_in_use = t + szone->num_large_objects_in_use + 0; // DEPRECATED szone->num_huge_entries;
7527	stats->size_in_use = u + large;
7528	stats->max_size_in_use = stats->size_allocated =
7529	(szone->num_tiny_regions - szone->num_tiny_regions_dealloc) * TINY_REGION_SIZE +
7530	(szone->num_small_regions - szone->num_small_regions_dealloc) * SMALL_REGION_SIZE + large;
7531	// Now we account for the untouched areas
7532	stats->max_size_in_use -= s;
7533}
7534
7535static void *
7536legacy_zeroing_large_malloc(szone_t *szone, size_t size) {
7537	if (size > LARGE_THRESHOLD) // Leopard and earlier returned a ZFOD range, so ...
7538		return szone_calloc(szone, 1, size); // Clear to zero always, ham-handedly touching in each page
7539	else
7540		return szone_malloc(szone, size);
7541}
7542
7543static void *
7544legacy_zeroing_large_valloc(szone_t *szone, size_t size) {
7545	void *p = szone_valloc(szone, size);
7546
7547	// Leopard and earlier returned a ZFOD range, so ...
7548	memset(p, 0, size); // Clear to zero always, ham-handedly touching in each page
7549	return p;
7550}
7551
7552void zeroify_scalable_zone(malloc_zone_t *zone)
7553{
7554	szone_t	*szone = (szone_t *)zone;
7555
7556	if (szone) {
7557		mprotect(szone, sizeof(szone->basic_zone), PROT_READ | PROT_WRITE);
7558		szone->basic_zone.malloc = (void *)legacy_zeroing_large_malloc;
7559		szone->basic_zone.valloc = (void *)legacy_zeroing_large_valloc;
7560		mprotect(szone, sizeof(szone->basic_zone), PROT_READ);
7561	}
7562}
7563
7564static const struct malloc_introspection_t szone_introspect = {
7565	(void *)szone_ptr_in_use_enumerator,
7566	(void *)szone_good_size,
7567	(void *)szone_check,
7568	(void *)szone_print,
7569	szone_log,
7570	(void *)szone_force_lock,
7571	(void *)szone_force_unlock,
7572	(void *)szone_statistics,
7573	(void *)szone_locked,
7574	NULL, NULL, NULL, NULL, /* Zone enumeration version 7 and forward. */
7575}; // marked as const to spare the DATA section
7576
7577malloc_zone_t *
7578create_scalable_zone(size_t initial_size, unsigned debug_flags)
7579{
7580	szone_t	*szone;
7581	uint64_t	hw_memsize = 0;
7582
7583#if defined(__i386__) || defined(__x86_64__)
7584	if (_COMM_PAGE_VERSION_REQD > (*((uint16_t *)_COMM_PAGE_VERSION))) {
7585		malloc_printf("*** ERROR - comm page version mismatch.\n");
7586		exit(-1);
7587	}
7588#endif
7589
7590	/* get memory for the zone. */
7591	szone = allocate_pages(NULL, SZONE_PAGED_SIZE, 0, 0, VM_MEMORY_MALLOC);
7592	if (!szone)
7593		return NULL;
7594
7595	/* set up the szone structure */
7596#if 0
7597#warning CHECK_REGIONS enabled
7598	debug_flags |= CHECK_REGIONS;
7599#endif
7600#if 0
7601#warning LOG enabled
7602	szone->log_address = ~0;
7603#endif
7604	szone->trg[0].nextgen = &(szone->trg[1]);
7605	szone->trg[1].nextgen = &(szone->trg[0]);
7606	szone->tiny_region_generation = &(szone->trg[0]);
7607
7608	szone->tiny_region_generation->hashed_regions = szone->initial_tiny_regions;
7609	szone->tiny_region_generation->num_regions_allocated = INITIAL_NUM_REGIONS;
7610	szone->tiny_region_generation->num_regions_allocated_shift = INITIAL_NUM_REGIONS_SHIFT;
7611
7612	szone->srg[0].nextgen = &(szone->srg[1]);
7613	szone->srg[1].nextgen = &(szone->srg[0]);
7614	szone->small_region_generation = &(szone->srg[0]);
7615
7616	szone->small_region_generation->hashed_regions = szone->initial_small_regions;
7617	szone->small_region_generation->num_regions_allocated = INITIAL_NUM_REGIONS;
7618	szone->small_region_generation->num_regions_allocated_shift = INITIAL_NUM_REGIONS_SHIFT;
7619
7620
7621	/*
7622	 * Initialize variables that size the free list for SMALL allocations based
7623	 * upon the amount of memory in the system.  Switch to a larger number of
7624	 * free list entries at 1GB.
7625	 */
7626#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__arm64__)
7627	if ((hw_memsize = *(uint64_t *)(uintptr_t)_COMM_PAGE_MEMORY_SIZE) >= (1ULL << 30))
7628#else
7629		size_t	uint64_t_size = sizeof(hw_memsize);
7630
7631	if (0 == sysctlbyname("hw.memsize", &hw_memsize, &uint64_t_size, 0, 0) &&
7632		hw_memsize >= (1ULL << 30))
7633#endif
7634	{
7635		szone->is_largemem = 1;
7636		szone->num_small_slots = NUM_SMALL_SLOTS_LARGEMEM;
7637		szone->large_threshold = LARGE_THRESHOLD_LARGEMEM;
7638		szone->vm_copy_threshold = VM_COPY_THRESHOLD_LARGEMEM;
7639	} else {
7640		szone->is_largemem = 0;
7641		szone->num_small_slots = NUM_SMALL_SLOTS;
7642		szone->large_threshold = LARGE_THRESHOLD;
7643		szone->vm_copy_threshold = VM_COPY_THRESHOLD;
7644	}
7645#if LARGE_CACHE
7646	szone->large_entry_cache_reserve_limit =
	hw_memsize >> 10; // madvise(..., MADV_FREE_REUSABLE) death-row arrivals above this threshold [~0.1%]
7648
7649	/* <rdar://problem/6610904> Reset protection when returning a previous large allocation? */
7650	int32_t libSystemVersion  = NSVersionOfLinkTimeLibrary("System");
7651	if ((-1 != libSystemVersion) && ((libSystemVersion >> 16) < 112) /* CFSystemVersionSnowLeopard */)
7652		szone->large_legacy_reset_mprotect = TRUE;
7653	else
7654		szone->large_legacy_reset_mprotect = FALSE;
7655#endif
7656
7657	// Prepare ASLR
7658#if __i386__ || __x86_64__ || __arm64__ || TARGET_OS_EMBEDDED
7659#if __i386__
7660	uintptr_t stackbase = 0x8fe00000;
7661	int entropic_bits = 3;
7662#elif __x86_64__
7663	uintptr_t stackbase = USRSTACK64;
7664	int entropic_bits = 16;
7665#elif __arm64__
7666	uintptr_t stackbase = USRSTACK64;
7667	int entropic_bits = 7;
7668#else
7669	uintptr_t stackbase = USRSTACK;
7670	int entropic_bits = 3;
7671#endif
7672
7673	// assert(((1 << entropic_bits) - 1) << SMALL_BLOCKS_ALIGN < (stackbase - MAXSSIZ - ENTROPIC_KABILLION));
7674
7675	if (0 != _dyld_get_image_slide((const struct mach_header*)_NSGetMachExecuteHeader())) {
7676		if (0 == entropic_address) {
7677			uintptr_t t = stackbase - MAXSSIZ - ((uintptr_t) (malloc_entropy[1] & ((1 << entropic_bits) - 1)) << SMALL_BLOCKS_ALIGN);
7678			(void)__sync_bool_compare_and_swap(&entropic_limit, 0, t); // Just one initialization please
7679			(void)__sync_bool_compare_and_swap(&entropic_address, 0, t - ENTROPIC_KABILLION); // Just one initialization please
7680		}
7681		debug_flags &= ~DISABLE_ASLR;
7682	} else {
7683		// zero slide when ASLR has been disabled by boot-arg. Eliminate cloaking.
7684		malloc_entropy[0] = 0;
7685		malloc_entropy[1] = 0;
7686		debug_flags |= DISABLE_ASLR;
7687	}
7688
7689#else
7690	malloc_entropy[0] = 0;
7691	malloc_entropy[1] = 0;
7692	debug_flags |= DISABLE_ASLR;
7693#endif
7694
7695	// Initialize the security token.
7696	szone->cookie = (uintptr_t)malloc_entropy[0];
7697
7698	szone->basic_zone.version = 8;
7699	szone->basic_zone.size = (void *)szone_size;
7700	szone->basic_zone.malloc = (void *)szone_malloc;
7701	szone->basic_zone.calloc = (void *)szone_calloc;
7702	szone->basic_zone.valloc = (void *)szone_valloc;
7703	szone->basic_zone.free = (void *)szone_free;
7704	szone->basic_zone.realloc = (void *)szone_realloc;
7705	szone->basic_zone.destroy = (void *)szone_destroy;
7706	szone->basic_zone.batch_malloc = (void *)szone_batch_malloc;
7707	szone->basic_zone.batch_free = (void *)szone_batch_free;
7708	szone->basic_zone.introspect = (struct malloc_introspection_t *)&szone_introspect;
7709	szone->basic_zone.memalign = (void *)szone_memalign;
7710	szone->basic_zone.free_definite_size = (void *)szone_free_definite_size;
7711	szone->basic_zone.pressure_relief = (void *)szone_pressure_relief;
7712
7713	szone->basic_zone.reserved1 = 0; /* Set to zero once and for all as required by CFAllocator. */
7714	szone->basic_zone.reserved2 = 0; /* Set to zero once and for all as required by CFAllocator. */
7715	mprotect(szone, sizeof(szone->basic_zone), PROT_READ); /* Prevent overwriting the function pointers in basic_zone. */
7716
7717	szone->debug_flags = debug_flags;
7718	_malloc_lock_init(&szone->large_szone_lock);
7719
7720#if defined(__ppc__) || defined(__ppc64__)
7721	/*
7722	 * In the interest of compatibility for PPC applications executing via Rosetta,
7723	 * arrange to zero-fill allocations as occurred by side effect in Leopard and earlier.
7724	 */
7725	zeroify_scalable_zone((malloc_zone_t *)szone);
7726#endif
7727
7728	szone->cpu_id_key = -1UL; // Unused.
7729
7730	// Query the number of configured processors.
7731	// Uniprocessor case gets just one tiny and one small magazine (whose index is zero). This gives
7732	// the same behavior as the original scalable malloc. MP gets per-CPU magazines
7733	// that scale (way) better.
7734#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__arm64__)
7735	int nproc = *(uint8_t *)(uintptr_t)_COMM_PAGE_NCPUS;
7736#else
7737	int nproc = sysconf(_SC_NPROCESSORS_CONF);
7738#endif
7739	szone->num_tiny_magazines = (nproc > 1) ? MIN(nproc, TINY_MAX_MAGAZINES) : 1;
7740
7741	// FIXME vm_allocate() based on number of configured CPUs
7742	magazine_t *tiny_magazines = allocate_pages(NULL, TINY_MAGAZINE_PAGED_SIZE, 0,
7743												SCALABLE_MALLOC_ADD_GUARD_PAGES, VM_MEMORY_MALLOC);
7744	if (NULL == tiny_magazines)
7745		return NULL;
7746
7747	szone->tiny_magazines = &(tiny_magazines[1]); // szone->tiny_magazines[-1] is the Depot
7748
7749	// The magazines are indexed in [0 .. (num_tiny_magazines - 1)]
7750	// Find the smallest power of 2 that exceeds (num_tiny_magazines - 1)
7751	szone->num_tiny_magazines_mask_shift = 0;
7752	int i = 1;
7753	while( i <= (szone->num_tiny_magazines - 1) ) {
7754		szone->num_tiny_magazines_mask_shift++;
7755		i <<= 1;
7756	}
7757
7758	// Now if i <= TINY_MAX_MAGAZINES we'll never access tiny_magazines[] out of bounds.
7759	if (i > TINY_MAX_MAGAZINES) {
7760		malloc_printf("*** FATAL ERROR - magazine mask exceeds allocated magazines.\n");
7761		exit(-1);
7762	}
7763
7764	// Reduce i by 1 to obtain a mask covering [0 .. (num_tiny_magazines - 1)]
7765	szone->num_tiny_magazines_mask = i - 1; // A mask used for hashing to a magazine index (and a safety aid)
7766	szone->last_tiny_advise = 0;
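
#if 0
	/* Worked example (illustrative only, not compiled): with num_tiny_magazines == 6,
	 * the loop above runs for i = 1, 2, 4 and stops at i == 8, so
	 * num_tiny_magazines_mask_shift == 3 and num_tiny_magazines_mask == 7.  A
	 * per-thread hash along the (hypothetical) lines below then always lands in
	 * [0 .. 7], which stays inside the TINY_MAX_MAGAZINES worth of magazines
	 * backing tiny_magazines[] even though only 6 of them are active.
	 */
	mag_index_t example_mag_index = (mag_index_t)(_os_cpu_number() & szone->num_tiny_magazines_mask);
	(void)example_mag_index;
#endif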
7767
7768	// Init the tiny_magazine locks
7769	_malloc_lock_init(&szone->tiny_regions_lock);
7770	_malloc_lock_init(&szone->tiny_magazines[DEPOT_MAGAZINE_INDEX].magazine_lock);
7771	for (i = 0; i < szone->num_tiny_magazines; ++i) {
7772		_malloc_lock_init(&szone->tiny_magazines[i].magazine_lock);
7773	}
7774
7775	szone->num_small_magazines = (nproc > 1) ? MIN(nproc, SMALL_MAX_MAGAZINES) : 1;
7776
7777	// FIXME vm_allocate() based on number of configured CPUs
7778	magazine_t *small_magazines = allocate_pages(NULL, SMALL_MAGAZINE_PAGED_SIZE, 0,
7779												 SCALABLE_MALLOC_ADD_GUARD_PAGES, VM_MEMORY_MALLOC);
7780	if (NULL == small_magazines)
7781		return NULL;
7782
7783	szone->small_magazines = &(small_magazines[1]); // szone->small_magazines[-1] is the Depot
7784
7785	// The magazines are indexed in [0 .. (num_small_magazines - 1)]
7786	// Find the smallest power of 2 that exceeds (num_small_magazines - 1)
	szone->num_small_magazines_mask_shift = 0;
	i = 1;
	while( i <= (szone->num_small_magazines - 1) ) {
7789		szone->num_small_magazines_mask_shift++;
7790		i <<= 1;
7791	}
7792
7793	// Now if i <= SMALL_MAX_MAGAZINES we'll never access small_magazines[] out of bounds.
7794	if (i > SMALL_MAX_MAGAZINES) {
7795		malloc_printf("*** FATAL ERROR - magazine mask exceeds allocated magazines.\n");
7796		exit(-1);
7797	}
7798
7799	// Reduce i by 1 to obtain a mask covering [0 .. (num_small_magazines - 1)]
7800	szone->num_small_magazines_mask = i - 1; // A mask used for hashing to a magazine index (and a safety aid)
7801	szone->last_small_advise = 0;
7802
7803	// Init the small_magazine locks
7804	_malloc_lock_init(&szone->small_regions_lock);
7805	_malloc_lock_init(&szone->small_magazines[DEPOT_MAGAZINE_INDEX].magazine_lock);
7806	for (i = 0; i < szone->num_small_magazines; ++i) {
7807		_malloc_lock_init(&szone->small_magazines[i].magazine_lock);
7808	}
7809
7810	CHECK(szone, __PRETTY_FUNCTION__);
7811	return (malloc_zone_t *)szone;
7812}
7813
7814//
// purgeable zones have their own "large" allocation pool, but share "tiny" and "small"
7816// heaps with a helper_zone identified in the call to create_purgeable_zone()
7817//
7818static size_t
7819purgeable_size(szone_t *szone, const void *ptr)
7820{
7821	// Only claim our large allocations, leave the shared tiny/small for the helper zone to claim.
7822	return szone_size_try_large(szone, ptr);
7823}
7824
7825static void *
7826purgeable_malloc(szone_t *szone, size_t size) {
7827	if (size <= szone->large_threshold)
7828		return szone_malloc(szone->helper_zone, size);
7829	else
7830		return szone_malloc(szone, size);
7831}
7832
7833static void *
7834purgeable_calloc(szone_t *szone, size_t num_items, size_t size)
7835{
7836	size_t total_bytes = num_items * size;
7837
7838	// Check for overflow of integer multiplication
7839	if (num_items > 1) {
7840#if __LP64__ /* size_t is uint64_t */
7841		if ((num_items | size) & 0xffffffff00000000ul) {
7842			// num_items or size equals or exceeds sqrt(2^64) == 2^32, appeal to wider arithmetic
7843			__uint128_t product = ((__uint128_t)num_items) * ((__uint128_t)size);
7844			if ((uint64_t)(product >> 64)) // compiles to test on upper register of register pair
7845				return NULL;
7846		}
7847#else /* size_t is uint32_t */
7848		if ((num_items | size) & 0xffff0000ul) {
7849			// num_items or size equals or exceeds sqrt(2^32) == 2^16, appeal to wider arithmetic
7850			uint64_t product = ((uint64_t)num_items) * ((uint64_t)size);
7851			if ((uint32_t)(product >> 32)) // compiles to test on upper register of register pair
7852				return NULL;
7853		}
7854#endif
7855	}
7856
7857	if (total_bytes <= szone->large_threshold)
7858		return szone_calloc(szone->helper_zone, 1, total_bytes);
7859	else
7860		return szone_calloc(szone, 1, total_bytes);
7861}
7862
7863static void *
7864purgeable_valloc(szone_t *szone, size_t size)
7865{
7866	if (size <= szone->large_threshold)
7867		return szone_valloc(szone->helper_zone, size);
7868	else
7869		return szone_valloc(szone, size);
7870}
7871
7872static void
7873purgeable_free(szone_t *szone, void *ptr)
7874{
7875	large_entry_t	*entry;
7876
7877	SZONE_LOCK(szone);
7878	entry = large_entry_for_pointer_no_lock(szone, ptr);
7879	SZONE_UNLOCK(szone);
7880	if (entry) {
7881		return free_large(szone, ptr);
7882	} else {
7883		return szone_free(szone->helper_zone, ptr);
7884	}
7885}
7886
7887static void
7888purgeable_free_definite_size(szone_t *szone, void *ptr, size_t size)
7889{
7890	if (size <= szone->large_threshold)
7891		return szone_free_definite_size(szone->helper_zone, ptr, size);
7892	else
7893		return szone_free_definite_size(szone, ptr, size);
7894}
7895
7896static void *
7897purgeable_realloc(szone_t *szone, void *ptr, size_t new_size)
7898{
7899	size_t old_size;
7900
7901	if (NULL == ptr) {
7902		// If ptr is a null pointer, realloc() shall be equivalent to malloc() for the specified size.
7903		return purgeable_malloc(szone, new_size);
7904	} else if (0 == new_size) {
7905		// If size is 0 and ptr is not a null pointer, the object pointed to is freed.
7906		purgeable_free(szone, ptr);
7907		// If size is 0, either a null pointer or a unique pointer that can be successfully passed
7908		// to free() shall be returned.
7909		return purgeable_malloc(szone, 1);
7910	}
7911
7912	old_size = purgeable_size(szone, ptr); // Now ptr can be safely size()'d
7913	if (!old_size)
7914		old_size = szone_size(szone->helper_zone, ptr);
7915
7916	if (!old_size) {
7917		szone_error(szone, 1, "pointer being reallocated was not allocated", ptr, NULL);
7918		return NULL;
7919	}
7920
7921	// Distinguish 4 cases: {oldsize, newsize} x { <= , > large_threshold }
7922	// and deal with the allocation crossing from the purgeable zone to the helper zone and vice versa.
7923	if (old_size <= szone->large_threshold) {
7924		if (new_size <= szone->large_threshold)
7925			return szone_realloc(szone->helper_zone, ptr, new_size);
7926		else {
7927			// allocation crosses from helper to purgeable zone
7928			void * new_ptr = purgeable_malloc(szone, new_size);
7929			if (new_ptr) {
7930				memcpy(new_ptr, ptr, old_size);
7931				szone_free_definite_size(szone->helper_zone, ptr, old_size);
7932			}
7933			return new_ptr; // in state VM_PURGABLE_NONVOLATILE
7934		}
7935	} else {
7936		if (new_size <= szone->large_threshold) {
7937			// allocation crosses from purgeable to helper zone
7938			void * new_ptr = szone_malloc(szone->helper_zone, new_size);
7939			if (new_ptr) {
7940				memcpy(new_ptr, ptr, new_size);
7941				purgeable_free_definite_size(szone, ptr, old_size);
7942			}
7943			return new_ptr;
7944		} else {
7945			void * new_ptr = purgeable_malloc(szone, new_size);
7946			if (new_ptr) {
7947				memcpy(new_ptr, ptr, MIN(old_size, new_size));
7948				purgeable_free_definite_size(szone, ptr, old_size);
7949			}
7950			return new_ptr; // in state VM_PURGABLE_NONVOLATILE
7951		}
7952	}
7953	/* NOTREACHED */
7954}
7955
7956static void
7957purgeable_destroy(szone_t *szone)
7958{
7959	/* destroy large entries */
7960	size_t index = szone->num_large_entries;
7961	large_entry_t	*large;
7962	vm_range_t		range_to_deallocate;
7963
7964	while (index--) {
7965		large = szone->large_entries + index;
7966		if (large->address) {
7967			// we deallocate_pages, including guard pages
7968			deallocate_pages(szone, (void *)(large->address), large->size, szone->debug_flags);
7969		}
7970	}
7971	large_entries_free_no_lock(szone, szone->large_entries, szone->num_large_entries, &range_to_deallocate);
7972	if (range_to_deallocate.size)
7973		deallocate_pages(szone, (void *)range_to_deallocate.address, (size_t)range_to_deallocate.size, 0);
7974
7975	/* Now destroy the separate szone region */
7976	deallocate_pages(szone, (void *)szone, SZONE_PAGED_SIZE, 0);
7977}
7978
7979static unsigned
7980purgeable_batch_malloc(szone_t *szone, size_t size, void **results, unsigned count)
7981{
7982	return szone_batch_malloc(szone->helper_zone, size, results, count);
7983}
7984
7985static void
7986purgeable_batch_free(szone_t *szone, void **to_be_freed, unsigned count)
7987{
7988	return szone_batch_free(szone->helper_zone, to_be_freed, count);
7989}
7990
7991static void *
7992purgeable_memalign(szone_t *szone, size_t alignment, size_t size)
7993{
7994	if (size <= szone->large_threshold)
7995		return szone_memalign(szone->helper_zone, alignment, size);
7996	else
7997		return szone_memalign(szone, alignment, size);
7998}
7999
8000static kern_return_t
8001purgeable_ptr_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t zone_address,
8002								memory_reader_t reader, vm_range_recorder_t recorder)
8003{
8004	szone_t		*szone;
8005	kern_return_t	err;
8006
8007	if (!reader) reader = _szone_default_reader;
8008
8009	err = reader(task, zone_address, sizeof(szone_t), (void **)&szone);
8010	if (err) return err;
8011
8012	err = large_in_use_enumerator(task, context, type_mask,
8013								  (vm_address_t)szone->large_entries, szone->num_large_entries, reader, recorder);
8014	return err;
8015}
8016
8017static size_t
8018purgeable_good_size(szone_t *szone, size_t size)
8019{
8020	if (size <= szone->large_threshold)
8021		return szone_good_size(szone->helper_zone, size);
8022	else
8023		return szone_good_size(szone, size);
8024}
8025
8026static boolean_t
8027purgeable_check(szone_t *szone)
8028{
8029	return 1;
8030}
8031
8032static void
8033purgeable_print(szone_t *szone, boolean_t verbose)
8034{
8035	_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
8036				   "Scalable zone %p: inUse=%u(%y) flags=%d\n",
8037				   szone, szone->num_large_objects_in_use, szone->num_bytes_in_large_objects, szone->debug_flags);
8038}
8039
8040static void
8041purgeable_log(malloc_zone_t *zone, void *log_address)
8042{
8043	szone_t	*szone = (szone_t *)zone;
8044
8045	szone->log_address = log_address;
8046}
8047
8048static void
8049purgeable_force_lock(szone_t *szone)
8050{
8051	SZONE_LOCK(szone);
8052}
8053
8054static void
8055purgeable_force_unlock(szone_t *szone)
8056{
8057	SZONE_UNLOCK(szone);
8058}
8059
8060static void
8061purgeable_statistics(szone_t *szone, malloc_statistics_t *stats)
8062{
8063	stats->blocks_in_use = szone->num_large_objects_in_use;
8064	stats->size_in_use = stats->max_size_in_use = stats->size_allocated = szone->num_bytes_in_large_objects;
8065}
8066
8067static boolean_t
8068purgeable_locked(szone_t *szone)
8069{
8070	int tookLock;
8071
8072	tookLock = SZONE_TRY_LOCK(szone);
8073	if (tookLock == 0)
8074		return 1;
8075	SZONE_UNLOCK(szone);
8076	return 0;
8077}
8078
8079static size_t
8080purgeable_pressure_relief(szone_t *szone, size_t goal)
8081{
8082	return szone_pressure_relief(szone, goal) + szone_pressure_relief(szone->helper_zone, goal);
8083}
8084
8085static const struct malloc_introspection_t purgeable_introspect = {
8086	(void *)purgeable_ptr_in_use_enumerator,
8087	(void *)purgeable_good_size,
8088	(void *)purgeable_check,
8089	(void *)purgeable_print,
8090	purgeable_log,
8091	(void *)purgeable_force_lock,
8092	(void *)purgeable_force_unlock,
8093	(void *)purgeable_statistics,
8094	(void *)purgeable_locked,
8095	NULL, NULL, NULL, NULL, /* Zone enumeration version 7 and forward. */
8096}; // marked as const to spare the DATA section
8097
8098__attribute__((visibility("hidden")))
8099malloc_zone_t *
8100create_purgeable_zone(size_t initial_size, malloc_zone_t *malloc_default_zone, unsigned debug_flags)
8101{
8102	szone_t	*szone;
8103	uint64_t	hw_memsize = 0;
8104
8105	/* get memory for the zone. */
8106	szone = allocate_pages(NULL, SZONE_PAGED_SIZE, 0, 0, VM_MEMORY_MALLOC);
8107	if (!szone)
8108		return NULL;
8109
8110	/* set up the szone structure */
8111#if 0
8112#warning LOG enabled
8113	szone->log_address = ~0;
8114#endif
8115
8116#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__arm64__)
8117	hw_memsize = *(uint64_t *)(uintptr_t)_COMM_PAGE_MEMORY_SIZE;
8118#else
8119	size_t	uint64_t_size = sizeof(hw_memsize);
8120
8121	sysctlbyname("hw.memsize", &hw_memsize, &uint64_t_size, 0, 0);
8122#endif
8123
8124	szone->trg[0].nextgen = &(szone->trg[1]);
8125	szone->trg[1].nextgen = &(szone->trg[0]);
8126	szone->tiny_region_generation = &(szone->trg[0]);
8127
8128	szone->tiny_region_generation->hashed_regions = szone->initial_tiny_regions;
8129	szone->tiny_region_generation->num_regions_allocated = INITIAL_NUM_REGIONS;
8130	szone->tiny_region_generation->num_regions_allocated_shift = INITIAL_NUM_REGIONS_SHIFT;
8131
8132	szone->srg[0].nextgen = &(szone->srg[1]);
8133	szone->srg[1].nextgen = &(szone->srg[0]);
8134	szone->small_region_generation = &(szone->srg[0]);
8135
8136	szone->small_region_generation->hashed_regions = szone->initial_small_regions;
8137	szone->small_region_generation->num_regions_allocated = INITIAL_NUM_REGIONS;
8138	szone->small_region_generation->num_regions_allocated_shift = INITIAL_NUM_REGIONS_SHIFT;
8139
8140	/* Purgeable zone does not participate in the adaptive "largemem" sizing. */
8141	szone->is_largemem = 0;
8142	szone->large_threshold = LARGE_THRESHOLD;
8143	szone->vm_copy_threshold = VM_COPY_THRESHOLD;
8144
8145#if LARGE_CACHE
8146	szone->large_entry_cache_reserve_limit =
	hw_memsize >> 10; // madvise(..., MADV_FREE_REUSABLE) death-row arrivals above this threshold [~0.1%]
8148
8149	/* <rdar://problem/6610904> Reset protection when returning a previous large allocation? */
8150	int32_t libSystemVersion  = NSVersionOfLinkTimeLibrary("System");
8151	if ((-1 != libSystemVersion) && ((libSystemVersion >> 16) < 112) /* CFSystemVersionSnowLeopard */)
8152		szone->large_legacy_reset_mprotect = TRUE;
8153	else
8154		szone->large_legacy_reset_mprotect = FALSE;
8155#endif
8156
8157	szone->basic_zone.version = 8;
8158	szone->basic_zone.size = (void *)purgeable_size;
8159	szone->basic_zone.malloc = (void *)purgeable_malloc;
8160	szone->basic_zone.calloc = (void *)purgeable_calloc;
8161	szone->basic_zone.valloc = (void *)purgeable_valloc;
8162	szone->basic_zone.free = (void *)purgeable_free;
8163	szone->basic_zone.realloc = (void *)purgeable_realloc;
8164	szone->basic_zone.destroy = (void *)purgeable_destroy;
8165	szone->basic_zone.batch_malloc = (void *)purgeable_batch_malloc;
8166	szone->basic_zone.batch_free = (void *)purgeable_batch_free;
8167	szone->basic_zone.introspect = (struct malloc_introspection_t *)&purgeable_introspect;
8168	szone->basic_zone.memalign = (void *)purgeable_memalign;
8169	szone->basic_zone.free_definite_size = (void *)purgeable_free_definite_size;
8170	szone->basic_zone.pressure_relief = (void *)purgeable_pressure_relief;
8171
8172	szone->basic_zone.reserved1 = 0; /* Set to zero once and for all as required by CFAllocator. */
8173	szone->basic_zone.reserved2 = 0; /* Set to zero once and for all as required by CFAllocator. */
8174	mprotect(szone, sizeof(szone->basic_zone), PROT_READ); /* Prevent overwriting the function pointers in basic_zone. */
8175
8176	szone->debug_flags = debug_flags | SCALABLE_MALLOC_PURGEABLE;
8177
8178	/* Purgeable zone does not support SCALABLE_MALLOC_ADD_GUARD_PAGES. */
8179	if (szone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) {
8180		_malloc_printf(ASL_LEVEL_INFO, "purgeable zone does not support guard pages\n");
8181		szone->debug_flags &= ~SCALABLE_MALLOC_ADD_GUARD_PAGES;
8182	}
8183
8184	_malloc_lock_init(&szone->large_szone_lock);
8185
8186	szone->helper_zone = (struct szone_s *)malloc_default_zone;
8187
8188	CHECK(szone, __PRETTY_FUNCTION__);
8189	return (malloc_zone_t *)szone;
8190}
8191
8192/*
8193 * For use by CheckFix: create a new zone whose behavior is, apart from
8194 * the use of death-row and per-CPU magazines, that of Leopard.
8195 */
8196static NOINLINE void *
8197legacy_valloc(szone_t *szone, size_t size)
8198{
8199	void	*ptr;
8200	size_t num_kernel_pages;
8201
8202	num_kernel_pages = round_page_quanta(size) >> vm_page_quanta_shift;
8203	ptr = large_malloc(szone, num_kernel_pages, 0, TRUE);
8204#if DEBUG_MALLOC
8205	if (LOG(szone, ptr))
8206		malloc_printf("legacy_valloc returned %p\n", ptr);
8207#endif
8208	return ptr;
8209}
8210
8211__attribute__((visibility("hidden")))
8212malloc_zone_t *
8213create_legacy_scalable_zone(size_t initial_size, unsigned debug_flags)
8214{
8215	malloc_zone_t *mzone = create_scalable_zone(initial_size, debug_flags);
8216	szone_t	*szone = (szone_t *)mzone;
8217
8218	if (!szone)
8219		return NULL;
8220
8221	szone->is_largemem = 0;
8222	szone->num_small_slots = NUM_SMALL_SLOTS;
8223	szone->large_threshold = LARGE_THRESHOLD;
8224	szone->vm_copy_threshold = VM_COPY_THRESHOLD;
8225
8226	mprotect(szone, sizeof(szone->basic_zone), PROT_READ | PROT_WRITE);
8227	szone->basic_zone.valloc = (void *)legacy_valloc;
8228	szone->basic_zone.free_definite_size = NULL;
8229	mprotect(szone, sizeof(szone->basic_zone), PROT_READ);
8230
8231	return mzone;
8232}
8233
8234/********* Support code for emacs unexec ************/
8235
8236/* History of freezedry version numbers:
8237 *
8238 * 1) Old malloc (before the scalable malloc implementation in this file
8239 *    existed).
8240 * 2) Original freezedrying code for scalable malloc.  This code was apparently
8241 *    based on the old freezedrying code and was fundamentally flawed in its
8242 *    assumption that tracking allocated memory regions was adequate to fake
8243 *    operations on freezedried memory.  This doesn't work, since scalable
8244 *    malloc does not store flags in front of large page-aligned allocations.
8245 * 3) Original szone-based freezedrying code.
8246 * 4) Fresher malloc with tiny zone
8247 * 5) 32/64bit compatible malloc
8248 * 6) Metadata within 1MB and 8MB region for tiny and small
8249 *
8250 * No version backward compatibility is provided, but the version number does
8251 * make it possible for malloc_jumpstart() to return an error if the application
8252 * was freezedried with an older version of malloc.
8253 */
8254#define MALLOC_FREEZEDRY_VERSION 6
8255
8256typedef struct {
8257	unsigned	version;
8258	unsigned	nszones;
8259	szone_t	*szones;
8260} malloc_frozen;
8261
8262static void *
8263frozen_malloc(szone_t *zone, size_t new_size)
8264{
8265	return malloc(new_size);
8266}
8267
8268static void *
8269frozen_calloc(szone_t *zone, size_t num_items, size_t size)
8270{
8271	return calloc(num_items, size);
8272}
8273
8274static void *
8275frozen_valloc(szone_t *zone, size_t new_size)
8276{
8277	return valloc(new_size);
8278}
8279
8280static void *
8281frozen_realloc(szone_t *zone, void *ptr, size_t new_size)
8282{
8283	size_t	old_size = szone_size(zone, ptr);
8284	void	*new_ptr;
8285
8286	if (new_size <= old_size) {
8287		return ptr;
8288	}
8289	new_ptr = malloc(new_size);
8290	if (old_size > 0) {
8291		memcpy(new_ptr, ptr, old_size);
8292	}
8293	return new_ptr;
8294}
8295
8296static void
8297frozen_free(szone_t *zone, void *ptr)
8298{
8299}
8300
8301static void
8302frozen_destroy(szone_t *zone)
8303{
8304}
8305
8306/********* Pseudo-private API for emacs unexec ************/
8307
8308/*
8309 * malloc_freezedry() records all of the szones in use, so that they can be
8310 * partially reconstituted by malloc_jumpstart().  Due to the differences
8311 * between reconstituted memory regions and those created by the szone code,
8312 * care is taken not to reallocate from the freezedried memory, except in the
8313 * case of a non-growing realloc().
8314 *
8315 * Due to the flexibility provided by the zone registration mechanism, it is
8316 * impossible to implement generic freezedrying for any zone type.  This code
8317 * only handles applications that use the szone allocator, so malloc_freezedry()
8318 * returns 0 (error) if any non-szone zones are encountered.
8319 */
8320
8321uintptr_t
8322malloc_freezedry(void)
8323{
8324	extern unsigned malloc_num_zones;
8325	extern malloc_zone_t **malloc_zones;
8326	malloc_frozen *data;
8327	unsigned i;
8328
8329	/* Allocate space in which to store the freezedry state. */
8330	data = (malloc_frozen *) malloc(sizeof(malloc_frozen));
8331
8332	/* Set freezedry version number so that malloc_jumpstart() can check for compatibility. */
8333	data->version = MALLOC_FREEZEDRY_VERSION;
8334
8335	/* Allocate the array of szone pointers. */
8336	data->nszones = malloc_num_zones;
8337	data->szones = (szone_t *) calloc(malloc_num_zones, sizeof(szone_t));
8338
8339	/*
8340	 * Fill in the array of szone structures.  They are copied rather than
8341	 * referenced, since the originals are likely to be clobbered during malloc
8342	 * initialization.
8343	 */
8344	for (i = 0; i < malloc_num_zones; i++) {
8345		if (strcmp(malloc_zones[i]->zone_name, "DefaultMallocZone")) {
8346			/* Unknown zone type. */
8347			free(data->szones);
8348			free(data);
8349			return 0;
8350		}
8351		memcpy(&data->szones[i], malloc_zones[i], sizeof(szone_t));
8352	}
8353
8354	return((uintptr_t)data);
8355}
8356
8357int
8358malloc_jumpstart(uintptr_t cookie)
8359{
8360	malloc_frozen *data = (malloc_frozen *)cookie;
8361	unsigned i;
8362
8363	if (data->version != MALLOC_FREEZEDRY_VERSION) {
8364		/* Unsupported freezedry version. */
8365		return 1;
8366	}
8367
8368	for (i = 0; i < data->nszones; i++) {
8369		/* Set function pointers.  Even the functions that stay the same must be
8370		 * set, since there are no guarantees that they will be mapped to the
8371		 * same addresses. */
8372		data->szones[i].basic_zone.size = (void *) szone_size;
8373		data->szones[i].basic_zone.malloc = (void *) frozen_malloc;
8374		data->szones[i].basic_zone.calloc = (void *) frozen_calloc;
8375		data->szones[i].basic_zone.valloc = (void *) frozen_valloc;
8376		data->szones[i].basic_zone.free = (void *) frozen_free;
8377		data->szones[i].basic_zone.realloc = (void *) frozen_realloc;
8378		data->szones[i].basic_zone.destroy = (void *) frozen_destroy;
8379		data->szones[i].basic_zone.introspect = (struct malloc_introspection_t *)&szone_introspect;
8380
8381		/* Register the freezedried zone. */
8382		malloc_zone_register(&data->szones[i].basic_zone);
8383	}
8384
8385	return 0;
8386}
8387
8388/* vim: set noet:ts=4:sw=4:cindent: */
8389