1/*
2 * Copyright (c) 1999, 2000, 2003, 2005, 2008, 2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24#ifdef __LP64__ /* nano_malloc for 64bit ABI */
25#define NDEBUG 1
26#define NANO_FREE_DEQUEUE_DILIGENCE 1 /* Check for corrupt free list */
27
28#include <_simple.h>
29#include <assert.h>
30#include <stddef.h>
31#include <stdint.h>
32#include <stdlib.h>
33#include <unistd.h>
34#include <limits.h>
35#include <errno.h>
36#include <TargetConditionals.h>
37
38#include <sys/types.h>
39#include <sys/mman.h>
40#include <sys/param.h>
41
42#include <mach/mach.h>
43#include <mach/mach_vm.h>
44
45#include <libkern/OSAtomic.h>
46#include <mach-o/dyld_priv.h>	/* for _dyld_get_image_slide() */
47#include <crt_externs.h>	/* for _NSGetMachExecuteHeader() */
48
49#include <os/tsd.h>
50
51#if defined(__x86_64__)
52#define __APPLE_API_PRIVATE
53#include <machine/cpu_capabilities.h>
54#define _COMM_PAGE_VERSION_REQD 9
55#undef __APPLE_API_PRIVATE
56#else
#error Unknown Architecture
58#endif
59
60#include "scalable_malloc.h"
61#include "malloc_internal.h"
62#include "malloc_printf.h"
63
64#include <CrashReporterClient.h>
65
66#include "bitarray.h"
67
68#ifndef VM_MEMORY_MALLOC_NANO /* Until osfmk/mach/vm_statistics.h is updated in xnu */
69#define VM_MEMORY_MALLOC_NANO 11
70#endif
71
72extern uint64_t malloc_entropy[2];
73/*********************	DEFINITIONS	************************/
74
75#define INLINE	__inline__
76#define ALWAYSINLINE __attribute__((always_inline))
77#define NOINLINE __attribute__((noinline))
78
79#if defined(__x86_64__)
80#define CACHE_LINE	64
81#define CACHE_ALIGN __attribute__ ((aligned (64) ))
82#else
83#define CACHE_ALIGN /* TBD for other platforms */
84#endif
85
86#define NANO_MAG_INDEX(nz)		(_os_cpu_number() >> nz->hyper_shift)
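/*
 * Illustrative example (the hyper_shift value is an assumption, not taken from this
 * file): on a 2-way hyperthreaded machine initialized with hyper_shift == 1,
 * _os_cpu_number() values 4 and 5 both yield NANO_MAG_INDEX() == 2, so the two
 * hyperthreads of a core share one magazine; with hyper_shift == 0 every logical
 * CPU gets a magazine of its own.
 */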
87
88#define SCRIBBLE_BYTE			0xaa /* allocated scribble */
89#define SCRABBLE_BYTE			0x55 /* free()'d scribble */
#define SCRUBBLE_BYTE			0xdd /* madvise(..., MADV_FREE) scribble */
91
92#define MAX_RECORDER_BUFFER		256
93
94/*************          nanozone address field layout        ******************/
95
#if defined(__x86_64__)
97#define NANO_SIGNATURE_BITS		20
98#define NANOZONE_SIGNATURE		0x00006ULL			// 0x00006nnnnnnnnnnn the address range devoted to us.
99#define NANO_MAG_BITS			5
100#define NANO_BAND_BITS			18
101#define NANO_SLOT_BITS			4
102#define NANO_OFFSET_BITS		17
103
104#else
105#error Unknown Architecture
106#endif
107
108#if defined(__BIG_ENDIAN__)
109struct nano_blk_addr_s {
110	uint64_t
111nano_signature:NANO_SIGNATURE_BITS,	// 0x00006nnnnnnnnnnn the address range devoted to us.
112nano_mag_index:NANO_MAG_BITS,		// the core that allocated this block
113nano_band:NANO_BAND_BITS,
nano_slot:NANO_SLOT_BITS,		// bucket of homogeneous quanta-multiple blocks
115nano_offset:NANO_OFFSET_BITS;		// locates the block
116};
117#else
118// least significant bits declared first
119struct nano_blk_addr_s {
120	uint64_t
121nano_offset:NANO_OFFSET_BITS,		// locates the block
nano_slot:NANO_SLOT_BITS,		// bucket of homogeneous quanta-multiple blocks
123nano_band:NANO_BAND_BITS,
124nano_mag_index:NANO_MAG_BITS,		// the core that allocated this block
125nano_signature:NANO_SIGNATURE_BITS;	// 0x00006nnnnnnnnnnn the address range devoted to us.
126};
127#endif
128
129typedef union  {
130	uint64_t			addr;
131	struct nano_blk_addr_s	fields;
132} nano_blk_addr_t;
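/*
 * Worked example (illustrative address): the union above lets a raw pointer be
 * decoded without shifts or masks. With the x86_64 field widths defined above,
 * the address 0x00006180004A0040 decodes as
 *
 *	nano_signature = 0x00006  (NANOZONE_SIGNATURE)
 *	nano_mag_index = 3        (magazine of the allocating core)
 *	nano_band      = 2
 *	nano_slot      = 5        (blocks of (5 + 1) << SHIFT_NANO_QUANTUM == 96 bytes)
 *	nano_offset    = 0x40     (byte offset within this slot's span of the band)
 *
 * The field widths 17 + 4 + 18 + 5 + 20 sum to exactly 64 bits, so every
 * nano pointer decomposes this way.
 */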
133
134/* Are we using the nano allocator? Set by the initializer. */
135__attribute__((visibility("hidden")))
136boolean_t _malloc_engaged_nano;
137
138#define NANO_MAX_SIZE			256 /* Buckets sized {16, 32, 48, 64, 80, 96, 112, ...} */
139#define SHIFT_NANO_QUANTUM		4
140#define NANO_REGIME_QUANTA_SIZE		(1 << SHIFT_NANO_QUANTUM)	// 16
141#define NANO_QUANTA_MASK		0xFULL				// NANO_REGIME_QUANTA_SIZE - 1
142
143#define SLOT_IN_BAND_SIZE 	(1 << NANO_OFFSET_BITS)
144#define SLOT_KEY_LIMIT 		(1 << NANO_SLOT_BITS) /* Must track nano_slot width */
145#define BAND_SIZE 		(1 << (NANO_SLOT_BITS + NANO_OFFSET_BITS)) /*  == Number of bytes covered by a page table entry */
146#define NANO_MAG_SIZE 		(1 << NANO_MAG_BITS)
147#define NANO_SLOT_SIZE 		(1 << NANO_SLOT_BITS)
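/*
 * With the x86_64 widths above these work out to: SLOT_IN_BAND_SIZE == 128 KiB
 * (2^17), SLOT_KEY_LIMIT == 16, BAND_SIZE == 2 MiB (2^21), NANO_MAG_SIZE == 32
 * magazines and NANO_SLOT_SIZE == 16 slots; i.e. each 2 MiB band is carved into
 * sixteen 128 KiB spans, one per block-size class.
 */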
148
149/****************************** zone itself ***********************************/
150
151/*
 * Note that objects whose addresses are held in pointers here must be pursued
153 * individually in the nano_in_use_enumeration() routines.
154 */
155
156typedef struct chained_block_s {
157	uintptr_t			double_free_guard;
158	struct chained_block_s	*next;
159} *chained_block_t;
160
typedef struct nano_meta_s {
	OSQueueHead			slot_LIFO CACHE_ALIGN;
	unsigned int			slot_madvised_log_page_count;
	volatile uintptr_t		slot_current_base_addr;
	volatile uintptr_t		slot_limit_addr;
	volatile size_t			slot_objects_mapped;
	volatile size_t			slot_objects_skipped;
	bitarray_t			slot_madvised_pages;
	volatile uintptr_t		slot_bump_addr CACHE_ALIGN;	// position on cache line distinct from that of slot_LIFO
	volatile boolean_t		slot_exhausted;
	unsigned int			slot_bytes;
	unsigned int			slot_objects;
} *nano_meta_admin_t;
174
175typedef struct nanozone_s {				// vm_allocate()'d, so page-aligned to begin with.
176	malloc_zone_t		basic_zone;		// first page will be given read-only protection
177	uint8_t			pad[PAGE_MAX_SIZE - sizeof(malloc_zone_t)];
178
179	// remainder of structure is R/W (contains no function pointers)
180	// page-aligned
181	struct nano_meta_s		meta_data[NANO_MAG_SIZE][NANO_SLOT_SIZE]; // max: NANO_MAG_SIZE cores x NANO_SLOT_SIZE slots for nano blocks {16 .. 256}
182	_malloc_lock_s			band_resupply_lock[NANO_MAG_SIZE];
	uintptr_t			band_max_mapped_baseaddr[NANO_MAG_SIZE];
184	size_t			core_mapped_size[NANO_MAG_SIZE];
185
186	unsigned			debug_flags;
187	unsigned			our_signature;
188	unsigned			phys_ncpus;
189	unsigned			logical_ncpus;
190	unsigned			hyper_shift;
191
192	/* security cookie */
193	uintptr_t			cookie;
194
195	/*
196	 * The nano zone constructed by create_nano_zone() would like to hand off tiny, small, and large
197	 * allocations to the default scalable zone. Record the latter as the "helper" zone here.
198	 */
199	malloc_zone_t		*helper_zone;
200} nanozone_t;
201
202#define SZONE_PAGED_SIZE	((sizeof(nanozone_t) + vm_page_size - 1) & ~ (vm_page_size - 1))
203
204/*********************             PROTOTYPES		***********************/
205extern void malloc_error_break(void);
206
207// msg prints after fmt, ...
208static NOINLINE void	nanozone_error(nanozone_t *nanozone, int is_corruption, const char *msg, const void *ptr, const char *fmt, ...)
209__printflike(5, 6);
210
211static void nano_statistics(nanozone_t *nanozone, malloc_statistics_t *stats);
212
213/*********************	   VERY LOW LEVEL UTILITIES    ************************/
214// msg prints after fmt, ...
215
216static NOINLINE void
217nanozone_error(nanozone_t *nanozone, int is_corruption, const char *msg, const void *ptr, const char *fmt, ...)
218{
219	va_list ap;
220	_SIMPLE_STRING b = _simple_salloc();
221
222	if (b) {
223		if (fmt) {
224			va_start(ap, fmt);
225			_simple_vsprintf(b, fmt, ap);
226			va_end(ap);
227		}
228		if (ptr) {
229			_simple_sprintf(b, "*** error for object %p: %s\n", ptr, msg);
230		} else {
231			_simple_sprintf(b, "*** error: %s\n", msg);
232		}
233		malloc_printf("%s*** set a breakpoint in malloc_error_break to debug\n", _simple_string(b));
234	} else {
235		/*
236		 * Should only get here if vm_allocate() can't get a single page of
237		 * memory, implying _simple_asl_log() would also fail.  So we just
238		 * print to the file descriptor.
239		 */
240		if (fmt) {
241			va_start(ap, fmt);
242			_malloc_vprintf(MALLOC_PRINTF_NOLOG, fmt, ap);
243			va_end(ap);
244		}
245		if (ptr) {
246			_malloc_printf(MALLOC_PRINTF_NOLOG, "*** error for object %p: %s\n", ptr, msg);
247		} else {
248			_malloc_printf(MALLOC_PRINTF_NOLOG, "*** error: %s\n", msg);
249		}
250		_malloc_printf(MALLOC_PRINTF_NOLOG, "*** set a breakpoint in malloc_error_break to debug\n");
251	}
252	malloc_error_break();
253
254	// Call abort() if this is a memory corruption error and the abort on
255	// corruption flag is set, or if any error should abort.
256	if ((is_corruption && (nanozone->debug_flags & SCALABLE_MALLOC_ABORT_ON_CORRUPTION)) ||
257		(nanozone->debug_flags & SCALABLE_MALLOC_ABORT_ON_ERROR)) {
258		CRSetCrashLogMessage(b ? _simple_string(b) : msg);
259		abort();
260	} else if (b) {
261		_simple_sfree(b);
262	}
263}
264
265static void
266protect(void *address, size_t size, unsigned protection, unsigned debug_flags)
267{
268	kern_return_t	err;
269
270	if (!(debug_flags & SCALABLE_MALLOC_DONT_PROTECT_PRELUDE)) {
271		err = mprotect((void *)((uintptr_t)address - vm_page_size), vm_page_size, protection);
272		if (err) {
			malloc_printf("*** can't protect(%u) region for prelude guard page at %p\n",
						  protection, (uintptr_t)address - vm_page_size);
275		}
276	}
277	if (!(debug_flags & SCALABLE_MALLOC_DONT_PROTECT_POSTLUDE)) {
278		err = mprotect((void *)((uintptr_t)address + size), vm_page_size, protection);
279		if (err) {
			malloc_printf("*** can't protect(%u) region for postlude guard page at %p\n",
						  protection, (uintptr_t)address + size);
282		}
283	}
284}
285
286static void *
287allocate_based_pages(nanozone_t *nanozone, size_t size, unsigned char align, unsigned debug_flags, int vm_page_label, void *base_addr)
288{
289	boolean_t add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES;
290	mach_vm_address_t vm_addr;
291	uintptr_t addr;
292	mach_vm_size_t allocation_size = round_page(size);
293	mach_vm_offset_t allocation_mask = ((mach_vm_offset_t)1 << align) - 1;
294	int alloc_flags = VM_FLAGS_ANYWHERE | VM_MAKE_TAG(vm_page_label);
295	kern_return_t kr;
296
297	if (!allocation_size) allocation_size = vm_page_size;
298	if (add_guard_pages) allocation_size += 2 * vm_page_size;
299	if (allocation_size < size) // size_t arithmetic wrapped!
300		return NULL;
301
302	vm_addr = round_page((mach_vm_address_t)base_addr);
303	if (!vm_addr) vm_addr = vm_page_size;
304	kr = mach_vm_map(mach_task_self(), &vm_addr, allocation_size,
305			allocation_mask, alloc_flags, MEMORY_OBJECT_NULL, 0, FALSE,
306			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
307	if (kr) {
308		nanozone_error(nanozone, 0, "can't allocate pages", NULL,
309				"*** mach_vm_map(size=%lu) failed (error code=%d)\n",
310					   size, kr);
311		return NULL;
312	}
313	addr = (uintptr_t)vm_addr;
314
315	if (add_guard_pages) {
316		addr += vm_page_size;
317		protect((void *)addr, size, PROT_NONE, debug_flags);
318	}
319	return (void *)addr;
320}
321
322static void *
323allocate_pages(nanozone_t *nanozone, size_t size, unsigned char align, unsigned debug_flags, int vm_page_label)
324{
325	return allocate_based_pages(nanozone, size, align, debug_flags, vm_page_label, 0);
326}
327
328static void
329deallocate_pages(nanozone_t *nanozone, void *addr, size_t size, unsigned debug_flags)
330{
331	boolean_t add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES;
332	mach_vm_address_t vm_addr = (mach_vm_address_t)addr;
333	mach_vm_size_t allocation_size = size;
334	kern_return_t kr;
335
336	if (add_guard_pages) {
337		vm_addr -= vm_page_size;
338		allocation_size += 2 * vm_page_size;
339	}
340	kr = mach_vm_deallocate(mach_task_self(), vm_addr, allocation_size);
341	if (kr && nanozone)
342		nanozone_error(nanozone, 0, "Can't deallocate_pages at", addr, NULL);
343}
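/*
 * Sketch of the guard-page layout the two routines above agree on when
 * SCALABLE_MALLOC_ADD_GUARD_PAGES is set:
 *
 *	| guard page (PROT_NONE) | size bytes (page-rounded) | guard page (PROT_NONE) |
 *	^ mapped base             ^ pointer returned to the caller (base + vm_page_size)
 *
 * deallocate_pages() reverses the adjustment by backing up one page and
 * deallocating size + 2 * vm_page_size.
 */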
344
345/*
346 * We maintain separate free lists for each (quantized) size. The literature
347 * calls this the "segregated policy".
348 */
349
350static boolean_t
351segregated_band_grow(nanozone_t *nanozone, nano_meta_admin_t pMeta, unsigned int slot_bytes, unsigned int mag_index)
352{
353	nano_blk_addr_t u; // the compiler holds this in a register
354	uintptr_t p, s;
355	size_t watermark, hiwater;
356
357	if (0 == pMeta->slot_current_base_addr) { // First encounter?
358
359		u.fields.nano_signature = NANOZONE_SIGNATURE;
360		u.fields.nano_mag_index = mag_index;
361		u.fields.nano_band = 0;
362		u.fields.nano_slot = (slot_bytes >> SHIFT_NANO_QUANTUM) - 1;
363		u.fields.nano_offset = 0;
364
365		p = u.addr;
366		pMeta->slot_bytes = slot_bytes;
367		pMeta->slot_objects = SLOT_IN_BAND_SIZE / slot_bytes;
368	} else {
369		p = pMeta->slot_current_base_addr + BAND_SIZE; // Growing, so stride ahead by BAND_SIZE
370
371		u.addr = (uint64_t)p;
372		if (0 == u.fields.nano_band) // Did the band index wrap?
373			return FALSE;
374
375		assert(slot_bytes == pMeta->slot_bytes);
376	}
377	pMeta->slot_current_base_addr = p;
378
379	mach_vm_address_t vm_addr = p & ~((uintptr_t)(BAND_SIZE - 1)); // Address of the (2MB) band covering this (128KB) slot
380
	if (nanozone->band_max_mapped_baseaddr[mag_index] < vm_addr) {
		// Obtain the next band to cover this slot
		kern_return_t kr = mach_vm_map(mach_task_self(), &vm_addr, BAND_SIZE,
				0, VM_MAKE_TAG(VM_MEMORY_MALLOC_NANO), MEMORY_OBJECT_NULL, 0, FALSE,
				VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

		void *q = (void *)vm_addr;
		if (kr || q != (void *)(p & ~((uintptr_t)(BAND_SIZE - 1)))) // Must get exactly what we asked for
			return FALSE;

		nanozone->band_max_mapped_baseaddr[mag_index] = vm_addr;
	}
393
	// Randomize the starting allocation from this slot (introduces 9 to 13 bits of entropy, depending on slot_bytes)
395	if (0 == pMeta->slot_objects_mapped) { // First encounter?
396		pMeta->slot_objects_skipped = (malloc_entropy[1] % (SLOT_IN_BAND_SIZE / slot_bytes));
397		pMeta->slot_bump_addr = p + (pMeta->slot_objects_skipped * slot_bytes);
398	} else {
399		pMeta->slot_bump_addr = p;
400	}
401
402	pMeta->slot_limit_addr = p + (SLOT_IN_BAND_SIZE / slot_bytes) * slot_bytes;
403	pMeta->slot_objects_mapped += (SLOT_IN_BAND_SIZE / slot_bytes);
404
405	u.fields.nano_signature = NANOZONE_SIGNATURE;
406	u.fields.nano_mag_index = mag_index;
407	u.fields.nano_band = 0;
408	u.fields.nano_slot = 0;
409	u.fields.nano_offset = 0;
410	s = u.addr; // Base for this core.
411
412	// Set the high water mark for this CPU's entire magazine, if this resupply raised it.
	watermark = nanozone->core_mapped_size[mag_index];
	hiwater = MAX(watermark, p - s + SLOT_IN_BAND_SIZE);
	nanozone->core_mapped_size[mag_index] = hiwater;
416
417	return TRUE;
418}
419
420static inline unsigned long
421divrem(unsigned long a, unsigned int b, unsigned int *remainder)
422{
423	// Encapsulating the modulo and division in an in-lined function convinces the compiler
424	// to issue just a single divide instruction to obtain quotient and remainder. Go figure.
425	*remainder = a % b;
426	return a / b;
427}
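/*
 * e.g. divrem(100, 16, &r) returns 6 with r == 4, using one hardware divide for
 * both results; offset_to_index() and index_to_offset() below lean on this.
 */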
428
429static INLINE void *
430segregated_next_block(nanozone_t *nanozone, nano_meta_admin_t pMeta, unsigned int slot_bytes, unsigned int mag_index)
431{
432	while (1) {
433		uintptr_t theLimit = pMeta->slot_limit_addr; // Capture the slot limit that bounds slot_bump_addr right now
434		uintptr_t b = OSAtomicAdd64Barrier(slot_bytes, (volatile int64_t *)&(pMeta->slot_bump_addr));
435		b -= slot_bytes; // Atomic op returned addr of *next* free block. Subtract to get addr for *this* allocation.
436
437		if (b < theLimit) { // Did we stay within the bound of the present slot allocation?
438			return (void *)b; // Yep, so the slot_bump_addr this thread incremented is good to go
439		} else {
			if (pMeta->slot_exhausted) { // exhausted all the bands available for this slot?
				return 0; // We're toast
			} else {
				// One thread will grow the heap; others will see it's been grown and retry allocation
444				_malloc_lock_lock(&nanozone->band_resupply_lock[mag_index]);
445				// re-check state now that we've taken the lock
446				if (pMeta->slot_exhausted) {
447					_malloc_lock_unlock(&nanozone->band_resupply_lock[mag_index]);
448					return 0; // Toast
449				} else if (b < pMeta->slot_limit_addr) {
450					_malloc_lock_unlock(&nanozone->band_resupply_lock[mag_index]);
451					continue; // ... the slot was successfully grown by first-taker (not us). Now try again.
452				} else if (segregated_band_grow(nanozone, pMeta, slot_bytes, mag_index)) {
453					_malloc_lock_unlock(&nanozone->band_resupply_lock[mag_index]);
454					continue; // ... the slot has been successfully grown by us. Now try again.
455				} else {
456					pMeta->slot_exhausted = TRUE;
457					_malloc_lock_unlock(&nanozone->band_resupply_lock[mag_index]);
458					return 0;
459				}
460			}
461		}
462	}
463}
464
465static INLINE unsigned int
466segregated_size_to_fit(nanozone_t *nanozone, size_t size, unsigned int *pKey)
467{
468	unsigned int k, slot_bytes;
469
470	if (0 == size)
471		size = NANO_REGIME_QUANTA_SIZE; // Historical behavior
472
473	k = (size + NANO_REGIME_QUANTA_SIZE - 1) >> SHIFT_NANO_QUANTUM; // round up and shift for number of quanta
474	slot_bytes = k << SHIFT_NANO_QUANTUM; // multiply by power of two quanta size
475	*pKey = k - 1; // Zero-based!
476
477	return slot_bytes;
478}
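/*
 * Illustrative sketch (not compiled): how request sizes map onto slots under the
 * rounding above. Sizes 1..16 share slot key 0 (16-byte blocks), 17..32 share
 * key 1, and the largest nano request, 256, lands in key 15.
 */
#if 0
static void
segregated_size_to_fit_examples(nanozone_t *nanozone)
{
	unsigned int key;

	assert(segregated_size_to_fit(nanozone, 0, &key) == 16 && key == 0);   // historical: 0 behaves like 16
	assert(segregated_size_to_fit(nanozone, 16, &key) == 16 && key == 0);
	assert(segregated_size_to_fit(nanozone, 17, &key) == 32 && key == 1);
	assert(segregated_size_to_fit(nanozone, 200, &key) == 208 && key == 12);
	assert(segregated_size_to_fit(nanozone, 256, &key) == 256 && key == 15);
}
#endif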
479
480static INLINE index_t
481offset_to_index(nanozone_t *nanozone, nano_meta_admin_t pMeta, uintptr_t offset)
482{
483	unsigned int slot_bytes = pMeta->slot_bytes;
484	unsigned int slot_objects = pMeta->slot_objects; // SLOT_IN_BAND_SIZE / slot_bytes;
	unsigned int rem;
	unsigned long quo = divrem(offset, BAND_SIZE, &rem);

	assert(0 == rem % slot_bytes);
	return (quo * slot_objects) + (rem / slot_bytes);
490}
491
492static INLINE uintptr_t
493index_to_offset(nanozone_t *nanozone, nano_meta_admin_t pMeta, index_t i)
494{
495	unsigned int slot_bytes = pMeta->slot_bytes;
496	unsigned int slot_objects = pMeta->slot_objects; // SLOT_IN_BAND_SIZE / slot_bytes;
	unsigned int rem;
	unsigned long quo = divrem(i, slot_objects, &rem);
499
500	return (quo * BAND_SIZE) + (rem * slot_bytes);
501}
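/*
 * Worked example (slot_bytes == 16, so slot_objects == 131072 / 16 == 8192):
 * the block one band up and one block in sits at offset BAND_SIZE + 16, and
 * offset_to_index() maps it to (1 * 8192) + 1 == 8193; index_to_offset(8193)
 * maps straight back. Offsets are measured from the slot's base address, so
 * each whole band contributes a BAND_SIZE stride but only slot_objects indices.
 */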
502
503static kern_return_t
504segregated_in_use_enumerator(task_t task, void *context, unsigned type_mask, nanozone_t *nanozone,
505							 memory_reader_t reader, vm_range_recorder_t recorder)
506{
507	unsigned int	mag_index, slot_key;
508	vm_range_t		ptr_range;
509	vm_range_t		buffer[MAX_RECORDER_BUFFER];
510	kern_return_t	err;
511	unsigned		count = 0;
512
513	for (mag_index = 0; mag_index < nanozone->phys_ncpus; mag_index++) {
514		uintptr_t	clone_magazine; // magazine base for ourselves
515		nano_blk_addr_t	p; // slot base for remote
516		uintptr_t	clone_slot_base; // slot base for ourselves (tracks with "p")
517
518		// Establish p as base address for slot 0 in remote
519		p.fields.nano_signature = NANOZONE_SIGNATURE;
520		p.fields.nano_mag_index = mag_index;
521		p.fields.nano_band = 0;
522		p.fields.nano_slot = 0;
523		p.fields.nano_offset = 0;
524
525		if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) {
526			mach_vm_address_t vm_addr;
527			mach_vm_size_t alloc_size = nanozone->core_mapped_size[mag_index];
528			int alloc_flags = VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_MEMORY_MALLOC);
529
530			vm_addr = vm_page_size;
531			kern_return_t kr = mach_vm_allocate(mach_task_self(), &vm_addr, alloc_size, alloc_flags);
532			if (kr) {
533				return kr;
534			}
535			clone_magazine = (uintptr_t)vm_addr;
536			clone_slot_base = clone_magazine; // base for slot 0 in this local magazine
537		} else {
538			clone_slot_base = clone_magazine = 0; // and won't be used in this loop
539		}
540
541		for (slot_key = 0; slot_key < SLOT_KEY_LIMIT;
542			 p.addr += SLOT_IN_BAND_SIZE,	    // Advance to next slot base for remote
543			 clone_slot_base += SLOT_IN_BAND_SIZE,   // Advance to next slot base for ourselves
544			 slot_key++) {
545			nano_meta_admin_t pMeta = &(nanozone->meta_data[mag_index][slot_key]);
546			size_t slot_objects_mapped = pMeta->slot_objects_mapped; // capture this volatile count
547
548			if (0 == slot_objects_mapped) // Nothing allocated in this magazine for this slot?
549				continue;
550
551			if (type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) {
552				/* do NOTHING as there is no distinct admin region */
553			}
554
555			if (type_mask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE)) {
556				nano_blk_addr_t q = p;
557				uintptr_t skip_adj = index_to_offset(nanozone, pMeta, pMeta->slot_objects_skipped);
558
559				while (q.addr < pMeta->slot_limit_addr) {
560					ptr_range.address = q.addr + skip_adj;
561					ptr_range.size = SLOT_IN_BAND_SIZE - skip_adj;
562					skip_adj = 0;
563					recorder(task, context, MALLOC_PTR_REGION_RANGE_TYPE, &ptr_range, 1);
564					q.addr += BAND_SIZE;
565				}
566			}
567
568			if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) {
569				nano_blk_addr_t q = p;
570				uintptr_t slot_band, clone_slot_band_base = clone_slot_base;
571				uintptr_t skip_adj = index_to_offset(nanozone, pMeta, pMeta->slot_objects_skipped);
572
573				while (q.addr < pMeta->slot_limit_addr) {
574					// read slot in each remote band. Lands in some random location.
575					size_t len = MIN(pMeta->slot_bump_addr - q.addr, SLOT_IN_BAND_SIZE);
576					err = reader(task, (vm_address_t)(q.addr + skip_adj), len - skip_adj, (void **)&slot_band);
577					if (err)
578						return err;
579
580					// Place the data just read in the correct position relative to the local magazine.
581					memcpy((void *)(clone_slot_band_base + skip_adj), (void *)slot_band, len - skip_adj);
582
583					// Simultaneously advance pointers in remote and ourselves to the next band.
584					q.addr += BAND_SIZE;
585					clone_slot_band_base += BAND_SIZE;
586					skip_adj = 0;
587				}
588
589				// Walk the slot free list and populate a bitarray_t
590				int log_size = 64 - __builtin_clzl(slot_objects_mapped);
591				bitarray_t slot_bitarray = bitarray_create(log_size);
592
593				if (!slot_bitarray)
594					return errno;
595
596				chained_block_t t;
597				unsigned stoploss = slot_objects_mapped;
598				while ((t = OSAtomicDequeue( &(pMeta->slot_LIFO), offsetof(struct chained_block_s,next) + (clone_slot_base - p.addr)))) {
599					if (0 == stoploss) {
600						malloc_printf("Free list walk in segregated_in_use_enumerator exceeded object count.");
601						break;
602					}
603					stoploss--;
604
605					uintptr_t	offset = ((uintptr_t)t - p.addr); // offset from beginning of slot, task-independent
606					index_t 	block_index = offset_to_index(nanozone, pMeta, offset);
607
608					if (block_index < slot_objects_mapped)
609						bitarray_set(slot_bitarray, log_size, block_index);
610				}
611				// N.B. pMeta->slot_LIFO in *this* task is now drained (remote free list has *not* been disturbed)
612
613				// Copy the bitarray_t denoting madvise()'d pages (if any) into *this* task's address space
614				bitarray_t madv_page_bitarray;
615				int log_page_count;
616
617				if (pMeta->slot_madvised_pages) {
618					log_page_count = pMeta->slot_madvised_log_page_count;
619					err = reader(task, (vm_address_t)(pMeta->slot_madvised_pages), bitarray_size(log_page_count), (void **)&madv_page_bitarray);
620					if (err)
621						return err;
622				} else {
623					madv_page_bitarray = NULL;
624					log_page_count = 0;
625				}
626
627				// Enumerate all the block indices issued to date, and report those not on the free list
628				index_t i;
629				for (i = pMeta->slot_objects_skipped; i < slot_objects_mapped; ++i)  {
630					uintptr_t block_offset = index_to_offset(nanozone, pMeta, i);
631					if (p.addr + block_offset >= pMeta->slot_bump_addr)
632						break;
633
634					// blocks falling on madvise()'d pages are free! So not enumerated.
635					if (madv_page_bitarray) {
636						nano_blk_addr_t q;
637						index_t pgnum, pgnum_end;
638
639						q.addr = p.addr + block_offset;
640						pgnum = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;
641						q.addr += pMeta->slot_bytes - 1;
642						pgnum_end = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;
643
644						if (pgnum < (1 << log_page_count)) {// bounds check for bitarray_get()'s that follow
645							if (bitarray_get(madv_page_bitarray, log_page_count, pgnum) ||
646								bitarray_get(madv_page_bitarray, log_page_count, pgnum_end)) {
647								continue;
648							}
649						}
650					}
651
652					if (!bitarray_get(slot_bitarray, log_size, i)) {
653						buffer[count].address = p.addr + block_offset;
654						buffer[count].size = (slot_key + 1) << SHIFT_NANO_QUANTUM;
655						count++;
656						if (count >= MAX_RECORDER_BUFFER) {
657							recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
658							count = 0;
659						}
660					}
661				}
662				if (count) {
663					recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
664					count = 0;
665				}
666
667				free(slot_bitarray);
668			}
669		}
670		if (clone_magazine) {
671			mach_vm_address_t vm_addr = clone_magazine;
672			mach_vm_size_t alloc_size = nanozone->core_mapped_size[mag_index];
673			mach_vm_deallocate(mach_task_self(), vm_addr, alloc_size);
674		}
675	}
676	return 0;
677}
678
679/******************           nanozone methods           **********************/
680/*
681 * These methods are called with "ptr" known to possess the nano signature (from
682 * which we can additionally infer "ptr" is not NULL), and with "size" bounded to
683 * the extent of the nano allocation regime -- (0, 256].
684 */
685
686static INLINE size_t
687__nano_vet_and_size(nanozone_t *nanozone, const void *ptr)
688{
689	// Extracts the size of the block in bytes. Checks for a plausible ptr.
690	nano_blk_addr_t	p; // the compiler holds this in a register
691	nano_meta_admin_t	pMeta;
692
693	p.addr = (uint64_t)ptr; // Begin the dissection of ptr
694
695	if (nanozone->phys_ncpus <= p.fields.nano_mag_index)
696		return 0;
697
698	if (p.fields.nano_offset & NANO_QUANTA_MASK) // stray low-order bits?
699		return 0;
700
701	pMeta = &(nanozone->meta_data[p.fields.nano_mag_index][p.fields.nano_slot]);
702	if ((void *)(pMeta->slot_bump_addr) <= ptr)
703		return 0; // Beyond what's ever been allocated!
704
705	if ((p.fields.nano_offset % pMeta->slot_bytes) != 0)
706		return 0; // Not an exact multiple of the block size for this slot
707
708	return pMeta->slot_bytes;
709}
710
711static INLINE size_t
712_nano_vet_and_size_of_live(nanozone_t *nanozone, const void *ptr)
713{
	size_t size = __nano_vet_and_size(nanozone, ptr);
	if (size && ((((chained_block_t)ptr)->double_free_guard ^ nanozone->cookie) != 0xBADDC0DEDEADBEADULL))
		return size; // Common case: not on a free list, hence live. Return its size.
	else
		// ptr is either on a free list (it carries the correct canary), in which case return zero, OR
		// the caller has stored the canary value in the double_free_guard slot entirely by coincidence
		// and the block is a live allocation. The latter is very unlikely (1 in 2^64), so just return 0.
		return 0;
722}
723
724static INLINE size_t
725_nano_vet_and_size_of_free(nanozone_t *nanozone, const void *ptr)
726{
	size_t size = __nano_vet_and_size(nanozone, ptr);
	if (size && ((((chained_block_t)ptr)->double_free_guard ^ nanozone->cookie) == 0xBADDC0DEDEADBEADULL))
		return size;
	else
		return 0;
732}
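/*
 * The canary scheme the two routines above test: the free path below
 * (_nano_free_trusted_size_check_scribble()) stores
 * (0xBADDC0DEDEADBEADULL ^ nanozone->cookie) into the block's first word
 * (double_free_guard) as it is pushed onto the free list, so XORing that word
 * with the cookie yields the magic constant exactly when the block is free.
 * For example, with a (made-up) cookie of 0x1122334455667788 a freed block
 * holds 0xABFFF39A8BCBC925 in its double_free_guard field.
 */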
733
734static void *
735_nano_malloc_check_clear(nanozone_t *nanozone, size_t size, boolean_t cleared_requested)
736{
737	void		*ptr;
738	unsigned int	slot_key;
739	unsigned int	slot_bytes = segregated_size_to_fit(nanozone, size, &slot_key); // Note slot_key is set here
740	unsigned int	mag_index = NANO_MAG_INDEX(nanozone);
741
742	nano_meta_admin_t	pMeta = &(nanozone->meta_data[mag_index][slot_key]);
743
744	ptr = OSAtomicDequeue( &(pMeta->slot_LIFO), offsetof(struct chained_block_s,next));
745	if (ptr) {
746#if NANO_FREE_DEQUEUE_DILIGENCE
747		size_t		gotSize;
748		nano_blk_addr_t	p; // the compiler holds this in a register
749
750		p.addr = (uint64_t)ptr; // Begin the dissection of ptr
751		if (nanozone->our_signature != p.fields.nano_signature) {
752			nanozone_error(nanozone, 1,
753						   "Invalid signature for pointer dequeued from free list", ptr, NULL);
754		}
755
756		if (mag_index != p.fields.nano_mag_index) {
757			nanozone_error(nanozone, 1,
758						   "Mismatched magazine for pointer dequeued from free list", ptr, NULL);
759		}
760
761		gotSize = _nano_vet_and_size_of_free(nanozone, ptr);
762		if (0 == gotSize) {
763			nanozone_error(nanozone, 1,
764						   "Invalid pointer dequeued from free list", ptr, NULL);
765		}
766		if (gotSize != slot_bytes) {
767			nanozone_error(nanozone, 1,
768						   "Mismatched size for pointer dequeued from free list", ptr, NULL);
769		}
770
771		if ((((chained_block_t)ptr)->double_free_guard ^ nanozone->cookie) != 0xBADDC0DEDEADBEADULL) {
772			nanozone_error(nanozone, 1,
773						   "Heap corruption detected, free list canary is damaged", ptr, NULL);
774		}
775#if defined(DEBUG)
776		void *next = (void *) (((chained_block_t)ptr)->next);
777		if (next) {
778			p.addr = (uint64_t)next; // Begin the dissection of next
779			if (nanozone->our_signature != p.fields.nano_signature) {
780				nanozone_error(nanozone, 1,
781							   "Invalid next signature for pointer dequeued from free list (showing ptr, next)",
782							   ptr, ", %p", next);
783			}
784
785			if (mag_index != p.fields.nano_mag_index) {
786				nanozone_error(nanozone, 1,
787							   "Mismatched next magazine for pointer dequeued from free list (showing ptr, next)",
788							   ptr, ", %p", next);
789			}
790
791			gotSize = _nano_vet_and_size_of_free(nanozone, next);
792			if (0 == gotSize) {
793				nanozone_error(nanozone, 1,
794							   "Invalid next for pointer dequeued from free list (showing ptr, next)",
795							   ptr, ", %p", next);
796			}
797			if (gotSize != slot_bytes) {
798				nanozone_error(nanozone, 1,
799							   "Mismatched next size for pointer dequeued from free list (showing ptr, next)",
800							   ptr, ", %p", next);
801			}
802		}
803#endif /* DEBUG */
804#endif /* NANO_FREE_DEQUEUE_DILIGENCE */
805
806		((chained_block_t)ptr)->double_free_guard = 0;
807		((chained_block_t)ptr)->next = NULL; // clear out next pointer to protect free list
808	} else {
809		ptr = segregated_next_block(nanozone, pMeta, slot_bytes, mag_index);
810	}
811
812	if (cleared_requested && ptr)
813		memset(ptr, 0, slot_bytes); // TODO: Needs a memory barrier after memset to ensure zeroes land first?
814
815	return ptr;
816}
817
818static void *
819_nano_malloc_check_scribble(nanozone_t *nanozone, size_t size)
820{
821	void *ptr = _nano_malloc_check_clear(nanozone, size, 0);
822
823	/*
824	 * Scribble on allocated memory when requested.
825	 */
826	if ((nanozone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) && ptr && size)
827		memset(ptr, SCRIBBLE_BYTE, _nano_vet_and_size_of_live(nanozone, ptr));
828
829	return ptr;
830}
831
832static INLINE boolean_t
833_nano_block_inuse_p(nanozone_t *nanozone, const void *ptr)
834{
835	nano_blk_addr_t p; // happily, the compiler holds this in a register
836	nano_meta_admin_t pMeta;
837	chained_block_t head = NULL, tail = NULL, t;
838	boolean_t inuse = TRUE;
839
840	p.addr = (uint64_t)ptr; // place ptr on the dissecting table
841
842	pMeta = &(nanozone->meta_data[p.fields.nano_mag_index][p.fields.nano_slot]);
843
844	if ((void *)(pMeta->slot_bump_addr) <= ptr)
845		return FALSE; // Beyond what's ever been allocated, so trivially not in use.
846
847	// pop elements off the free list all the while looking for ptr.
848	unsigned stoploss = pMeta->slot_objects_mapped;
849	while ((t = OSAtomicDequeue( &(pMeta->slot_LIFO), offsetof(struct chained_block_s,next)))) {
850		if (0 == stoploss) {
851			nanozone_error(nanozone, 1, "Free list walk in _nano_block_inuse_p exceeded object count.",
852						   (void *)&(pMeta->slot_LIFO), NULL);
853		}
854		stoploss--;
855
856		if (NULL == head)
857			head = t;
858		else
859			tail->next = t;
860		tail = t;
861
862		if (ptr == t) {
863			inuse = FALSE;
864			break;
865		}
866	}
867	if (tail)
868		tail->next = NULL;
869
870	// push the free list extracted above back onto the LIFO, all at once
871	if (head)
872		OSAtomicEnqueue( &(pMeta->slot_LIFO), head, (uintptr_t)tail - (uintptr_t)head + offsetof(struct chained_block_s,next));
873
874	return inuse;
875}
876
877static INLINE size_t
878_nano_size(nanozone_t *nanozone, const void *ptr)
879{
880	return  _nano_vet_and_size_of_live(nanozone, ptr);
881}
882
883static INLINE size_t
884_nano_good_size(nanozone_t *nanozone, size_t size)
885{
886	return (size <= NANO_REGIME_QUANTA_SIZE) ?
887	NANO_REGIME_QUANTA_SIZE :
888	(((size + NANO_REGIME_QUANTA_SIZE - 1) >> SHIFT_NANO_QUANTUM) << SHIFT_NANO_QUANTUM);
889}
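/*
 * e.g. _nano_good_size() maps 1 -> 16, 17 -> 32, 200 -> 208 and 256 -> 256;
 * its callers in this file only reach it for requests within the nano range
 * (see nano_good_size()).
 */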
890
891static INLINE void _nano_free_trusted_size_check_scribble(nanozone_t *nanozone, void *ptr, size_t trusted_size, boolean_t do_scribble) ALWAYSINLINE;
892
893static INLINE void
894_nano_free_trusted_size_check_scribble(nanozone_t *nanozone, void *ptr, size_t trusted_size, boolean_t do_scribble)
895{
896	if (trusted_size) {
897		nano_blk_addr_t p; // happily, the compiler holds this in a register
898		nano_meta_admin_t pMeta;
899
900		if (do_scribble)
901			(void)memset(ptr, SCRABBLE_BYTE, trusted_size);
902		((chained_block_t)ptr)->double_free_guard = (0xBADDC0DEDEADBEADULL ^ nanozone->cookie);
903
904		p.addr = (uint64_t)ptr; // place ptr on the dissecting table
905		pMeta = &(nanozone->meta_data[p.fields.nano_mag_index][p.fields.nano_slot]);
906		OSAtomicEnqueue( &(pMeta->slot_LIFO), ptr, offsetof(struct chained_block_s,next));
907	} else {
908		nanozone_error(nanozone, 1, "Freeing unallocated pointer", ptr, NULL);
909	}
910}
911
912static INLINE void _nano_free_check_scribble(nanozone_t *nanozone, void *ptr, boolean_t do_scribble) ALWAYSINLINE;
913
914static INLINE void
915_nano_free_check_scribble(nanozone_t *nanozone, void *ptr, boolean_t do_scribble)
916{
917    _nano_free_trusted_size_check_scribble(nanozone, ptr, _nano_vet_and_size_of_live(nanozone, ptr), do_scribble);
918}
919
920static INLINE void *
921_nano_realloc(nanozone_t *nanozone, void *ptr, size_t new_size)
922{
923	size_t	old_size, new_good_size, valid_size;
924	void	*new_ptr;
925
926	if (FALSE && NULL == ptr) { // ptr has our_signature so can't be NULL, but if it were Posix sez ...
927		// If ptr is a null pointer, realloc() shall be equivalent to malloc() for the specified size.
928		return _nano_malloc_check_scribble(nanozone, new_size);
929	} else if (0 == new_size) {
930		// If size is 0 and ptr is not a null pointer, the object pointed to is freed.
931		_nano_free_check_scribble(nanozone, ptr, (nanozone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE));
932		// If size is 0, either a null pointer or a unique pointer that can be successfully passed
933		// to free() shall be returned.
934		return _nano_malloc_check_scribble(nanozone, 1);
935	}
936
937	old_size = _nano_vet_and_size_of_live(nanozone, ptr);
938	if (!old_size) {
939		nanozone_error(nanozone, 1, "pointer being reallocated was not allocated", ptr, NULL);
940		return NULL;
941	}
942
943	new_good_size = _nano_good_size(nanozone, new_size);
944	if (new_good_size > old_size) {
945		/* Must grow. FALL THROUGH to alloc/copy/free. */
946	} else if (new_good_size <= (old_size >> 1)) {
947		/* Serious shrinkage (more than half). FALL THROUGH to alloc/copy/free. */
948	} else {
949		/* Let's hang on to what we got. */
950		if (nanozone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE)
951			memset(ptr + new_size, SCRIBBLE_BYTE, old_size - new_size);
952		return ptr;
953	}
954
955	/*
956	 * Allocate a new buffer and copy.
957	 */
958	new_ptr = _nano_malloc_check_scribble(nanozone, new_good_size);
959	if (new_ptr == NULL)
960		return NULL;
961
962	valid_size = MIN(old_size, new_good_size);
963	memcpy(new_ptr, ptr, valid_size);
964	_nano_free_check_scribble(nanozone, ptr, (nanozone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE));
965
966	return new_ptr;
967}
968
969static INLINE void
970_nano_destroy(nanozone_t *nanozone)
971{
972	/* Now destroy the separate nanozone region */
973	deallocate_pages(nanozone, (void *)nanozone, SZONE_PAGED_SIZE, 0);
974}
975
976/******************           nanozone dispatch          **********************/
977
978static void *
979nano_malloc(nanozone_t *nanozone, size_t size)
980{
981	if (size <= NANO_MAX_SIZE) {
982		void *p = _nano_malloc_check_clear(nanozone, size, 0);
983		if (p) {
984			return p;
985		} else {
986			/* FALLTHROUGH to helper zone */
987		}
988	}
989
990	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
991	return zone->malloc(zone, size);
992}
993
994static void *
995nano_forked_malloc(nanozone_t *nanozone, size_t size)
996{
997	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
998	return zone->malloc(zone, size);
999}
1000
1001static void *
1002nano_malloc_scribble(nanozone_t *nanozone, size_t size)
1003{
1004	if (size <= NANO_MAX_SIZE) {
1005		void *ptr = _nano_malloc_check_clear(nanozone, size, 0);
1006		if (ptr) {
1007			/*
1008			 * Scribble on allocated memory.
1009			 */
1010			if (size)
1011				memset(ptr, SCRIBBLE_BYTE, _nano_vet_and_size_of_live(nanozone, ptr));
1012
1013			return ptr;
1014		} else {
1015			/* FALLTHROUGH to helper zone */
1016		}
1017	}
1018	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1019	return zone->malloc(zone, size);
1020}
1021
1022static void *
1023nano_calloc(nanozone_t *nanozone, size_t num_items, size_t size)
1024{
1025	size_t total_bytes = num_items * size;
1026
1027	// Check for overflow of integer multiplication
1028	if (num_items > 1) {
1029		/* size_t is uint64_t */
1030		if ((num_items | size) & 0xffffffff00000000ul) {
1031			// num_items or size equals or exceeds sqrt(2^64) == 2^32, appeal to wider arithmetic
1032			__uint128_t product = ((__uint128_t)num_items) * ((__uint128_t)size);
1033			if ((uint64_t)(product >> 64)) // compiles to test on upper register of register pair
1034				return NULL;
1035		}
1036	}
1037
1038	if (total_bytes <= NANO_MAX_SIZE) {
1039		void *p = _nano_malloc_check_clear(nanozone, total_bytes, 1);
1040		if (p) {
1041			return p;
1042		} else {
1043			/* FALLTHROUGH to helper zone */
1044		}
1045	}
1046	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1047	return zone->calloc(zone, 1, total_bytes);
1048}
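/*
 * Worked example of the overflow check above (a sketch of the reasoning, not new
 * behavior): calloc(1UL << 33, 1UL << 32) multiplies to 2^65, so the 64-bit
 * product wraps to 0, which would otherwise look like a tiny nano request.
 * Because the operands have bits at or above bit 32 set, the code re-multiplies
 * in 128-bit arithmetic, sees a nonzero upper half (2), and returns NULL.
 */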
1049
1050static void *
1051nano_forked_calloc(nanozone_t *nanozone, size_t num_items, size_t size)
1052{
1053	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1054	return zone->calloc(zone, num_items, size);
1055}
1056
1057static void *
1058nano_valloc(nanozone_t *nanozone, size_t size)
1059{
1060	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1061	return zone->valloc(zone, size);
1062}
1063
1064static INLINE void __nano_free_definite_size(nanozone_t *nanozone, void *ptr, size_t size, boolean_t do_scribble) ALWAYSINLINE;
1065
1066static INLINE void
1067__nano_free_definite_size(nanozone_t *nanozone, void *ptr, size_t size, boolean_t do_scribble)
1068{
1069	nano_blk_addr_t p; // happily, the compiler holds this in a register
1070
1071	p.addr = (uint64_t)ptr; // place ptr on the dissecting table
	if (nanozone->our_signature == p.fields.nano_signature) {
		if (size == ((p.fields.nano_slot + 1) << SHIFT_NANO_QUANTUM)) { // "Trust but verify."
			_nano_free_trusted_size_check_scribble(nanozone, ptr, size, do_scribble);
			return;
		} else {
			nanozone_error(nanozone, 1, "Freeing pointer whose size was misdeclared", ptr, NULL);
		}
	} else {
		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
		zone->free_definite_size(zone, ptr, size);
		return;
	}
	/* NOTREACHED */
1085}
1086
1087static void
1088nano_free_definite_size(nanozone_t *nanozone, void *ptr, size_t size)
1089{
1090	__nano_free_definite_size(nanozone, ptr, size, 0);
1091}
1092
1093static void
1094nano_free_definite_size_scribble(nanozone_t *nanozone, void *ptr, size_t size)
1095{
1096	__nano_free_definite_size(nanozone, ptr, size, 1);
1097}
1098
1099static INLINE void __nano_free(nanozone_t *nanozone, void *ptr, boolean_t do_scribble) ALWAYSINLINE;
1100
1101static INLINE void
1102__nano_free(nanozone_t *nanozone, void *ptr, boolean_t do_scribble)
1103{
1104	nano_blk_addr_t p; // happily, the compiler holds this in a register
1105
1106	if (!ptr)
1107		return; // Protect against malloc_zone_free() passing NULL.
1108
1109	p.addr = (uint64_t)ptr; // place ptr on the dissecting table
1110	if (nanozone->our_signature == p.fields.nano_signature) {
1111		_nano_free_check_scribble(nanozone, ptr, do_scribble);
1112		return;
1113	} else {
1114		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1115		zone->free(zone, ptr);
1116		return;
1117	}
1118	/* NOTREACHED */
1119}
1120
1121static void
1122nano_free(nanozone_t *nanozone, void *ptr)
1123{
1124	__nano_free(nanozone, ptr, 0);
1125}
1126
1127static void
1128nano_forked_free(nanozone_t *nanozone, void *ptr)
1129{
1130	nano_blk_addr_t p; // happily, the compiler holds this in a register
1131
1132	if (!ptr)
1133		return; // Protect against malloc_zone_free() passing NULL.
1134
1135	p.addr = (uint64_t)ptr; // place ptr on the dissecting table
1136	if (nanozone->our_signature == p.fields.nano_signature) {
1137		/* NOTHING. Drop it on the floor as nanozone metadata could be fouled by fork. */
1138		return;
1139	} else {
1140		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1141		zone->free(zone, ptr);
1142		return;
1143	}
1144	/* NOTREACHED */
1145}
1146
1147static void
1148nano_forked_free_definite_size(nanozone_t *nanozone, void *ptr, size_t size)
1149{
1150	nano_forked_free(nanozone, ptr);
1151}
1152
1153static void
1154nano_free_scribble(nanozone_t *nanozone, void *ptr)
1155{
1156	__nano_free(nanozone, ptr, 1);
1157}
1158
1159static size_t
1160nano_size(nanozone_t *nanozone, const void *ptr)
1161{
1162	nano_blk_addr_t p; // happily, the compiler holds this in a register
1163
1164	p.addr = (uint64_t)ptr; // place ptr on the dissecting table
1165
1166	if (nanozone->our_signature == p.fields.nano_signature) { // Our signature?
1167		return _nano_size(nanozone, ptr);
1168	} else {
1169		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1170		return zone->size(zone, ptr); // Not nano. Try other sizes.
1171	}
1172	/* NOTREACHED */
1173}
1174
1175static void *
1176nano_realloc(nanozone_t *nanozone, void *ptr, size_t new_size)
1177{
1178	nano_blk_addr_t p; // happily, the compiler holds this in a register
1179
1180	p.addr = (uint64_t)ptr; // place ptr on the dissecting table
1181
1182	if (NULL == ptr) { // could occur through malloc_zone_realloc() path
1183        // If ptr is a null pointer, realloc() shall be equivalent to malloc() for the specified size.
1184		return nano_malloc(nanozone, new_size);
1185	} else if (nanozone->our_signature == p.fields.nano_signature) { // Our signature?
1186		if (new_size <= NANO_MAX_SIZE) { // nano to nano?
1187			void *q = _nano_realloc(nanozone, ptr, new_size);
1188			if (q) {
1189				return q;
1190			} else { // nano exhausted
1191				/* FALLTHROUGH to helper zone copying case */
1192			}
1193		}
1194
1195		// nano to larger-than-nano (or FALLTHROUGH from just above)
1196		size_t old_size = _nano_vet_and_size_of_live(nanozone, ptr);
1197
1198		if (!old_size) {
1199			nanozone_error(nanozone, 1, "pointer being reallocated was not allocated", ptr, NULL);
1200			return NULL;
1201		} else {
1202			malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1203			void *new_ptr = zone->malloc(zone, new_size);
1204
1205			if (new_ptr) {
1206				size_t valid_size = MIN(old_size, new_size);
1207				memcpy(new_ptr, ptr, valid_size);
1208				_nano_free_check_scribble(nanozone, ptr, (nanozone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE));
1209				return new_ptr;
1210			} else {
1211				/* Original ptr is left intact */
1212				return NULL;
1213			}
1214			/* NOTREACHED */
1215		}
1216	} else {
1217		// other-than-nano (not necessarily larger! possibly NULL!) to whatever
1218		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1219
1220		return zone->realloc(zone, ptr, new_size);
1221	}
1222	/* NOTREACHED */
1223}
1224
1225static void *
1226nano_forked_realloc(nanozone_t *nanozone, void *ptr, size_t new_size)
1227{
1228	nano_blk_addr_t p; // happily, the compiler holds this in a register
1229
1230	p.addr = (uint64_t)ptr; // place ptr on the dissecting table
1231
1232	if (NULL == ptr) { // could occur through malloc_zone_realloc() path
1233        // If ptr is a null pointer, realloc() shall be equivalent to malloc() for the specified size.
1234		return nano_forked_malloc(nanozone, new_size);
1235	} else if (nanozone->our_signature == p.fields.nano_signature) { // Our signature?
1236        if (0 == new_size) {
1237            // If size is 0 and ptr is not a null pointer, the object pointed to is freed.
1238            // However as nanozone metadata could be fouled by fork, we'll intentionally leak it.
1239
1240            // If size is 0, either a null pointer or a unique pointer that can be successfully passed
1241            // to free() shall be returned.
1242            return nano_forked_malloc(nanozone, 1);
1243        }
1244
1245		size_t old_size = _nano_vet_and_size_of_live(nanozone, ptr);
1246
1247		if (!old_size) {
1248			nanozone_error(nanozone, 1, "pointer being reallocated was not allocated", ptr, NULL);
1249			return NULL;
1250		} else {
1251			malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1252			void *new_ptr = zone->malloc(zone, new_size);
1253
1254			if (new_ptr) {
1255				size_t valid_size = MIN(old_size, new_size);
1256				memcpy(new_ptr, ptr, valid_size);
1257				/* Original pointer is intentionally leaked as nanozone metadata could be fouled by fork. */
1258				return new_ptr;
1259			} else {
1260				/* Original ptr is left intact */
1261				return NULL;
1262			}
1263			/* NOTREACHED */
1264		}
1265	} else {
1266		// other-than-nano (not necessarily larger! possibly NULL!) to whatever
1267		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1268
1269		return zone->realloc(zone, ptr, new_size);
1270	}
1271	/* NOTREACHED */
1272}
1273
1274static void
1275nano_destroy(nanozone_t *nanozone)
1276{
1277	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1278	zone->destroy(zone);
1279
1280	_nano_destroy(nanozone);
1281}
1282
1283static unsigned
1284nano_batch_malloc(nanozone_t *nanozone, size_t size, void **results, unsigned count)
1285{
1286	unsigned	found = 0;
1287
1288	if (size <= NANO_MAX_SIZE) {
1289		while (found < count) {
1290			void *ptr = _nano_malloc_check_clear(nanozone, size, 0);
1291			if (!ptr)
1292				break;
1293
1294			*results++ = ptr;
1295			found++;
1296		}
1297		if (found == count) {
1298			return found;
1299		} else {
1300			/* FALLTHROUGH to mop-up in the helper zone */
1301		}
1302	}
1303
1304	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1305	return found + zone->batch_malloc(zone, size, results, count - found);
1306}
1307
1308static unsigned
1309nano_forked_batch_malloc(nanozone_t *nanozone, size_t size, void **results, unsigned count)
1310{
1311	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1312	return zone->batch_malloc(zone, size, results, count);
1313}
1314
1315static void
1316nano_batch_free(nanozone_t *nanozone, void **to_be_freed, unsigned count)
1317{
1318	void	*ptr;
1319
1320	// frees all the pointers in to_be_freed
1321	// note that to_be_freed may be overwritten during the process
1322	if (!count)
1323		return;
1324
1325	while (count--) {
1326		ptr = to_be_freed[count];
1327		if (ptr)
1328			nano_free(nanozone, ptr);
1329	}
1330}
1331
1332static void
1333nano_forked_batch_free(nanozone_t *nanozone, void **to_be_freed, unsigned count)
1334{
1335	void	*ptr;
1336
1337	// frees all the pointers in to_be_freed
1338	// note that to_be_freed may be overwritten during the process
1339	if (!count)
1340		return;
1341
1342	while (count--) {
1343		ptr = to_be_freed[count];
1344		if (ptr)
1345			nano_forked_free(nanozone, ptr);
1346	}
1347}
1348
1349static void *
1350nano_memalign(nanozone_t *nanozone, size_t alignment, size_t size)
1351{
1352	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1353	return zone->memalign(zone, alignment, size);
1354}
1355
1356static size_t
1357nano_try_madvise(nanozone_t *nanozone, size_t goal)
1358{
1359	unsigned int	mag_index, slot_key;
1360	size_t		bytes_toward_goal = 0;
1361
1362	for (mag_index = 0; mag_index < nanozone->phys_ncpus; mag_index++) {
1363		nano_blk_addr_t	p;
1364
1365		// Establish p as base address for band 0, slot 0, offset 0
1366		p.fields.nano_signature = NANOZONE_SIGNATURE;
1367		p.fields.nano_mag_index = mag_index;
1368		p.fields.nano_band = 0;
1369		p.fields.nano_slot = 0;
1370		p.fields.nano_offset = 0;
1371
1372		for (slot_key = 0; slot_key < SLOT_KEY_LIMIT;
1373			 p.addr += SLOT_IN_BAND_SIZE,	    // Advance to next slot base
1374			 slot_key++) {
1375
1376			// _malloc_printf(ASL_LEVEL_WARNING,"nano_try_madvise examining slot base %p\n", p.addr);
1377			nano_meta_admin_t pMeta = &(nanozone->meta_data[mag_index][slot_key]);
1378			uintptr_t slot_bump_addr = pMeta->slot_bump_addr; // capture this volatile pointer
1379			size_t slot_objects_mapped = pMeta->slot_objects_mapped; // capture this volatile count
1380
1381			if (0 == slot_objects_mapped) { // Nothing allocated in this magazine for this slot?
1382				continue;
1383			} else {
1384				// Walk the slot free list and populate a bitarray_t
1385				int log_size = 64 - __builtin_clzl(slot_objects_mapped);
1386				bitarray_t slot_bitarray = bitarray_create(log_size);
1387
1388				unsigned int slot_bytes = pMeta->slot_bytes;
1389				int log_page_count = 64 - __builtin_clzl((slot_objects_mapped * slot_bytes) / vm_page_size);
1390				log_page_count = 1 + MAX(0, log_page_count);
1391				bitarray_t page_bitarray = bitarray_create(log_page_count);
1392
1393				// _malloc_printf(ASL_LEVEL_WARNING,"slot_bitarray: %db page_bitarray: %db\n", bitarray_size(log_size), bitarray_size(log_page_count));
1394				if (!slot_bitarray) {
1395					malloc_printf("bitarray_create(%d) in nano_try_madvise returned errno=%d.", log_size, errno);
1396					return bytes_toward_goal;
1397				}
1398
1399				if (!page_bitarray) {
1400					malloc_printf("bitarray_create(%d) in nano_try_madvise returned errno=%d.", log_page_count, errno);
1401					free(slot_bitarray);
1402					return bytes_toward_goal;
1403				}
1404
1405				chained_block_t head = NULL, tail = NULL, t;
1406				unsigned stoploss = slot_objects_mapped;
1407				while ((t = OSAtomicDequeue( &(pMeta->slot_LIFO), offsetof(struct chained_block_s,next)))) {
1408					if (0 == stoploss) {
1409						malloc_printf("Free list walk in nano_try_madvise exceeded object count.");
1410						break;
1411					}
1412					stoploss--;
1413
1414					uintptr_t	offset = ((uintptr_t)t - p.addr); // offset from beginning of slot
1415					index_t 	block_index = offset_to_index(nanozone, pMeta, offset);
1416
1417					// build a simple linked list of the free blocks we're able to obtain
1418					if (NULL == head)
1419						head = t;
1420					else
1421						tail->next = t;
1422					tail = t;
1423
1424					// take note in a bitarray_t of each free block we're able to obtain (allows fast lookup below)
1425					if (block_index < slot_objects_mapped)
1426						bitarray_set(slot_bitarray, log_size, block_index);
1427				}
1428				if (tail)
1429					tail->next = NULL;
1430
1431				if (NULL == head) {
1432					free(slot_bitarray);
1433					free(page_bitarray);
1434					continue;
1435				}
1436
1437				index_t i;
1438				nano_blk_addr_t q;
1439				size_t pgnum;
1440				for (i = pMeta->slot_objects_skipped; i < slot_objects_mapped; ++i)  {
1441					uintptr_t block_offset = index_to_offset(nanozone, pMeta, i);
1442					if (p.addr + block_offset >= slot_bump_addr)
1443						break;
1444
1445					if (!bitarray_get(slot_bitarray, log_size, i)) { // is block i allocated or already on an madvise'd page?
1446
1447						// Mark the page(s) it resides on as live
1448						q.addr = p.addr + block_offset;
1449						pgnum = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;
1450						bitarray_set(page_bitarray, log_page_count, pgnum);
1451
1452						q.addr += slot_bytes - 1;
1453						pgnum = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;
1454						bitarray_set(page_bitarray, log_page_count, pgnum);
1455					}
1456				}
1457
1458				free(slot_bitarray);
1459
1460				q.addr = p.addr + index_to_offset(nanozone, pMeta, pMeta->slot_objects_skipped);
1461				index_t pgstart = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;
1462
1463				q.addr = slot_bump_addr - slot_bytes;
1464				pgnum = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;
1465
1466				// _malloc_printf(ASL_LEVEL_WARNING,"Examining %d pages. Slot base %p.\n", pgnum - pgstart + 1, p.addr);
1467
1468				if (pMeta->slot_madvised_pages) {
1469					if (pMeta->slot_madvised_log_page_count < log_page_count) {
1470						bitarray_t new_madvised_pages = bitarray_create(log_page_count);
1471						index_t index;
1472						while (bitarray_zap_first_set(pMeta->slot_madvised_pages, pMeta->slot_madvised_log_page_count, &index)) {
1473							bitarray_set(new_madvised_pages, log_page_count, index);
1474						}
1475						free(pMeta->slot_madvised_pages);
1476						pMeta->slot_madvised_pages = new_madvised_pages;
1477						pMeta->slot_madvised_log_page_count = log_page_count;
1478					}
1479				} else {
1480					pMeta->slot_madvised_pages = bitarray_create(log_page_count);
1481					pMeta->slot_madvised_log_page_count = log_page_count;
1482				}
1483
1484				bitarray_t will_madvise_pages = bitarray_create(log_page_count);
1485				int num_advised = 0;
1486
1487				for (i = pgstart; i < pgnum; ++i) {
1488					if ((i < (1 << log_page_count)) && // bounds check for the bitarray_get()'s that follow.
1489						!bitarray_get(pMeta->slot_madvised_pages, log_page_count, i) && // already madvise'd?
1490						!bitarray_get(page_bitarray, log_page_count, i)) // no live allocations?
1491					{
1492						num_advised++;
1493						bitarray_set(will_madvise_pages, log_page_count, i);
1494					}
1495				}
1496				free(page_bitarray);
1497
1498				if (num_advised) {
1499					chained_block_t new_head = NULL, new_tail = NULL;
1500					// _malloc_printf(ASL_LEVEL_WARNING,"Constructing residual free list starting at %p num_advised %d\n", head, num_advised);
1501					t = head;
1502					while (t) {
1503						q.addr = (uintptr_t)t;
1504						index_t pgnum_start = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;
1505						q.addr += slot_bytes - 1;
1506						index_t pgnum_end = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;
1507
1508						// bounds check for the bitarray_get()'s that follow. If the pgnum is beyond the
1509						// capacity of the will_madvise_pages just restore the block to the free list.
1510						if (pgnum_start >= (1 << log_page_count)) {
1511							if (NULL == new_head)
1512								new_head = t;
1513							else
1514								new_tail->next = t;
1515							new_tail = t;
1516						}
						// If the block does not lie on any madvise()'d page, restore it to the slot free list.
1518						else if (!bitarray_get(will_madvise_pages, log_page_count, pgnum_start) &&
1519								 !bitarray_get(will_madvise_pages, log_page_count, pgnum_end)) {
1520							if (NULL == new_head)
1521								new_head = t;
1522							else
1523								new_tail->next = t;
1524							new_tail = t;
1525						}
1526
1527						t = t->next;
1528					}
1529					if (new_tail)
1530						new_tail->next = NULL;
1531
1532					// push the free list extracted above back onto the LIFO, all at once
1533					if (new_head)
1534						OSAtomicEnqueue( &(pMeta->slot_LIFO), new_head,
1535										(uintptr_t)new_tail - (uintptr_t)new_head + offsetof(struct chained_block_s,next));
1536				} else {
1537					// _malloc_printf(ASL_LEVEL_WARNING,"Reinstating free list since no pages were madvised (%d).\n", num_advised);
1538					if (head)
1539						OSAtomicEnqueue( &(pMeta->slot_LIFO), head,
1540										(uintptr_t)tail - (uintptr_t)head + offsetof(struct chained_block_s,next));
1541				}
1542
1543				for (i = pgstart; i < pgnum; ++i) {
1544					if ((i < (1 << log_page_count)) && bitarray_get(will_madvise_pages, log_page_count, i)) {
1545						q = p;
1546						q.fields.nano_band = (i << vm_page_shift) >> NANO_OFFSET_BITS;
1547						q.fields.nano_offset = (i << vm_page_shift) & ((1 << NANO_OFFSET_BITS) - 1);
1548						// _malloc_printf(ASL_LEVEL_WARNING,"Entire page non-live: %d. Slot base %p, madvising %p\n", i, p.addr, q.addr);
1549
1550						if (nanozone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE)
1551							memset((void *)q.addr, SCRUBBLE_BYTE, vm_page_size);
1552#if TARGET_OS_EMBEDDED
1553						if (-1 == madvise((void *)q.addr, vm_page_size, MADV_FREE))
1554#else
1555						if (-1 == madvise((void *)q.addr, vm_page_size, MADV_FREE_REUSABLE))
1556#endif
1557						{
1558						    /* -1 return: VM map entry change makes this unfit for reuse. Something evil lurks. */
1559#if DEBUG_MADVISE
1560						    nanozone_error(nanozone, 0, "madvise(..., MADV_FREE_REUSABLE) failed",
1561								    (void *)q.addr, "length=%d\n", vm_page_size);
1562#endif
1563						} else {
1564						    bytes_toward_goal += vm_page_size;
1565						    bitarray_set(pMeta->slot_madvised_pages, log_page_count, i);
1566						}
1567					}
1568				}
1569				free(will_madvise_pages);
1570
1571				if (!bitarray_first_set(pMeta->slot_madvised_pages, log_page_count)) {
1572					free(pMeta->slot_madvised_pages);
1573					pMeta->slot_madvised_pages = NULL;
1574					pMeta->slot_madvised_log_page_count = 0;
1575				}
1576
1577				if (goal && bytes_toward_goal >= goal)
1578					return bytes_toward_goal;
1579			}
1580		}
1581	}
1582	return bytes_toward_goal;
1583}
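/*
 * Note on the address arithmetic used throughout the madvise path above: the expression
 *
 *	((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift
 *
 * concatenates the band and offset bit fields of a nano address into a byte position within the
 * current (magazine, slot) region and converts that to a page index, which is what the page
 * bitarrays are keyed on. A hypothetical helper (illustrative sketch only, not used by this file)
 * could factor it out:
 *
 *	static inline index_t
 *	nano_page_index(nano_blk_addr_t q)
 *	{
 *		unsigned byte_pos = (((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) |
 *				((unsigned)q.fields.nano_offset);
 *		return (index_t)(byte_pos >> vm_page_shift);	// page within this magazine/slot region
 *	}
 *
 * The madvise loop above performs the inverse transform, splitting (i << vm_page_shift) at the
 * NANO_OFFSET_BITS boundary to rebuild nano_band and nano_offset and so recover a page base address.
 */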
1584
1585static size_t
1586nano_pressure_relief(nanozone_t *nanozone, size_t goal)
1587{
1588	return nano_try_madvise(nanozone, goal);
1589}
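/*
 * nano_pressure_relief() is installed as the zone's pressure_relief entry point in
 * create_nano_zone() below; a goal of 0 asks the zone to madvise away as much clean memory as it
 * can (see the goal check at the end of nano_try_madvise()). Hedged usage sketch through the
 * public malloc API (illustrative only):
 *
 *	#include <malloc/malloc.h>
 *
 *	// Ask the default zone to shed reclaimable pages; returns the number of bytes released.
 *	size_t released = malloc_zone_pressure_relief(malloc_default_zone(), 0);
 */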
1590
1591/****************           introspection methods         *********************/
1592
1593static kern_return_t
1594nanozone_default_reader(task_t task, vm_address_t address, vm_size_t size, void **ptr)
1595{
1596	*ptr = (void *)address;
1597	return KERN_SUCCESS;
1598}
1599
1600static kern_return_t
1601nano_ptr_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t zone_address,
1602						   memory_reader_t reader, vm_range_recorder_t recorder)
1603{
1604	nanozone_t		*nanozone;
1605	kern_return_t	err;
1606
1607	if (!reader) reader = nanozone_default_reader;
1608
1609	err = reader(task, zone_address, sizeof(nanozone_t), (void **)&nanozone);
1610	if (err) return err;
1611
1612	err = segregated_in_use_enumerator(task, context, type_mask, nanozone, reader, recorder);
1613	return err;
1614}
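/*
 * The reader callback lets this enumerator inspect a nanozone in another task (as a heap
 * analysis tool would): every structure is fetched through reader() before it is dereferenced,
 * and the in-process default above simply hands the address back unchanged. A hedged sketch of
 * an out-of-process reader built on mach_vm_read() (illustrative only; the caller would own and
 * eventually vm_deallocate() the copied buffer):
 *
 *	static kern_return_t
 *	remote_reader(task_t task, vm_address_t address, vm_size_t size, void **ptr)
 *	{
 *		vm_offset_t data = 0;
 *		mach_msg_type_number_t count = 0;
 *		kern_return_t kr = mach_vm_read(task, address, size, &data, &count);
 *		if (KERN_SUCCESS != kr)
 *			return kr;
 *		*ptr = (void *)data;	// copy of the remote bytes, mapped into this task
 *		return KERN_SUCCESS;
 *	}
 */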
1615
1616static size_t
1617nano_good_size(nanozone_t *nanozone, size_t size)
1618{
1619	if (size <= NANO_MAX_SIZE)
1620		return _nano_good_size(nanozone, size);
1621	else {
1622		malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1623		return zone->introspect->good_size(zone, size);
1624	}
1625}
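/*
 * Requests at or below NANO_MAX_SIZE are rounded up by _nano_good_size(); anything larger is
 * sized by the helper zone. Hedged worked example, assuming the nano zone is the process's
 * default zone and the usual 16-byte nano quantum (1 << SHIFT_NANO_QUANTUM):
 *
 *	malloc_good_size(1);	// -> 16
 *	malloc_good_size(17);	// -> 32
 *	malloc_good_size(250);	// -> 256
 *	malloc_good_size(512);	// -> whatever the helper zone reports
 */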
1626
1627// TODO sanity checks
1628unsigned nanozone_check_counter = 0;
1629unsigned nanozone_check_start = 0;
1630unsigned nanozone_check_modulo = 1;
1631
1632static boolean_t
1633nano_check_all(nanozone_t *nanozone, const char *function)
1634{
1635	return 1;
1636}
1637
1638static boolean_t
1639nanozone_check(nanozone_t *nanozone)
1640{
1641	if ((++nanozone_check_counter % 10000) == 0)
1642		_malloc_printf(ASL_LEVEL_NOTICE, "at nanozone_check counter=%d\n", nanozone_check_counter);
1643
1644	if (nanozone_check_counter < nanozone_check_start)
1645		return 1;
1646
1647	if (nanozone_check_counter % nanozone_check_modulo)
1648		return 1;
1649
1650	return nano_check_all(nanozone, "");
1651}
1652
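/*
 * count_free() drains the slot's lock-free LIFO one block at a time, relinks the blocks into a
 * private chain, and then restores the whole chain with a single OSAtomicEnqueue(). The result is
 * a snapshot: blocks freed concurrently during the walk can be missed, which is acceptable for
 * the statistics and printing paths below. The stoploss counter bounds the walk so that a
 * corrupted (cyclic) free list produces a nanozone_error() rather than an endless loop.
 */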
1653static unsigned
1654count_free(nanozone_t *nanozone, nano_meta_admin_t pMeta)
1655{
1656	chained_block_t head = NULL, tail = NULL, t;
1657	unsigned count = 0;
1658
1659	unsigned stoploss = pMeta->slot_objects_mapped;
1660	while ((t = OSAtomicDequeue( &(pMeta->slot_LIFO), offsetof(struct chained_block_s,next)))) {
1661		if (0 == stoploss) {
1662			nanozone_error(nanozone, 1, "Free list walk in count_free exceeded object count.",
1663						   (void *)&(pMeta->slot_LIFO), NULL);
1664		}
1665		stoploss--;
1666
1667		if (NULL == head)
1668			head = t;
1669		else
1670			tail->next = t;
1671		tail = t;
1672
1673		count++;
1674	}
1675	if (tail)
1676		tail->next = NULL;
1677
1678	// push the free list extracted above back onto the LIFO, all at once
1679	if (head)
1680		OSAtomicEnqueue( &(pMeta->slot_LIFO), head, (uintptr_t)tail - (uintptr_t)head + offsetof(struct chained_block_s,next));
1681
1682	return count;
1683}
1684
1685static void
1686nano_print(nanozone_t *nanozone, boolean_t verbose)
1687{
1688	unsigned int	mag_index, slot_key;
1689	malloc_statistics_t stats;
1690
1691	nano_statistics(nanozone, &stats);
1692	_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
1693				   "Nanozone %p: inUse=%d(%dKB) touched=%dKB allocated=%dMB\n",
1694				   nanozone, stats.blocks_in_use, stats.size_in_use>>10, stats.max_size_in_use>>10, stats.size_allocated>>20);
1695
1696	for (mag_index = 0; mag_index < nanozone->phys_ncpus; mag_index++) {
1697		nano_blk_addr_t	p;
1698
1699		// Establish p as base address for band 0, slot 0, offset 0
1700		p.fields.nano_signature = NANOZONE_SIGNATURE;
1701		p.fields.nano_mag_index = mag_index;
1702		p.fields.nano_band = 0;
1703		p.fields.nano_slot = 0;
1704		p.fields.nano_offset = 0;
1705
1706		for (slot_key = 0; slot_key < SLOT_KEY_LIMIT;
1707			 p.addr += SLOT_IN_BAND_SIZE,	    // Advance to next slot base
1708			 slot_key++) {
1709
1710			nano_meta_admin_t pMeta = &(nanozone->meta_data[mag_index][slot_key]);
1711			uintptr_t slot_bump_addr = pMeta->slot_bump_addr; // capture this volatile pointer
1712			size_t slot_objects_mapped = pMeta->slot_objects_mapped; // capture this volatile count
1713
1714			if (0 == slot_objects_mapped) { // Nothing allocated in this magazine for this slot?
1715				_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
1716							   "Magazine %2d(%3d) Unrealized\n",mag_index, (slot_key + 1) << SHIFT_NANO_QUANTUM);
1717				continue;
1718			}
1719
1720			uintptr_t offset = (0 == slot_bump_addr ? 0 : slot_bump_addr - p.addr);
1721			unsigned blocks_touched = offset_to_index(nanozone, pMeta, offset) - pMeta->slot_objects_skipped;
1722			unsigned blocks_now_free = count_free(nanozone, pMeta);
1723			unsigned blocks_in_use = blocks_touched - blocks_now_free;
1724
1725			size_t size_hiwater = ((slot_key + 1) << SHIFT_NANO_QUANTUM) * blocks_touched;
1726			size_t size_in_use = ((slot_key + 1) << SHIFT_NANO_QUANTUM) * blocks_in_use;
1727			size_t size_allocated = ((offset / BAND_SIZE) + 1) * SLOT_IN_BAND_SIZE;
1728
1729			_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
1730						   "Magazine %2d(%3d) [%p, %3dKB] \t Allocations in use=%4d \t Bytes in use=%db \t Untouched=%dKB\n",
1731						   mag_index, (slot_key + 1) << SHIFT_NANO_QUANTUM, (void *)p.addr,
1732						   (size_allocated>>10), blocks_in_use, size_in_use, (size_allocated - size_hiwater)>>10);
1733
1734			if (!verbose) {
1735				continue;
1736			} else {
1737				// Walk the slot free list and populate a bitarray_t
1738				int log_size = 64 - __builtin_clzl(slot_objects_mapped);
1739				bitarray_t slot_bitarray = bitarray_create(log_size);
1740
1741				if (!slot_bitarray) {
1742					malloc_printf("bitarray_create(%d) in nano_print returned errno=%d.", log_size, errno);
1743					return;
1744				}
1745
1746				chained_block_t head = NULL, tail = NULL, t;
1747				unsigned stoploss = slot_objects_mapped;
1748				while ((t = OSAtomicDequeue( &(pMeta->slot_LIFO), offsetof(struct chained_block_s,next)))) {
1749					if (0 == stoploss) {
1750						malloc_printf("Free list walk in nano_print exceeded object count.");
1751						break;
1752					}
1753					stoploss--;
1754
1755					uintptr_t	offset = ((uintptr_t)t - p.addr); // offset from beginning of slot
1756					index_t 	block_index = offset_to_index(nanozone, pMeta, offset);
1757
1758					if (NULL == head)
1759						head = t;
1760					else
1761						tail->next = t;
1762					tail = t;
1763
1764					if (block_index < slot_objects_mapped)
1765						bitarray_set(slot_bitarray, log_size, block_index);
1766				}
1767				if (tail)
1768					tail->next = NULL;
1769
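				// Verbose map legend, one character per mapped block in this slot:
				//   '_'  block below slot_objects_skipped (never handed out)
				//   'F'  block found on the slot free list above
				//   'M'  block whose page has been madvise()'d back to the system
				//   '.'  block presumed in use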
1770				index_t i;
1771				for (i = 0; i < slot_objects_mapped; ++i)  {
1772					nano_blk_addr_t q;
1773					size_t pgnum;
1774					uintptr_t block_offset = index_to_offset(nanozone, pMeta, i);
1775					if (p.addr + block_offset >= slot_bump_addr)
1776						break;
1777
1778					q.addr = p.addr + block_offset;
1779					pgnum = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;
1780
1781					if (i < pMeta->slot_objects_skipped) {
1782						_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,"_");
1783					} else if (bitarray_get(slot_bitarray, log_size, i)) {
1784						_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,"F");
1785					} else if (pMeta->slot_madvised_pages && (pgnum < ( 1 << pMeta->slot_madvised_log_page_count)) &&
1786							   bitarray_get(pMeta->slot_madvised_pages, pMeta->slot_madvised_log_page_count, pgnum)) {
1787						_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,"M");
1788					} else {
1789						_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,".");
1790					}
1791				}
1792				_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,"\n");
1793
1794				free(slot_bitarray);
1795
1796				// push the free list extracted above back onto the LIFO, all at once
1797				if (head)
1798					OSAtomicEnqueue( &(pMeta->slot_LIFO), head, (uintptr_t)tail - (uintptr_t)head + offsetof(struct chained_block_s,next));
1799			}
1800		}
1801	}
1802	return;
1803}
1804
1805static void
1806nano_log(malloc_zone_t *zone, void *log_address)
1807{
1808}
1809
1810static void
1811nano_force_lock(nanozone_t *nanozone)
1812{
1813	int i;
1814
1815	for (i = 0; i < nanozone->phys_ncpus; ++i) {
1816		_malloc_lock_lock(&nanozone->band_resupply_lock[i]);
1817	}
1818}
1819
1820static void
1821nano_force_unlock(nanozone_t *nanozone)
1822{
1823	int i;
1824
1825	for (i = 0; i < nanozone->phys_ncpus; ++i) {
1826		_malloc_lock_unlock(&nanozone->band_resupply_lock[i]);
1827	}
1828}
1829
1830static void
1831nano_statistics(nanozone_t *nanozone, malloc_statistics_t *stats)
1832{
1833	int i,j;
1834
1835	bzero(stats, sizeof(*stats));
1836
1837	for (i = 0; i < nanozone->phys_ncpus; ++i) {
1838		nano_blk_addr_t	p;
1839
1840		// Establish p as base address for slot 0 in this CPU magazine
1841		p.fields.nano_signature = NANOZONE_SIGNATURE;
1842		p.fields.nano_mag_index = i;
1843		p.fields.nano_band = 0;
1844		p.fields.nano_slot = 0;
1845		p.fields.nano_offset = 0;
1846
1847		for (j = 0; j < NANO_SLOT_SIZE;
1848			 p.addr += SLOT_IN_BAND_SIZE,	    // Advance to next slot base
1849			 ++j) {
1850			nano_meta_admin_t pMeta = &nanozone->meta_data[i][j];
1851			uintptr_t offset = pMeta->slot_bump_addr - p.addr;
1852
1853			if (0 == pMeta->slot_current_base_addr) { // Nothing allocated in this magazine for this slot?
1854				continue;
1855			} else {
1856				unsigned blocks_touched = offset_to_index(nanozone, pMeta, offset) - pMeta->slot_objects_skipped;
1857				unsigned blocks_now_free = count_free(nanozone, pMeta);
1858				unsigned blocks_in_use = blocks_touched - blocks_now_free;
1859
1860				size_t size_hiwater = ((j + 1) << SHIFT_NANO_QUANTUM) * blocks_touched;
1861				size_t size_in_use = ((j + 1) << SHIFT_NANO_QUANTUM) * blocks_in_use;
1862				size_t size_allocated = ((offset / BAND_SIZE) + 1) * SLOT_IN_BAND_SIZE;
1863
1864				stats->blocks_in_use += blocks_in_use;
1865
1866				stats->max_size_in_use += size_hiwater;
1867				stats->size_in_use += size_in_use;
1868				stats->size_allocated += size_allocated;
1869			}
1870		}
1871	}
1872}
1873
1874static boolean_t
1875_nano_locked(nanozone_t *nanozone)
1876{
1877	int i;
1878
1879	for (i = 0; i < nanozone->phys_ncpus; ++i) {
1880		if (_malloc_lock_trylock(&nanozone->band_resupply_lock[i])) {
1881			_malloc_lock_unlock(&nanozone->band_resupply_lock[i]);
1882			return TRUE;
1883		}
1884	}
1885	return FALSE;
1886}
1887
1888static boolean_t
1889nano_locked(nanozone_t *nanozone)
1890{
1891	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
1892
1893	return _nano_locked(nanozone) || zone->introspect->zone_locked(zone);
1894}
1895
1896static const struct malloc_introspection_t nano_introspect = {
1897	(void *)nano_ptr_in_use_enumerator,
1898	(void *)nano_good_size,
1899	(void *)nanozone_check,
1900	(void *)nano_print,
1901	nano_log,
1902	(void *)nano_force_lock,
1903	(void *)nano_force_unlock,
1904	(void *)nano_statistics,
1905	(void *)nano_locked,
1906	NULL, NULL, NULL, NULL, /* Zone enumeration version 7 and forward. */
1907}; // marked as const to spare the DATA section
1908
1909__attribute__((visibility("hidden")))
1910void
1911nano_forked_zone(nanozone_t *nanozone)
1912{
1913	/*
1914	 * Hobble the nano zone in the child of a fork prior to an exec, since the state of the
1915	 * zone can be made inconsistent by a parent thread while the fork is underway.
1916	 *
1917	 * All new allocations will be referred to the helper zone (which is more stable).
1918	 * All free()'s of existing nano objects will be leaked.
1919	 */
1920
1921	mprotect(nanozone, sizeof(nanozone->basic_zone), PROT_READ | PROT_WRITE);
1922
1923	nanozone->basic_zone.size = (void *)nano_size; /* Unchanged. */
1924	nanozone->basic_zone.malloc = (void *)nano_forked_malloc;
1925	nanozone->basic_zone.calloc = (void *)nano_forked_calloc;
1926	nanozone->basic_zone.valloc = (void *)nano_valloc; /* Unchanged, already always obtained from helper zone. */
1927	nanozone->basic_zone.free = (void *)nano_forked_free;
1928	nanozone->basic_zone.realloc = (void *)nano_forked_realloc;
1929	nanozone->basic_zone.destroy = (void *)nano_destroy; /* Unchanged. */
1930	nanozone->basic_zone.batch_malloc = (void *)nano_forked_batch_malloc;
1931	nanozone->basic_zone.batch_free = (void *)nano_forked_batch_free;
1932	nanozone->basic_zone.introspect = (struct malloc_introspection_t *)&nano_introspect; /* Unchanged. */
1933	nanozone->basic_zone.memalign = (void *)nano_memalign; /* Unchanged. */
1934	nanozone->basic_zone.free_definite_size = (void *)nano_forked_free_definite_size;
1935
1936	mprotect(nanozone, sizeof(nanozone->basic_zone), PROT_READ);
1937
1938}
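/*
 * Hedged illustration of the behavior installed above (sketch only; the forked entry points are
 * the nano_forked_* routines assigned in nano_forked_zone()):
 *
 *	void *pre = malloc(32);			// parent: may be served by the nano zone
 *	if (0 == fork()) {
 *		void *post = malloc(32);	// child: referred to the helper zone
 *		free(pre);			// child: deliberately leaked rather than trust the free list
 *		_exit(0);
 *	}
 */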
1939
1940__attribute__((visibility("hidden")))
1941malloc_zone_t *
1942create_nano_zone(size_t initial_size, malloc_zone_t *helper_zone, unsigned debug_flags)
1943{
1944	nanozone_t	*nanozone;
1945	int i, j;
1946
1947	if (!_malloc_engaged_nano) return NULL;
1948
1949#if defined(__x86_64__)
1950	if (_COMM_PAGE_VERSION_REQD > (*((uint16_t *)_COMM_PAGE_VERSION))) {
1951		malloc_printf("*** FATAL ERROR - comm page version mismatch.\n");
1952		exit(-1);
1953	}
1954#endif
1955
1956	/* get memory for the zone. */
1957	nanozone = allocate_pages(NULL, SZONE_PAGED_SIZE, 0, 0, VM_MEMORY_MALLOC);
1958	if (!nanozone)
1959		return NULL;
1960
1961	/* set up the basic_zone portion of the nanozone structure */
1962	nanozone->basic_zone.version = 8;
1963	nanozone->basic_zone.size = (void *)nano_size;
1964	nanozone->basic_zone.malloc = (debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) ? (void *)nano_malloc_scribble : (void *)nano_malloc;
1965	nanozone->basic_zone.calloc = (void *)nano_calloc;
1966	nanozone->basic_zone.valloc = (void *)nano_valloc;
1967	nanozone->basic_zone.free = (debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) ? (void *)nano_free_scribble : (void *)nano_free;
1968	nanozone->basic_zone.realloc = (void *)nano_realloc;
1969	nanozone->basic_zone.destroy = (void *)nano_destroy;
1970	nanozone->basic_zone.batch_malloc = (void *)nano_batch_malloc;
1971	nanozone->basic_zone.batch_free = (void *)nano_batch_free;
1972	nanozone->basic_zone.introspect = (struct malloc_introspection_t *)&nano_introspect;
1973	nanozone->basic_zone.memalign = (void *)nano_memalign;
1974	nanozone->basic_zone.free_definite_size = (debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) ?
1975	(void *)nano_free_definite_size_scribble : (void *)nano_free_definite_size;
1976
1977	nanozone->basic_zone.pressure_relief = (void *)nano_pressure_relief;
1978
1979	nanozone->basic_zone.reserved1 = 0; /* Set to zero once and for all as required by CFAllocator. */
1980	nanozone->basic_zone.reserved2 = 0; /* Set to zero once and for all as required by CFAllocator. */
1981
1982	mprotect(nanozone, sizeof(nanozone->basic_zone), PROT_READ); /* Prevent overwriting the function pointers in basic_zone. */
1983
1984	/* set up the remainder of the nanozone structure */
1985	nanozone->debug_flags = debug_flags;
1986	nanozone->our_signature = NANOZONE_SIGNATURE;
1987
1988	/* Query the number of configured processors. */
1989#if defined(__x86_64__)
1990	nanozone->phys_ncpus = *(uint8_t *)(uintptr_t)_COMM_PAGE_PHYSICAL_CPUS;
1991	nanozone->logical_ncpus = *(uint8_t *)(uintptr_t)_COMM_PAGE_LOGICAL_CPUS;
1992#else
1993#error Unknown architecture
1994#endif
1995
1996	if (nanozone->phys_ncpus > sizeof(nanozone->core_mapped_size)/sizeof(nanozone->core_mapped_size[0])) {
1997		_malloc_printf(ASL_LEVEL_NOTICE, "nano zone abandoned because NCPUS mismatch.\n");
1998		return NULL;
1999	}
2000
2001	if (0 != (nanozone->logical_ncpus % nanozone->phys_ncpus)) {
2002		malloc_printf("*** FATAL ERROR - logical_ncpus %% phys_ncpus != 0.\n");
2003		exit(-1);
2004	}
2005
2006	switch (nanozone->logical_ncpus/nanozone->phys_ncpus) {
2007		case 1:
2008			nanozone->hyper_shift = 0;
2009			break;
2010		case 2:
2011			nanozone->hyper_shift = 1;
2012			break;
2013		case 4:
2014			nanozone->hyper_shift = 2;
2015			break;
2016		default:
2017			malloc_printf("*** FATAL ERROR - logical_ncpus / phys_ncpus not 1, 2, or 4.\n");
2018			exit(-1);
2019	}
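	/*
	 * hyper_shift presumably folds hyperthread siblings onto their physical core's magazine:
	 * with a logical/physical ratio of 2 (hyper_shift == 1), logical CPUs 2k and 2k+1 would
	 * share magazine k. (Hedged explanatory note; the shift is applied wherever the allocating
	 * CPU's magazine index is derived.)
	 */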
2020
2021	/* Initialize slot queue heads and resupply locks. */
2022	OSQueueHead q0 = OS_ATOMIC_QUEUE_INIT;
2023	for (i = 0; i < nanozone->phys_ncpus; ++i) {
2024		_malloc_lock_init(&nanozone->band_resupply_lock[i]);
2025
2026		for (j = 0; j < NANO_SLOT_SIZE; ++j) {
2027			nanozone->meta_data[i][j].slot_LIFO = q0;
2028		}
2029	}
2030
2031	/* Initialize the security token. */
2032	if (0 == _dyld_get_image_slide((const struct mach_header*)_NSGetMachExecuteHeader())) {
2033		// A zero slide means ASLR has been disabled by boot-arg; eliminate pointer cloaking.
2034		malloc_entropy[0] = 0;
2035		malloc_entropy[1] = 0;
2036	}
2037	nanozone->cookie = (uintptr_t)malloc_entropy[0] & 0x0000ffffffff0000ULL; // scramble the central 32 bits with this cookie
2038
2039	/* Nano zone does not support SCALABLE_MALLOC_ADD_GUARD_PAGES. */
2040	if (nanozone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) {
2041		_malloc_printf(ASL_LEVEL_INFO, "nano zone does not support guard pages\n");
2042		nanozone->debug_flags &= ~SCALABLE_MALLOC_ADD_GUARD_PAGES;
2043	}
2044
2045	nanozone->helper_zone = helper_zone;
2046
2047	return (malloc_zone_t *)nanozone;
2048}
2049#endif /* defined(__LP64__) */
2050
2051/* vim: set noet:ts=4:sw=4:cindent: */
2052