1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	kern/zalloc.c
60 *	Author:	Avadis Tevanian, Jr.
61 *
62 *	Zone-based memory allocator.  A zone is a collection of fixed size
63 *	data blocks for which quick allocation/deallocation is possible.
64 */
65#include <zone_debug.h>
66#include <zone_alias_addr.h>
67#include <norma_vm.h>
68#include <mach_kdb.h>
69
70#include <mach/mach_types.h>
71#include <mach/vm_param.h>
72#include <mach/kern_return.h>
73#include <mach/mach_host_server.h>
74#include <mach/machine/vm_types.h>
75#include <mach_debug/zone_info.h>
76
77#include <kern/kern_types.h>
78#include <kern/assert.h>
79#include <kern/host.h>
80#include <kern/macro_help.h>
81#include <kern/sched.h>
82#include <kern/lock.h>
83#include <kern/sched_prim.h>
84#include <kern/misc_protos.h>
85#include <kern/thread_call.h>
86#include <kern/zalloc.h>
87#include <kern/kalloc.h>
88
89#include <vm/pmap.h>
90#include <vm/vm_map.h>
91#include <vm/vm_kern.h>
92#include <vm/vm_page.h>
93
94#include <machine/machparam.h>
95
96#include <libkern/OSDebug.h>
97#include <sys/kdebug.h>
98
99#if defined(__ppc__)
100/* for fake zone stat routines */
101#include <ppc/savearea.h>
102#include <ppc/mappings.h>
103#endif
104
105
106/*
107 * Zone Corruption Debugging
108 *
109 * We provide three methods to detect use of a zone element after it's been freed.  These
110 * checks are enabled by specifying "-zc" and/or "-zp" in the boot-args:
111 *
112 * (1) Range-check the free-list "next" ptr for sanity.
113 * (2) Store the ptr in two different words, and compare them against
114 *     each other when re-using the zone element, to detect modifications.
115 * (3) poison the freed memory by overwriting it with 0xdeadbeef.
116 *
117 * The first two checks are fairly lightweight and are enabled by specifying "-zc"
118 * in the boot-args.  If you want more aggressive checking for use-after-free bugs
119 * and you don't mind the additional overhead, then turn on poisoning by adding
120 * "-zp" to the boot-args in addition to "-zc".  If you specify -zp without -zc,
121 * it still poisons the memory when it's freed, but doesn't check if the memory
122 * has been altered later when it's reallocated.
123 */
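
/*
 * Example (illustrative only): to turn on both the light-weight free-list
 * checks and freed-element poisoning, add both flags to the boot-args, e.g.
 *
 *	nvram boot-args="-zc -zp"
 */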
124
125boolean_t check_freed_element = FALSE;		/* enabled by -zc in boot-args */
126boolean_t zfree_clear = FALSE;			/* enabled by -zp in boot-args */
127
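/*
 * A stored free-list pointer is considered sane if it is either NULL or a
 * 4-byte-aligned address at or above vm_min_kernel_address.
 */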
128#define is_kernel_data_addr(a)	(!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
129
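/*
 * ADD_TO_ZONE pushes a freed element onto the zone's singly-linked free list,
 * optionally poisoning its contents (-zp) and mirroring the free-list pointer
 * in the element's last word (-zc).  REMOVE_FROM_ZONE pops the head of the
 * free list and, when those checks are enabled, panics if the mirrored
 * pointer or the poison pattern was disturbed while the element was free.
 */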
130#define ADD_TO_ZONE(zone, element)					\
131MACRO_BEGIN								\
132	if (zfree_clear)						\
133	{   unsigned int i;						\
134	    for (i=0;							\
135		 i < zone->elem_size/sizeof(uint32_t);			\
136		 i++)							\
137	    ((uint32_t *)(element))[i] = 0xdeadbeef;			\
138	}								\
139	*((vm_offset_t *)(element)) = (zone)->free_elements;		\
140	if (check_freed_element) {					\
141		if ((zone)->elem_size >= (2 * sizeof(vm_offset_t)))	\
142			((vm_offset_t *)(element))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
143				(zone)->free_elements;			\
144	}								\
145	(zone)->free_elements = (vm_offset_t) (element);		\
146	(zone)->count--;						\
147MACRO_END
148
149#define REMOVE_FROM_ZONE(zone, ret, type)					\
150MACRO_BEGIN									\
151	(ret) = (type) (zone)->free_elements;					\
152	if ((ret) != (type) 0) {						\
153		if (check_freed_element) {					\
154			if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0]) ||	\
155			    ((zone)->elem_size >= (2 * sizeof(vm_offset_t)) &&	\
156			    ((vm_offset_t *)(ret))[((zone)->elem_size/sizeof(vm_offset_t))-1] != \
157			    ((vm_offset_t *)(ret))[0]))				\
158				panic("a freed zone element has been modified");\
159			if (zfree_clear) {					\
160				unsigned int ii;				\
161				for (ii = sizeof(vm_offset_t) / sizeof(uint32_t); \
162					 ii < zone->elem_size/sizeof(uint32_t) - sizeof(vm_offset_t) / sizeof(uint32_t); \
163					 ii++)					\
164					if (((uint32_t *)(ret))[ii] != (uint32_t)0xdeadbeef) \
165						panic("a freed zone element has been modified");\
166			}							\
167		}								\
168		(zone)->count++;						\
169		(zone)->free_elements = *((vm_offset_t *)(ret));		\
170	}									\
171MACRO_END
172
173#if	ZONE_DEBUG
174#define zone_debug_enabled(z) z->active_zones.next
175#define	ROUNDUP(x,y)		((((x)+(y)-1)/(y))*(y))
176#define ZONE_DEBUG_OFFSET	ROUNDUP(sizeof(queue_chain_t),16)
177#endif	/* ZONE_DEBUG */
178
179/*
180 * Support for garbage collection of unused zone pages:
181 */
182
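/*
 * One entry per page in the zone map: alloc_count counts the zone elements
 * allocated from the page (or is ZONE_PAGE_UNUSED), and collect_count counts
 * the free elements found on the page during a garbage-collection pass.
 */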
183struct zone_page_table_entry {
184	struct zone_page_table_entry	*link;
185	short	alloc_count;
186	short	collect_count;
187};
188
189/* Forwards */
190void		zone_page_init(
191				vm_offset_t	addr,
192				vm_size_t	size,
193				int		value);
194
195void		zone_page_alloc(
196				vm_offset_t	addr,
197				vm_size_t	size);
198
199void		zone_page_free_element(
200				struct zone_page_table_entry	**free_pages,
201				vm_offset_t	addr,
202				vm_size_t	size);
203
204void		zone_page_collect(
205				vm_offset_t	addr,
206				vm_size_t	size);
207
208boolean_t	zone_page_collectable(
209				vm_offset_t	addr,
210				vm_size_t	size);
211
212void		zone_page_keep(
213				vm_offset_t	addr,
214				vm_size_t	size);
215
216void		zalloc_async(
217				thread_call_param_t	p0,
218				thread_call_param_t	p1);
219
220
221#if	ZONE_DEBUG && MACH_KDB
222int		zone_count(
223				zone_t		z,
224				int		tail);
225#endif	/* ZONE_DEBUG && MACH_KDB */
226
227vm_map_t	zone_map = VM_MAP_NULL;
228
229zone_t		zone_zone = ZONE_NULL;	/* the zone containing other zones */
230
231/*
232 *	The VM system gives us an initial chunk of memory.
233 *	It has to be big enough to allocate the zone_zone
234 */
235
236vm_offset_t	zdata;
237vm_size_t	zdata_size;
238
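/*
 * Each zone is protected by its own mutex; these wrappers keep the call
 * sites terse and give each lock a per-zone name and group.
 */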
239#define lock_zone(zone)					\
240MACRO_BEGIN						\
241	lck_mtx_lock(&(zone)->lock);			\
242MACRO_END
243
244#define unlock_zone(zone)				\
245MACRO_BEGIN						\
246	lck_mtx_unlock(&(zone)->lock);			\
247MACRO_END
248
249#define zone_wakeup(zone) thread_wakeup((event_t)(zone))
250#define zone_sleep(zone)				\
251	(void) lck_mtx_sleep(&(zone)->lock, 0, (event_t)(zone), THREAD_UNINT);
252
253
254#define lock_zone_init(zone)				\
255MACRO_BEGIN						\
256	char _name[32];					\
257	(void) snprintf(_name, sizeof (_name), "zone.%s", (zone)->zone_name); \
258	lck_grp_attr_setdefault(&(zone)->lock_grp_attr);		\
259	lck_grp_init(&(zone)->lock_grp, _name, &(zone)->lock_grp_attr);	\
260	lck_attr_setdefault(&(zone)->lock_attr);			\
261	lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext,		\
262	    &(zone)->lock_grp, &(zone)->lock_attr);			\
263MACRO_END
264
265#define lock_try_zone(zone)	lck_mtx_try_lock(&zone->lock)
266
267kern_return_t		zget_space(
268				vm_offset_t size,
269				vm_offset_t *result);
270
271decl_simple_lock_data(,zget_space_lock)
272vm_offset_t	zalloc_next_space;
273vm_offset_t	zalloc_end_of_space;
274vm_size_t	zalloc_wasted_space;
275
276/*
277 *	Garbage collection map information
278 */
279struct zone_page_table_entry *	zone_page_table;
280vm_offset_t			zone_map_min_address;
281vm_offset_t			zone_map_max_address;
282unsigned int			zone_pages;
283
284/*
285 *	Exclude more than one concurrent garbage collection
286 */
287decl_mutex_data(,		zone_gc_lock)
288
289#if	!ZONE_ALIAS_ADDR
290#define from_zone_map(addr, size) \
291	((vm_offset_t)(addr) >= zone_map_min_address && \
292	 ((vm_offset_t)(addr) + size -1) <  zone_map_max_address)
293#else
294#define from_zone_map(addr, size) \
295	((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) >= zone_map_min_address && \
296	 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) + size -1) <  zone_map_max_address)
297#endif
298
299#define	ZONE_PAGE_USED  0
300#define ZONE_PAGE_UNUSED -1
301
302
303/*
304 *	Protects first_zone, last_zone, num_zones,
305 *	and the next_zone field of zones.
306 */
307decl_simple_lock_data(,	all_zones_lock)
308zone_t			first_zone;
309zone_t			*last_zone;
310unsigned int		num_zones;
311
312boolean_t zone_gc_allowed = TRUE;
313boolean_t zone_gc_forced = FALSE;
314boolean_t panic_include_zprint = FALSE;
315unsigned zone_gc_last_tick = 0;
316unsigned zone_gc_max_rate = 0;		/* in ticks */
317
318/*
319 * Zone leak debugging code
320 *
321 * When enabled, this code keeps a log to track allocations to a particular zone that have not
322 * yet been freed.  Examining this log will reveal the source of a zone leak.  The log is allocated
323 * only when logging is enabled, so there is no effect on the system when it's turned off.  Logging is
324 * off by default.
325 *
326 * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone>
327 * is the name of the zone you wish to log.
328 *
329 * This code only tracks one zone, so you need to identify which one is leaking first.
330 * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone
331 * garbage collector.  Note that the zone name printed in the panic message is not necessarily the one
332 * containing the leak.  So do a zprint from gdb and locate the zone with the bloated size.  This
333 * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test.  The
334 * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
335 * See the help in the kgmacros for usage info.
336 *
337 *
338 * Zone corruption logging
339 *
340 * Logging can also be used to help identify the source of a zone corruption.  First, identify the zone
341 * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args.  When -zc is used in conjunction
342 * with zlog, it changes the logging style to track both allocations and frees to the zone.  So when the
343 * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
344 * and freed any particular element in the zone.  Use the findelem kgmacro with the address of the element that's been
345 * corrupted to examine its history.  This should lead to the source of the corruption.
346 */
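
/*
 * Example (illustrative): to watch for leaks in a zone whose name is, say,
 * "vm objects", with a 4000-record log, boot with
 *
 *	zlog=vm.objects zrecs=4000
 *
 * (a '.' in the zlog value matches a space in the zone name, since spaces
 * cannot appear in boot-args).
 */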
347
348static int log_records;	/* size of the log, expressed in number of records */
349
350#define MAX_ZONE_NAME	32	/* max length of a zone name we can take from the boot-args */
351
352static char zone_name_to_log[MAX_ZONE_NAME] = "";	/* the zone name we're logging, if any */
353
354/*
355 * The number of records in the log is configurable via the zrecs parameter in boot-args.  Set this to
356 * the number of records you want in the log.  For example, "zrecs=1000" sets it to 1000 records.  Note
357 * that the larger the size of the log, the slower the system will run due to linear searching in the log,
358 * but one doesn't generally care about performance when tracking down a leak.  The log is capped at 8000
359 * records since going much larger than this tends to make the system unresponsive and unbootable on small
360 * memory configurations.  The default value is 4000 records.
361 *
362 * MAX_DEPTH configures how deep a stack trace is taken on each zalloc in the zone of interest.  15
363 * levels is usually enough to get past all the layers of code in kalloc and IOKit and see who the actual
364 * caller is up above these lower levels.
365 */
366
367#define ZRECORDS_MAX 		8000		/* Max records allowed in the log */
368#define ZRECORDS_DEFAULT	4000		/* default records in log if zrecs is not specified in boot-args */
369#define MAX_DEPTH 		15		/* number of levels of the stack trace to record */
370
371/*
372 * Each record in the log contains a pointer to the zone element it refers to, a "time" number that allows
373 * the records to be ordered chronologically, and a small array to hold the pc's from the stack trace.  A
374 * record is added to the log each time a zalloc() is done in the zone_of_interest.  For leak debugging,
375 * the record is cleared when a zfree() is done.  For corruption debugging, the log tracks both allocs and frees.
376 * If the log fills, old records are replaced as if it were a circular buffer.
377 */
378
379struct zrecord {
380        void		*z_element;		/* the element that was zalloc'ed or zfree'ed */
381        uint32_t	z_opcode:1,		/* whether it was a zalloc or zfree */
382			z_time:31;		/* time index when operation was done */
383        void		*z_pc[MAX_DEPTH];	/* stack trace of caller */
384};
385
386/*
387 * Opcodes for the z_opcode field:
388 */
389
390#define ZOP_ALLOC	1
391#define ZOP_FREE	0
392
393/*
394 * The allocation log and all the related variables are protected by the zone lock for the zone_of_interest
395 */
396
397static struct zrecord *zrecords;		/* the log itself, dynamically allocated when logging is enabled  */
398static int zcurrent  = 0;			/* index of the next slot in the log to use */
399static int zrecorded = 0;			/* number of allocations recorded in the log */
400static unsigned int ztime = 0;			/* a timestamp of sorts */
401static zone_t  zone_of_interest = NULL;		/* the zone being watched; corresponds to zone_name_to_log */
402
403/*
404 * Decide if we want to log this zone by doing a string compare between a zone name and the name
405 * of the zone to log. Return true if the strings are equal, false otherwise.  Because it's not
406 * possible to include spaces in strings passed in via the boot-args, a period in the logname will
407 * match a space in the zone name.
408 */
409
410static int
411log_this_zone(const char *zonename, const char *logname)
412{
413	int len;
414	const char *zc = zonename;
415	const char *lc = logname;
416
417	/*
418	 * Compare the strings.  We bound the compare by MAX_ZONE_NAME.
419	 */
420
421	for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) {
422
423		/*
424		 * If the current characters don't match, check for a space
425		 * in the zone name and a corresponding period in the log name.
426		 * If that's not there, then the strings don't match.
427		 */
428
429		if (*zc != *lc && !(*zc == ' ' && *lc == '.'))
430			break;
431
432		/*
433		 * The strings are equal so far.  If we're at the end, then it's a match.
434		 */
435
436		if (*zc == '\0')
437			return TRUE;
438	}
439
440	return FALSE;
441}
442
443
444/*
445 * Test if we want to log this zalloc/zfree event.  We log if this is the zone we're interested in and
446 * the buffer for the records has been allocated.
447 */
448
449#define DO_LOGGING(z)		(zrecords && (z) == zone_of_interest)
450
451extern boolean_t zlog_ready;
452
453
454/*
455 *	zinit initializes a new zone.  The zone data structures themselves
456 *	are stored in a zone, which is initially a static structure that
457 *	is initialized by zone_init.
458 */
459zone_t
460zinit(
461	vm_size_t	size,		/* the size of an element */
462	vm_size_t	max,		/* maximum memory to use */
463	vm_size_t	alloc,		/* allocation size */
464	const char	*name)		/* a name for the zone */
465{
466	zone_t		z;
467
468	if (zone_zone == ZONE_NULL) {
469		if (zget_space(sizeof(struct zone), (vm_offset_t *)&z)
470		    != KERN_SUCCESS)
471			return(ZONE_NULL);
472	} else
473		z = (zone_t) zalloc(zone_zone);
474	if (z == ZONE_NULL)
475		return(ZONE_NULL);
476
477	/*
478	 *	Round off all the parameters appropriately.
479	 */
480	if (size < sizeof(z->free_elements))
481		size = sizeof(z->free_elements);
482	size = ((size-1)  + sizeof(z->free_elements)) -
483		((size-1) % sizeof(z->free_elements));
484 	if (alloc == 0)
485		alloc = PAGE_SIZE;
486	alloc = round_page(alloc);
487	max   = round_page(max);
488	/*
489	 * We look for an allocation size with less than 1% waste
490	 * up to 5 pages in size...
491	 * otherwise, we look for the allocation size with the least
492	 * fragmentation in the range of 1 - 5 pages.
493	 * This size will be used unless the user's suggested size
494	 * is larger AND has less fragmentation.
495	 */
496#if	ZONE_ALIAS_ADDR
497	if ((size < PAGE_SIZE) && (PAGE_SIZE % size <= PAGE_SIZE / 10))
498		alloc = PAGE_SIZE;
499	else
500#endif
501	{	vm_size_t best, waste; unsigned int i;
502		best  = PAGE_SIZE;
503		waste = best % size;
504
505		for (i = 1; i <= 5; i++) {
506		        vm_size_t tsize, twaste;
507
508			tsize = i * PAGE_SIZE;
509
510			if ((tsize % size) < (tsize / 100)) {
511			        alloc = tsize;
512				goto use_this_allocation;
513			}
514			twaste = tsize % size;
515			if (twaste < waste)
516				best = tsize, waste = twaste;
517		}
518		if (alloc <= best || (alloc % size >= waste))
519			alloc = best;
520	}
521use_this_allocation:
522	if (max && (max < alloc))
523		max = alloc;
524
525	z->free_elements = 0;
526	z->cur_size = 0;
527	z->max_size = max;
528	z->elem_size = size;
529	z->alloc_size = alloc;
530	z->zone_name = name;
531	z->count = 0;
532	z->doing_alloc = FALSE;
533	z->doing_gc = FALSE;
534	z->exhaustible = FALSE;
535	z->collectable = TRUE;
536	z->allows_foreign = FALSE;
537	z->expandable  = TRUE;
538	z->waiting = FALSE;
539	z->async_pending = FALSE;
540
541#if	ZONE_DEBUG
542	z->active_zones.next = z->active_zones.prev = NULL;
543	zone_debug_enable(z);
544#endif	/* ZONE_DEBUG */
545	lock_zone_init(z);
546
547	/*
548	 *	Add the zone to the all-zones list.
549	 */
550
551	z->next_zone = ZONE_NULL;
552	thread_call_setup(&z->call_async_alloc, zalloc_async, z);
553	simple_lock(&all_zones_lock);
554	*last_zone = z;
555	last_zone = &z->next_zone;
556	num_zones++;
557	simple_unlock(&all_zones_lock);
558
559	/*
560	 * Check if we should be logging this zone.  If so, remember the zone pointer.
561	 */
562
563	 if (log_this_zone(z->zone_name, zone_name_to_log)) {
564	 	zone_of_interest = z;
565	}
566
567	/*
568	 * If we want to log a zone, see if we need to allocate buffer space for the log.  Some vm related zones are
569	 * zinit'ed before we can do a kmem_alloc, so we have to defer allocation in that case.  zlog_ready is set to
570	 * TRUE once enough of the VM system is up and running to allow a kmem_alloc to work.  If we want to log one
571	 * of the VM related zones that's set up early on, we will skip allocation of the log until zinit is called again
572	 * later on some other zone.  So note we may be allocating a buffer to log a zone other than the one being initialized
573	 * right now.
574	 */
575
576	if (zone_of_interest != NULL && zrecords == NULL && zlog_ready) {
577		if (kmem_alloc(kernel_map, (vm_offset_t *)&zrecords, log_records * sizeof(struct zrecord)) == KERN_SUCCESS) {
578
579			/*
580			 * We got the memory for the log.  Zero it out since the code needs this to identify unused records.
581			 * At this point, everything is set up and we're ready to start logging this zone.
582			 */
583
584			bzero((void *)zrecords, log_records * sizeof(struct zrecord));
585			printf("zone: logging started for zone %s (%p)\n", zone_of_interest->zone_name, zone_of_interest);
586
587		} else {
588			printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n");
589			zone_of_interest = NULL;
590		}
591	}
592
593	return(z);
594}
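
/*
 * Example (illustrative; the "widget" names are hypothetical): a typical
 * client creates its zone once at initialization time, then allocates and
 * frees fixed-size elements from it:
 *
 *	static zone_t widget_zone;
 *
 *	widget_zone = zinit(sizeof(struct widget),		(element size)
 *			    1024 * sizeof(struct widget),	(max memory)
 *			    PAGE_SIZE,				(allocation size)
 *			    "widgets");
 *	struct widget *w = (struct widget *) zalloc(widget_zone);
 *	...
 *	zfree(widget_zone, w);
 */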
595
596/*
597 *	Cram the given memory into the specified zone.
598 */
599void
600zcram(
601	register zone_t		zone,
602	void			*newaddr,
603	vm_size_t		size)
604{
605	register vm_size_t	elem_size;
606	vm_offset_t		newmem = (vm_offset_t) newaddr;
607
608	/* Basic sanity checks */
609	assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
610	assert(!zone->collectable || zone->allows_foreign
611		|| (from_zone_map(newmem, size)));
612
613	elem_size = zone->elem_size;
614
615	lock_zone(zone);
616	while (size >= elem_size) {
617		ADD_TO_ZONE(zone, newmem);
618		if (from_zone_map(newmem, elem_size))
619			zone_page_alloc(newmem, elem_size);
620		zone->count++;	/* compensate for ADD_TO_ZONE */
621		size -= elem_size;
622		newmem += elem_size;
623		zone->cur_size += elem_size;
624	}
625	unlock_zone(zone);
626}
627
628/*
629 * Contiguous space allocator for non-paged zones. Allocates "size" amount
630 * of memory from zone_map.
631 */
632
633kern_return_t
634zget_space(
635	vm_offset_t size,
636	vm_offset_t *result)
637{
638	vm_offset_t	new_space = 0;
639	vm_size_t	space_to_add = 0;
640
641	simple_lock(&zget_space_lock);
642	while ((zalloc_next_space + size) > zalloc_end_of_space) {
643		/*
644		 *	Add at least one page to allocation area.
645		 */
646
647		space_to_add = round_page(size);
648
649		if (new_space == 0) {
650			kern_return_t retval;
651			/*
652			 *	Memory cannot be wired down while holding
653			 *	any locks that the pageout daemon might
654			 *	need to free up pages.  [Making the zget_space
655			 *	lock a complex lock does not help in this
656			 *	regard.]
657			 *
658			 *	Unlock and allocate memory.  Because several
659			 *	threads might try to do this at once, don't
660			 *	use the memory before checking for available
661			 *	space again.
662			 */
663
664			simple_unlock(&zget_space_lock);
665
666			retval = kernel_memory_allocate(zone_map, &new_space,
667				space_to_add, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
668			if (retval != KERN_SUCCESS)
669				return(retval);
670#if	ZONE_ALIAS_ADDR
671		 	if (space_to_add == PAGE_SIZE)
672				new_space = zone_alias_addr(new_space);
673#endif
674			zone_page_init(new_space, space_to_add,
675							ZONE_PAGE_USED);
676			simple_lock(&zget_space_lock);
677			continue;
678		}
679
680
681		/*
682	  	 *	Memory was allocated in a previous iteration.
683		 *
684		 *	Check whether the new region is contiguous
685		 *	with the old one.
686		 */
687
688		if (new_space != zalloc_end_of_space) {
689			/*
690			 *	Throw away the remainder of the
691			 *	old space, and start a new one.
692			 */
693			zalloc_wasted_space +=
694				zalloc_end_of_space - zalloc_next_space;
695			zalloc_next_space = new_space;
696		}
697
698		zalloc_end_of_space = new_space + space_to_add;
699
700		new_space = 0;
701	}
702	*result = zalloc_next_space;
703	zalloc_next_space += size;
704	simple_unlock(&zget_space_lock);
705
706	if (new_space != 0)
707		kmem_free(zone_map, new_space, space_to_add);
708
709	return(KERN_SUCCESS);
710}
711
712
713/*
714 *	Steal memory for the zone package.  Called from
715 *	vm_page_bootstrap().
716 */
717void
718zone_steal_memory(void)
719{
720	zdata_size = round_page(128*sizeof(struct zone));
721	zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0);
722}
723
724
725/*
726 * Fill a zone with enough memory to contain at least nelem elements.
727 * Memory is obtained with kmem_alloc_wired from the kernel_map.
728 * Return the number of elements actually put into the zone, which may
729 * be more than the caller asked for since the memory allocation is
730 * rounded up to a full page.
731 */
732int
733zfill(
734	zone_t	zone,
735	int	nelem)
736{
737	kern_return_t	kr;
738	vm_size_t	size;
739	vm_offset_t	memory;
740	int		nalloc;
741
742	assert(nelem > 0);
743	if (nelem <= 0)
744		return 0;
745	size = nelem * zone->elem_size;
746	size = round_page(size);
747	kr = kmem_alloc_wired(kernel_map, &memory, size);
748	if (kr != KERN_SUCCESS)
749		return 0;
750
751	zone_change(zone, Z_FOREIGN, TRUE);
752	zcram(zone, (void *)memory, size);
753	nalloc = size / zone->elem_size;
754	assert(nalloc >= nelem);
755
756	return nalloc;
757}
758
759/*
760 *	Initialize the "zone of zones" which uses fixed memory allocated
761 *	earlier in memory initialization.  zone_bootstrap is called
762 *	before zone_init.
763 */
764void
765zone_bootstrap(void)
766{
767	vm_size_t zone_zone_size;
768	vm_offset_t zone_zone_space;
769	char temp_buf[16];
770
771	/* see if we want freed zone element checking and/or poisoning */
772	if (PE_parse_boot_argn("-zc", temp_buf, sizeof (temp_buf))) {
773		check_freed_element = TRUE;
774	}
775
776	if (PE_parse_boot_argn("-zp", temp_buf, sizeof (temp_buf))) {
777		zfree_clear = TRUE;
778	}
779
780	/*
781	 * Check for and set up zone leak detection if requested via boot-args.  We recognize two
782	 * boot-args:
783	 *
784	 *	zlog=<zone_to_log>
785	 *	zrecs=<num_records_in_log>
786	 *
787	 * The zlog arg is used to specify the zone name that should be logged, and zrecs is used to
788	 * control the size of the log.  If zrecs is not specified, a default value is used.
789	 */
790
791	if (PE_parse_boot_argn("zlog", zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) {
792		if (PE_parse_boot_argn("zrecs", &log_records, sizeof(log_records)) == TRUE) {
793
794			/*
795			 * Don't allow more than ZRECORDS_MAX records even if the user asked for more.
796			 * This prevents accidentally hogging too much kernel memory and making the system
797			 * unusable.
798			 */
799
800			log_records = MIN(ZRECORDS_MAX, log_records);
801
802		} else {
803			log_records = ZRECORDS_DEFAULT;
804		}
805	}
806
807	simple_lock_init(&all_zones_lock, 0);
808
809	first_zone = ZONE_NULL;
810	last_zone = &first_zone;
811	num_zones = 0;
812
813	simple_lock_init(&zget_space_lock, 0);
814	zalloc_next_space = zdata;
815	zalloc_end_of_space = zdata + zdata_size;
816	zalloc_wasted_space = 0;
817
818	/* assertion: nobody else called zinit before us */
819	assert(zone_zone == ZONE_NULL);
820	zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
821			  sizeof(struct zone), "zones");
822	zone_change(zone_zone, Z_COLLECT, FALSE);
823	zone_zone_size = zalloc_end_of_space - zalloc_next_space;
824	zget_space(zone_zone_size, &zone_zone_space);
825	zcram(zone_zone, (void *)zone_zone_space, zone_zone_size);
826}
827
828void
829zone_init(
830	vm_size_t max_zonemap_size)
831{
832	kern_return_t	retval;
833	vm_offset_t	zone_min;
834	vm_offset_t	zone_max;
835	vm_size_t	zone_table_size;
836
837	retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
838						FALSE, VM_FLAGS_ANYWHERE, &zone_map);
839
840	if (retval != KERN_SUCCESS)
841		panic("zone_init: kmem_suballoc failed");
842	zone_max = zone_min + round_page(max_zonemap_size);
843	/*
844	 * Setup garbage collection information:
845	 */
846	zone_table_size = atop_32(zone_max - zone_min) *
847				sizeof(struct zone_page_table_entry);
848	if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
849			     zone_table_size) != KERN_SUCCESS)
850		panic("zone_init");
851	zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
852	zone_pages = atop_32(zone_max - zone_min);
853	zone_map_min_address = zone_min;
854	zone_map_max_address = zone_max;
855	mutex_init(&zone_gc_lock, 0);
856	zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
857}
858
859
860/*
861 *	zalloc returns an element from the specified zone.
862 */
863void *
864zalloc_canblock(
865	register zone_t	zone,
866	boolean_t canblock)
867{
868	vm_offset_t	addr;
869	kern_return_t retval;
870	void	  	*bt[MAX_DEPTH];		/* only used if zone logging is enabled */
871	int 		numsaved = 0;
872	int		i;
873
874	assert(zone != ZONE_NULL);
875
876	/*
877	 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
878	 */
879
880	if (DO_LOGGING(zone))
881	        numsaved = OSBacktrace(&bt[0], MAX_DEPTH);
882
883	lock_zone(zone);
884
885	REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
886
887	while ((addr == 0) && canblock && (zone->doing_gc)) {
888		zone->waiting = TRUE;
889		zone_sleep(zone);
890		REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
891	}
892
893	while ((addr == 0) && canblock) {
894		/*
895 		 *	If nothing was there, try to get more
896		 */
897		if (zone->doing_alloc) {
898			/*
899			 *	Someone is allocating memory for this zone.
900			 *	Wait for it to show up, then try again.
901			 */
902			zone->waiting = TRUE;
903			zone_sleep(zone);
904		}
905		else {
906			if ((zone->cur_size + zone->elem_size) >
907			    zone->max_size) {
908				if (zone->exhaustible)
909					break;
910				if (zone->expandable) {
911					/*
912					 * We're willing to overflow certain
913					 * zones, but not without complaining.
914					 *
915					 * This is best used in conjunction
916					 * with the collectable flag. What we
917					 * want is an assurance we can get the
918					 * memory back, assuming there's no
919					 * leak.
920					 */
921					zone->max_size += (zone->max_size >> 1);
922				} else {
923					unlock_zone(zone);
924
925					panic("zalloc: zone \"%s\" empty.", zone->zone_name);
926				}
927			}
928			zone->doing_alloc = TRUE;
929			unlock_zone(zone);
930
931			if (zone->collectable) {
932				vm_offset_t space;
933				vm_size_t alloc_size;
934				int retry = 0;
935
936				for (;;) {
937
938				        if (vm_pool_low() || retry >= 1)
939					        alloc_size =
940						  round_page(zone->elem_size);
941					else
942					        alloc_size = zone->alloc_size;
943
944					retval = kernel_memory_allocate(zone_map,
945									&space, alloc_size, 0,
946									KMA_KOBJECT|KMA_NOPAGEWAIT);
947					if (retval == KERN_SUCCESS) {
948#if	ZONE_ALIAS_ADDR
949						if (alloc_size == PAGE_SIZE)
950							space = zone_alias_addr(space);
951#endif
952					        zone_page_init(space, alloc_size,
953							       ZONE_PAGE_USED);
954						zcram(zone, (void *)space, alloc_size);
955
956						break;
957					} else if (retval != KERN_RESOURCE_SHORTAGE) {
958						retry++;
959
960						if (retry == 2) {
961							zone_gc();
962							printf("zalloc did gc\n");
963						}
964					        if (retry == 3) {
965							panic_include_zprint = TRUE;
966						        panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval);
967						}
968					} else {
969					        break;
970					}
971				}
972				lock_zone(zone);
973				zone->doing_alloc = FALSE;
974				if (zone->waiting) {
975					zone->waiting = FALSE;
976					zone_wakeup(zone);
977				}
978				REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
979				if (addr == 0 &&
980					retval == KERN_RESOURCE_SHORTAGE) {
981					unlock_zone(zone);
982
983					VM_PAGE_WAIT();
984					lock_zone(zone);
985				}
986			} else {
987				vm_offset_t space;
988				retval = zget_space(zone->elem_size, &space);
989
990				lock_zone(zone);
991				zone->doing_alloc = FALSE;
992				if (zone->waiting) {
993					zone->waiting = FALSE;
994					thread_wakeup((event_t)zone);
995				}
996				if (retval == KERN_SUCCESS) {
997					zone->count++;
998					zone->cur_size += zone->elem_size;
999#if	ZONE_DEBUG
1000					if (zone_debug_enabled(zone)) {
1001					    enqueue_tail(&zone->active_zones, (queue_entry_t)space);
1002					}
1003#endif
1004					unlock_zone(zone);
1005					zone_page_alloc(space, zone->elem_size);
1006#if	ZONE_DEBUG
1007					if (zone_debug_enabled(zone))
1008						space += ZONE_DEBUG_OFFSET;
1009#endif
1010					addr = space;
1011					goto success;
1012				}
1013				if (retval == KERN_RESOURCE_SHORTAGE) {
1014					unlock_zone(zone);
1015
1016					VM_PAGE_WAIT();
1017					lock_zone(zone);
1018				} else {
1019					panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval);
1020				}
1021			}
1022		}
1023		if (addr == 0)
1024			REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1025	}
1026
1027	/*
1028	 * See if we should be logging allocations in this zone.  Logging is rarely done except when a leak is
1029	 * suspected, so this code rarely executes.  We need to do this code while still holding the zone lock
1030	 * since it protects the various log related data structures.
1031	 */
1032
1033	if (DO_LOGGING(zone) && addr) {
1034
1035		/*
1036		 * Look for a place to record this new allocation.  We implement two different logging strategies
1037		 * depending on whether we're looking for the source of a zone leak or a zone corruption.  When looking
1038		 * for a leak, we want to log as many allocations as possible in order to clearly identify the leaker
1039		 * among all the records.  So we look for an unused slot in the log and fill that in before overwriting
1040		 * an old entry.  When looking for a corruption, however, it's better to have a chronological log of all
1041		 * the allocations and frees done in the zone so that the history of operations for a specific zone
1042		 * element can be inspected.  So in this case, we treat the log as a circular buffer and overwrite the
1043		 * oldest entry whenever a new one needs to be added.
1044		 *
1045		 * The check_freed_element flag tells us what style of logging to do.  It's set if we're supposed to be
1046		 * doing corruption style logging (indicated via -zc in the boot-args).
1047		 */
1048
1049		if (!check_freed_element && zrecords[zcurrent].z_element && zrecorded < log_records) {
1050
1051			/*
1052			 * If we get here, we're doing leak-style logging and there are still unused entries in
1053			 * the log (since zrecorded is smaller than the size of the log).  Look for an unused slot
1054			 * starting at zcurrent and wrap around if we reach the end of the buffer.  If the buffer
1055			 * is already full, we just fall through and overwrite the element indexed by zcurrent.
1056		 	 */
1057
1058		       for (i = zcurrent; i < log_records; i++) {
1059			        if (zrecords[i].z_element == NULL) {
1060				        zcurrent = i;
1061				        goto empty_slot;
1062				}
1063			}
1064
1065			for (i = 0; i < zcurrent; i++) {
1066			        if (zrecords[i].z_element == NULL) {
1067				        zcurrent = i;
1068				        goto empty_slot;
1069				}
1070			}
1071		 }
1072
1073		/*
1074		 * Save a record of this allocation
1075		 */
1076
1077empty_slot:
1078		  if (zrecords[zcurrent].z_element == NULL)
1079		        zrecorded++;
1080
1081		  zrecords[zcurrent].z_element = (void *)addr;
1082		  zrecords[zcurrent].z_time = ztime++;
1083		  zrecords[zcurrent].z_opcode = ZOP_ALLOC;
1084
1085		  for (i = 0; i < numsaved; i++)
1086		        zrecords[zcurrent].z_pc[i] = bt[i];
1087
1088		  for (; i < MAX_DEPTH; i++)
1089			zrecords[zcurrent].z_pc[i] = 0;
1090
1091		  zcurrent++;
1092
1093		  if (zcurrent >= log_records)
1094		          zcurrent = 0;
1095	}
1096
1097	if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
1098		zone->async_pending = TRUE;
1099		unlock_zone(zone);
1100		thread_call_enter(&zone->call_async_alloc);
1101		lock_zone(zone);
1102		REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1103	}
1104
1105#if	ZONE_DEBUG
1106	if (addr && zone_debug_enabled(zone)) {
1107		enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
1108		addr += ZONE_DEBUG_OFFSET;
1109	}
1110#endif
1111
1112	unlock_zone(zone);
1113
1114success:
1115	TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr);
1116
1117	return((void *)addr);
1118}
1119
1120
1121void *
1122zalloc(
1123       register zone_t zone)
1124{
1125  return( zalloc_canblock(zone, TRUE) );
1126}
1127
1128void *
1129zalloc_noblock(
1130	       register zone_t zone)
1131{
1132  return( zalloc_canblock(zone, FALSE) );
1133}
1134
1135void
1136zalloc_async(
1137	thread_call_param_t          p0,
1138	__unused thread_call_param_t p1)
1139{
1140	void *elt;
1141
1142	elt = zalloc_canblock((zone_t)p0, TRUE);
1143	zfree((zone_t)p0, elt);
1144	lock_zone(((zone_t)p0));
1145	((zone_t)p0)->async_pending = FALSE;
1146	unlock_zone(((zone_t)p0));
1147}
1148
1149
1150/*
1151 *	zget returns an element from the specified zone
1152 *	and returns NULL immediately if none is available.
1153 *
1154 *	This form should be used when you cannot block (for example,
1155 *	when processing an interrupt).
1156 */
1157void *
1158zget(
1159	register zone_t	zone)
1160{
1161	register vm_offset_t	addr;
1162
1163	assert( zone != ZONE_NULL );
1164
1165	if (!lock_try_zone(zone))
1166		return NULL;
1167
1168	REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
1169#if	ZONE_DEBUG
1170	if (addr && zone_debug_enabled(zone)) {
1171		enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
1172		addr += ZONE_DEBUG_OFFSET;
1173	}
1174#endif	/* ZONE_DEBUG */
1175	unlock_zone(zone);
1176
1177	return((void *) addr);
1178}
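
/*
 * Example (illustrative): a caller in a context that cannot block simply
 * treats an empty zone as a transient failure:
 *
 *	elem = zget(some_zone);
 *	if (elem == NULL)
 *		return KERN_RESOURCE_SHORTAGE;
 */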
1179
1180/* Keep this FALSE by default.  Large memory machines run orders of magnitude
1181   slower in debug mode when it is TRUE.  Use the debugger to enable it if needed. */
1182/* static */ boolean_t zone_check = FALSE;
1183
1184static zone_t zone_last_bogus_zone = ZONE_NULL;
1185static vm_offset_t zone_last_bogus_elem = 0;
1186
1187void
1188zfree(
1189	register zone_t	zone,
1190	void 		*addr)
1191{
1192	vm_offset_t	elem = (vm_offset_t) addr;
1193	void		*bt[MAX_DEPTH];			/* only used if zone logging is enabled via boot-args */
1194	int		numsaved = 0;
1195
1196	assert(zone != ZONE_NULL);
1197
1198	/*
1199	 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
1200	 */
1201
1202	if (DO_LOGGING(zone))
1203		numsaved = OSBacktrace(&bt[0], MAX_DEPTH);
1204
1205#if MACH_ASSERT
1206	/* Basic sanity checks */
1207	if (zone == ZONE_NULL || elem == (vm_offset_t)0)
1208		panic("zfree: NULL");
1209	/* zone_gc assumes zones are never freed */
1210	if (zone == zone_zone)
1211		panic("zfree: freeing to zone_zone breaks zone_gc!");
1212#endif
1213
1214	TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (int)addr);
1215
1216	if (zone->collectable && !zone->allows_foreign &&
1217	    !from_zone_map(elem, zone->elem_size)) {
1218#if MACH_ASSERT
1219		panic("zfree: non-allocated memory in collectable zone!");
1220#endif
1221		zone_last_bogus_zone = zone;
1222		zone_last_bogus_elem = elem;
1223		return;
1224	}
1225
1226	lock_zone(zone);
1227
1228	/*
1229	 * See if we're doing logging on this zone.  There are two styles of logging used depending on
1230	 * whether we're trying to catch a leak or corruption.  See comments above in zalloc for details.
1231	 */
1232
1233	if (DO_LOGGING(zone)) {
1234	        int  i;
1235
1236		if (check_freed_element) {
1237
1238			/*
1239			 * We're logging to catch a corruption.  Add a record of this zfree operation
1240			 * to log.
1241			 */
1242
1243			if (zrecords[zcurrent].z_element == NULL)
1244				zrecorded++;
1245
1246			zrecords[zcurrent].z_element = (void *)addr;
1247			zrecords[zcurrent].z_time = ztime++;
1248			zrecords[zcurrent].z_opcode = ZOP_FREE;
1249
1250			for (i = 0; i < numsaved; i++)
1251				zrecords[zcurrent].z_pc[i] = bt[i];
1252
1253			for (; i < MAX_DEPTH; i++)
1254				zrecords[zcurrent].z_pc[i] = 0;
1255
1256			zcurrent++;
1257
1258			if (zcurrent >= log_records)
1259				zcurrent = 0;
1260
1261		} else {
1262
1263			/*
1264			 * We're logging to catch a leak. Remove any record we might have for this
1265			 * element since it's being freed.  Note that we may not find it if the buffer
1266			 * overflowed and that's OK.  Since the log is of a limited size, old records
1267			 * get overwritten if there are more zallocs than zfrees.
1268			 */
1269
1270		        for (i = 0; i < log_records; i++) {
1271			        if (zrecords[i].z_element == addr) {
1272				        zrecords[i].z_element = NULL;
1273					zcurrent = i;
1274					zrecorded--;
1275					break;
1276				}
1277			}
1278		}
1279	}
1280
1281
1282#if	ZONE_DEBUG
1283	if (zone_debug_enabled(zone)) {
1284		queue_t tmp_elem;
1285
1286		elem -= ZONE_DEBUG_OFFSET;
1287		if (zone_check) {
1288			/* check the zone's consistency */
1289
1290			for (tmp_elem = queue_first(&zone->active_zones);
1291			     !queue_end(tmp_elem, &zone->active_zones);
1292			     tmp_elem = queue_next(tmp_elem))
1293				if (elem == (vm_offset_t)tmp_elem)
1294					break;
1295			if (elem != (vm_offset_t)tmp_elem)
1296				panic("zfree()ing element from wrong zone");
1297		}
1298		remqueue(&zone->active_zones, (queue_t) elem);
1299	}
1300#endif	/* ZONE_DEBUG */
1301	if (zone_check) {
1302		vm_offset_t this;
1303
1304		/* check the zone's consistency */
1305
1306		for (this = zone->free_elements;
1307		     this != 0;
1308		     this = * (vm_offset_t *) this)
1309			if (!pmap_kernel_va(this) || this == elem)
1310				panic("zfree");
1311	}
1312	ADD_TO_ZONE(zone, elem);
1313
1314	/*
1315	 * If elements are a page or more in size, and memory is low,
1316	 * request that the zone garbage collector run the next
1317	 * time the pageout thread runs.
1318	 */
1319	if (zone->elem_size >= PAGE_SIZE &&
1320	    vm_pool_low()){
1321		zone_gc_forced = TRUE;
1322	}
1323	unlock_zone(zone);
1324}
1325
1326
1327/*	Change a zone's flags.
1328 *	This routine must be called immediately after zinit.
1329 */
1330void
1331zone_change(
1332	zone_t		zone,
1333	unsigned int	item,
1334	boolean_t	value)
1335{
1336	assert( zone != ZONE_NULL );
1337	assert( value == TRUE || value == FALSE );
1338
1339	switch(item){
1340		case Z_EXHAUST:
1341			zone->exhaustible = value;
1342			break;
1343		case Z_COLLECT:
1344			zone->collectable = value;
1345			break;
1346		case Z_EXPAND:
1347			zone->expandable = value;
1348			break;
1349		case Z_FOREIGN:
1350			zone->allows_foreign = value;
1351			break;
1352#if MACH_ASSERT
1353		default:
1354			panic("Zone_change: Wrong Item Type!");
1355			/* break; */
1356#endif
1357	}
1358}
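
/*
 * Example (illustrative; "my_zone" is hypothetical): right after zinit, a
 * caller might mark its zone exhaustible so that allocations fail rather
 * than expand the zone past max_size:
 *
 *	my_zone = zinit(sizeof(struct my_elem), max_mem, PAGE_SIZE, "my elems");
 *	zone_change(my_zone, Z_EXHAUST, TRUE);
 */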
1359
1360/*
1361 * Return the expected number of free elements in the zone.
1362 * This calculation will be incorrect if items are zfree'd that
1363 * were never zalloc'd/zget'd. The correct way to stuff memory
1364 * into a zone is by zcram.
1365 */
1366
1367integer_t
1368zone_free_count(zone_t zone)
1369{
1370	integer_t free_count;
1371
1372	lock_zone(zone);
1373	free_count = zone->cur_size/zone->elem_size - zone->count;
1374	unlock_zone(zone);
1375
1376	assert(free_count >= 0);
1377
1378	return(free_count);
1379}
1380
1381/*
1382 *	zprealloc preallocates wired memory, expanding the specified
1383 *      zone to the specified size
1384 */
1385void
1386zprealloc(
1387	zone_t	zone,
1388	vm_size_t size)
1389{
1390        vm_offset_t addr;
1391
1392	if (size != 0) {
1393		if (kmem_alloc_wired(zone_map, &addr, size) != KERN_SUCCESS)
1394		  panic("zprealloc");
1395		zone_page_init(addr, size, ZONE_PAGE_USED);
1396		zcram(zone, (void *)addr, size);
1397	}
1398}
1399
1400/*
1401 *  Zone garbage collection subroutines
1402 */
1403
1404boolean_t
1405zone_page_collectable(
1406	vm_offset_t	addr,
1407	vm_size_t	size)
1408{
1409	struct zone_page_table_entry	*zp;
1410	natural_t i, j;
1411
1412#if	ZONE_ALIAS_ADDR
1413	addr = zone_virtual_addr(addr);
1414#endif
1415#if MACH_ASSERT
1416	if (!from_zone_map(addr, size))
1417		panic("zone_page_collectable");
1418#endif
1419
1420	i = atop_32(addr-zone_map_min_address);
1421	j = atop_32((addr+size-1) - zone_map_min_address);
1422
1423	for (zp = zone_page_table + i; i <= j; zp++, i++)
1424		if (zp->collect_count == zp->alloc_count)
1425			return (TRUE);
1426
1427	return (FALSE);
1428}
1429
1430void
1431zone_page_keep(
1432	vm_offset_t	addr,
1433	vm_size_t	size)
1434{
1435	struct zone_page_table_entry	*zp;
1436	natural_t i, j;
1437
1438#if	ZONE_ALIAS_ADDR
1439	addr = zone_virtual_addr(addr);
1440#endif
1441#if MACH_ASSERT
1442	if (!from_zone_map(addr, size))
1443		panic("zone_page_keep");
1444#endif
1445
1446	i = atop_32(addr-zone_map_min_address);
1447	j = atop_32((addr+size-1) - zone_map_min_address);
1448
1449	for (zp = zone_page_table + i; i <= j; zp++, i++)
1450		zp->collect_count = 0;
1451}
1452
1453void
1454zone_page_collect(
1455	vm_offset_t	addr,
1456	vm_size_t	size)
1457{
1458	struct zone_page_table_entry	*zp;
1459	natural_t i, j;
1460
1461#if	ZONE_ALIAS_ADDR
1462	addr = zone_virtual_addr(addr);
1463#endif
1464#if MACH_ASSERT
1465	if (!from_zone_map(addr, size))
1466		panic("zone_page_collect");
1467#endif
1468
1469	i = atop_32(addr-zone_map_min_address);
1470	j = atop_32((addr+size-1) - zone_map_min_address);
1471
1472	for (zp = zone_page_table + i; i <= j; zp++, i++)
1473		++zp->collect_count;
1474}
1475
1476void
1477zone_page_init(
1478	vm_offset_t	addr,
1479	vm_size_t	size,
1480	int		value)
1481{
1482	struct zone_page_table_entry	*zp;
1483	natural_t i, j;
1484
1485#if	ZONE_ALIAS_ADDR
1486	addr = zone_virtual_addr(addr);
1487#endif
1488#if MACH_ASSERT
1489	if (!from_zone_map(addr, size))
1490		panic("zone_page_init");
1491#endif
1492
1493	i = atop_32(addr-zone_map_min_address);
1494	j = atop_32((addr+size-1) - zone_map_min_address);
1495
1496	for (zp = zone_page_table + i; i <= j; zp++, i++) {
1497		zp->alloc_count = value;
1498		zp->collect_count = 0;
1499	}
1500}
1501
1502void
1503zone_page_alloc(
1504	vm_offset_t	addr,
1505	vm_size_t	size)
1506{
1507	struct zone_page_table_entry	*zp;
1508	natural_t i, j;
1509
1510#if	ZONE_ALIAS_ADDR
1511	addr = zone_virtual_addr(addr);
1512#endif
1513#if MACH_ASSERT
1514	if (!from_zone_map(addr, size))
1515		panic("zone_page_alloc");
1516#endif
1517
1518	i = atop_32(addr-zone_map_min_address);
1519	j = atop_32((addr+size-1) - zone_map_min_address);
1520
1521	for (zp = zone_page_table + i; i <= j; zp++, i++) {
1522		/*
1523		 * Set alloc_count to (ZONE_PAGE_USED + 1) if
1524		 * it was previously set to ZONE_PAGE_UNUSED.
1525		 */
1526		if (zp->alloc_count == ZONE_PAGE_UNUSED)
1527			zp->alloc_count = 1;
1528		else
1529			++zp->alloc_count;
1530	}
1531}
1532
1533void
1534zone_page_free_element(
1535	struct zone_page_table_entry	**free_pages,
1536	vm_offset_t	addr,
1537	vm_size_t	size)
1538{
1539	struct zone_page_table_entry	*zp;
1540	natural_t i, j;
1541
1542#if	ZONE_ALIAS_ADDR
1543	addr = zone_virtual_addr(addr);
1544#endif
1545#if MACH_ASSERT
1546	if (!from_zone_map(addr, size))
1547		panic("zone_page_free_element");
1548#endif
1549
1550	i = atop_32(addr-zone_map_min_address);
1551	j = atop_32((addr+size-1) - zone_map_min_address);
1552
1553	for (zp = zone_page_table + i; i <= j; zp++, i++) {
1554		if (zp->collect_count > 0)
1555			--zp->collect_count;
1556		if (--zp->alloc_count == 0) {
1557			zp->alloc_count  = ZONE_PAGE_UNUSED;
1558			zp->collect_count = 0;
1559
1560			zp->link = *free_pages;
1561			*free_pages = zp;
1562		}
1563	}
1564}
1565
1566
1567/* This is used for walking through a zone's free element list.
1568 */
1569struct zone_free_element {
1570	struct zone_free_element * next;
1571};
1572
1573/*
1574 * Add a linked list of free elements starting at base back onto the zone's
1575 * free list.  Tail points to the last element on the list.
1576 */
1577
1578#define ADD_LIST_TO_ZONE(zone, base, tail)				\
1579MACRO_BEGIN								\
1580	(tail)->next = (void *)((zone)->free_elements);			\
1581	if (check_freed_element) {					\
1582		if ((zone)->elem_size >= (2 * sizeof(vm_offset_t)))	\
1583			((vm_offset_t *)(tail))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
1584                                        (zone)->free_elements;		\
1585	}								\
1586	(zone)->free_elements = (unsigned long)(base);			\
1587MACRO_END
1588
1589/*
1590 * Add an element to the chain pointed to by prev.
1591 */
1592
1593#define ADD_ELEMENT(zone, prev, elem)						\
1594MACRO_BEGIN								\
1595	(prev)->next = (elem);						\
1596	if (check_freed_element) {					\
1597		if ((zone)->elem_size >= (2 * sizeof(vm_offset_t)))     \
1598			((vm_offset_t *)(prev))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
1599					(vm_offset_t)(elem); 		\
1600        }								\
1601MACRO_END
1602
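/*
 * Counters maintained by zone_gc(): pages reclaimed, and free elements
 * examined, freed, and kept across all collectable zones.
 */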
1603struct {
1604	uint32_t	pgs_freed;
1605
1606	uint32_t	elems_collected,
1607				elems_freed,
1608				elems_kept;
1609} zgc_stats;
1610
1611/*	Zone garbage collection
1612 *
1613 *	zone_gc will walk through all the free elements in all the
1614 *	zones that are marked collectable looking for reclaimable
1615 *	pages.  zone_gc is called by consider_zone_gc when the system
1616 *	begins to run out of memory.
1617 */
1618void
1619zone_gc(void)
1620{
1621	unsigned int	max_zones;
1622	zone_t			z;
1623	unsigned int	i;
1624	struct zone_page_table_entry	*zp, *zone_free_pages;
1625
1626	mutex_lock(&zone_gc_lock);
1627
1628	simple_lock(&all_zones_lock);
1629	max_zones = num_zones;
1630	z = first_zone;
1631	simple_unlock(&all_zones_lock);
1632
1633#if MACH_ASSERT
1634	for (i = 0; i < zone_pages; i++)
1635		assert(zone_page_table[i].collect_count == 0);
1636#endif /* MACH_ASSERT */
1637
1638	zone_free_pages = NULL;
1639
1640	for (i = 0; i < max_zones; i++, z = z->next_zone) {
1641		unsigned int				n, m;
1642		vm_size_t					elt_size, size_freed;
1643		struct zone_free_element	*elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail;
1644
1645		assert(z != ZONE_NULL);
1646
1647		if (!z->collectable)
1648			continue;
1649
1650		lock_zone(z);
1651
1652		elt_size = z->elem_size;
1653
1654		/*
1655		 * Do a quick feasibility check before we scan the zone:
1656		 * skip unless there is a likelihood of getting pages back
1657		 * (i.e., we need a whole allocation block's worth of free
1658		 * elements before we can garbage collect), and
1659		 * the zone has more than 10 percent of its elements free
1660		 * or the element size is a multiple of PAGE_SIZE.
1661		 */
1662		if ((elt_size & PAGE_MASK) &&
1663		     (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) ||
1664		      ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10)))) {
1665			unlock_zone(z);
1666			continue;
1667		}
1668
1669		z->doing_gc = TRUE;
1670
1671		/*
1672		 * Snatch all of the free elements away from the zone.
1673		 */
1674
1675		scan = (void *)z->free_elements;
1676		z->free_elements = 0;
1677
1678		unlock_zone(z);
1679
1680		/*
1681		 * Pass 1:
1682		 *
1683		 * Determine which elements we can attempt to collect
1684		 * and count them up in the page table.  Foreign elements
1685		 * are returned to the zone.
1686		 */
1687
1688		prev = (void *)&scan;
1689		elt = scan;
1690		n = 0; tail = keep = NULL;
1691		while (elt != NULL) {
1692			if (from_zone_map(elt, elt_size)) {
1693				zone_page_collect((vm_offset_t)elt, elt_size);
1694
1695				prev = elt;
1696				elt = elt->next;
1697
1698				++zgc_stats.elems_collected;
1699			}
1700			else {
1701				if (keep == NULL)
1702					keep = tail = elt;
1703				else {
1704					ADD_ELEMENT(z, tail, elt);
1705					tail = elt;
1706				}
1707
1708				ADD_ELEMENT(z, prev, elt->next);
1709				elt = elt->next;
1710				ADD_ELEMENT(z, tail, NULL);
1711			}
1712
1713			/*
1714			 * Dribble back the elements we are keeping.
1715			 */
1716
1717			if (++n >= 50) {
1718				if (z->waiting == TRUE) {
1719					lock_zone(z);
1720
1721					if (keep != NULL) {
1722						ADD_LIST_TO_ZONE(z, keep, tail);
1723						tail = keep = NULL;
1724					} else {
1725						m =0;
1726						base_elt = elt;
1727						base_prev = prev;
1728						while ((elt != NULL) && (++m < 50)) {
1729							prev = elt;
1730							elt = elt->next;
1731						}
1732						if (m !=0 ) {
1733							ADD_LIST_TO_ZONE(z, base_elt, prev);
1734							ADD_ELEMENT(z, base_prev, elt);
1735							prev = base_prev;
1736						}
1737					}
1738
1739					if (z->waiting) {
1740						z->waiting = FALSE;
1741						zone_wakeup(z);
1742					}
1743
1744					unlock_zone(z);
1745				}
1746				n =0;
1747			}
1748		}
1749
1750		/*
1751		 * Return any remaining elements.
1752		 */
1753
1754		if (keep != NULL) {
1755			lock_zone(z);
1756
1757			ADD_LIST_TO_ZONE(z, keep, tail);
1758
1759			unlock_zone(z);
1760		}
1761
1762		/*
1763		 * Pass 2:
1764		 *
1765		 * Determine which pages we can reclaim and
1766		 * free those elements.
1767		 */
1768
1769		size_freed = 0;
1770		elt = scan;
1771		n = 0; tail = keep = NULL;
1772		while (elt != NULL) {
1773			if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
1774				size_freed += elt_size;
1775				zone_page_free_element(&zone_free_pages,
1776										(vm_offset_t)elt, elt_size);
1777
1778				elt = elt->next;
1779
1780				++zgc_stats.elems_freed;
1781			}
1782			else {
1783				zone_page_keep((vm_offset_t)elt, elt_size);
1784
1785				if (keep == NULL)
1786					keep = tail = elt;
1787				else {
1788					ADD_ELEMENT(z, tail, elt);
1789					tail = elt;
1790				}
1791
1792				elt = elt->next;
1793				ADD_ELEMENT(z, tail, NULL);
1794
1795				++zgc_stats.elems_kept;
1796			}
1797
1798			/*
1799			 * Dribble back the elements we are keeping,
1800			 * and update the zone size info.
1801			 */
1802
1803			if (++n >= 50) {
1804				lock_zone(z);
1805
1806				z->cur_size -= size_freed;
1807				size_freed = 0;
1808
1809				if (keep != NULL) {
1810					ADD_LIST_TO_ZONE(z, keep, tail);
1811				}
1812
1813				if (z->waiting) {
1814					z->waiting = FALSE;
1815					zone_wakeup(z);
1816				}
1817
1818				unlock_zone(z);
1819
1820				n = 0; tail = keep = NULL;
1821			}
1822		}
1823
1824		/*
1825		 * Return any remaining elements, and update
1826		 * the zone size info.
1827		 */
1828
1829		lock_zone(z);
1830
1831		if (size_freed > 0 || keep != NULL) {
1832
1833			z->cur_size -= size_freed;
1834
1835			if (keep != NULL) {
1836				ADD_LIST_TO_ZONE(z, keep, tail);
1837			}
1838
1839		}
1840
1841		z->doing_gc = FALSE;
1842		if (z->waiting) {
1843			z->waiting = FALSE;
1844			zone_wakeup(z);
1845		}
1846		unlock_zone(z);
1847	}
1848
1849	/*
1850	 * Reclaim the pages we are freeing.
1851	 */
1852
1853	while ((zp = zone_free_pages) != NULL) {
1854		zone_free_pages = zp->link;
1855#if	ZONE_ALIAS_ADDR
1856		z = zone_virtual_addr((vm_map_address_t)z);
1857#endif
1858		kmem_free(zone_map, zone_map_min_address + PAGE_SIZE *
1859										(zp - zone_page_table), PAGE_SIZE);
1860		++zgc_stats.pgs_freed;
1861	}
1862
1863	mutex_unlock(&zone_gc_lock);
1864}
1865
1866/*
1867 *	consider_zone_gc:
1868 *
1869 *	Called by the pageout daemon when the system needs more free pages.
1870 */
1871
1872void
1873consider_zone_gc(void)
1874{
1875	/*
1876	 *	By default, don't attempt zone GC more frequently
1877	 *	than once per minute.
1878	 */
1879
1880	if (zone_gc_max_rate == 0)
1881		zone_gc_max_rate = (60 << SCHED_TICK_SHIFT) + 1;
1882
1883	if (zone_gc_allowed &&
1884	    ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) ||
1885	     zone_gc_forced)) {
1886		zone_gc_forced = FALSE;
1887		zone_gc_last_tick = sched_tick;
1888		zone_gc();
1889	}
1890}
1891
1892struct fake_zone_info {
1893	const char* name;
1894	void (*func)(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
1895		    int *, int *);
1896};
1897
1898static struct fake_zone_info fake_zones[] = {
1899	{
1900		.name = "kernel_stacks",
1901		.func = stack_fake_zone_info,
1902	},
1903#ifdef ppc
1904	{
1905		.name = "save_areas",
1906		.func = save_fake_zone_info,
1907	},
1908	{
1909		.name = "pmap_mappings",
1910		.func = mapping_fake_zone_info,
1911	},
1912#endif /* ppc */
1913#ifdef i386
1914	{
1915		.name = "page_tables",
1916		.func = pt_fake_zone_info,
1917	},
1918#endif /* i386 */
1919	{
1920		.name = "kalloc.large",
1921		.func = kalloc_fake_zone_info,
1922	},
1923};
1924
1925kern_return_t
1926host_zone_info(
1927	host_t			host,
1928	zone_name_array_t	*namesp,
1929	mach_msg_type_number_t  *namesCntp,
1930	zone_info_array_t	*infop,
1931	mach_msg_type_number_t  *infoCntp)
1932{
1933	zone_name_t	*names;
1934	vm_offset_t	names_addr;
1935	vm_size_t	names_size;
1936	zone_info_t	*info;
1937	vm_offset_t	info_addr;
1938	vm_size_t	info_size;
1939	unsigned int	max_zones, i;
1940	zone_t		z;
1941	zone_name_t    *zn;
1942	zone_info_t    *zi;
1943	kern_return_t	kr;
1944	size_t		num_fake_zones;
1945
1946	if (host == HOST_NULL)
1947		return KERN_INVALID_HOST;
1948
1949	num_fake_zones = sizeof fake_zones / sizeof fake_zones[0];
1950
1951	/*
1952	 *	We assume that zones aren't freed once allocated.
1953	 *	We won't pick up any zones that are allocated later.
1954	 */
1955
1956	simple_lock(&all_zones_lock);
1957	max_zones = num_zones + num_fake_zones;
1958	z = first_zone;
1959	simple_unlock(&all_zones_lock);
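
	/*
	 * If the caller's in-line buffers can hold an entry for every
	 * zone, fill them directly; otherwise allocate pageable memory
	 * in the ipc_kernel_map and return the data out of line (as a
	 * vm_map_copy_t) at the end of the routine.
	 */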
1960
1961	if (max_zones <= *namesCntp) {
1962		/* use in-line memory */
1963		names_size = *namesCntp * sizeof *names;
1964		names = *namesp;
1965	} else {
1966		names_size = round_page(max_zones * sizeof *names);
1967		kr = kmem_alloc_pageable(ipc_kernel_map,
1968					 &names_addr, names_size);
1969		if (kr != KERN_SUCCESS)
1970			return kr;
1971		names = (zone_name_t *) names_addr;
1972	}
1973
1974	if (max_zones <= *infoCntp) {
1975		/* use in-line memory */
1976	  	info_size = *infoCntp * sizeof *info;
1977		info = *infop;
1978	} else {
1979		info_size = round_page(max_zones * sizeof *info);
1980		kr = kmem_alloc_pageable(ipc_kernel_map,
1981					 &info_addr, info_size);
1982		if (kr != KERN_SUCCESS) {
1983			if (names != *namesp)
1984				kmem_free(ipc_kernel_map,
1985					  names_addr, names_size);
1986			return kr;
1987		}
1988
1989		info = (zone_info_t *) info_addr;
1990	}
1991	zn = &names[0];
1992	zi = &info[0];
1993
1994	for (i = 0; i < num_zones; i++) {
1995		struct zone zcopy;
1996
1997		assert(z != ZONE_NULL);
1998
1999		lock_zone(z);
2000		zcopy = *z;
2001		unlock_zone(z);
2002
2003		simple_lock(&all_zones_lock);
2004		z = z->next_zone;
2005		simple_unlock(&all_zones_lock);
2006
2007		/* assuming here the name data is static */
2008		(void) strncpy(zn->zn_name, zcopy.zone_name,
2009			       sizeof zn->zn_name);
2010		zn->zn_name[sizeof zn->zn_name - 1] = '\0';
2011
2012		zi->zi_count = zcopy.count;
2013		zi->zi_cur_size = zcopy.cur_size;
2014		zi->zi_max_size = zcopy.max_size;
2015		zi->zi_elem_size = zcopy.elem_size;
2016		zi->zi_alloc_size = zcopy.alloc_size;
2017		zi->zi_exhaustible = zcopy.exhaustible;
2018		zi->zi_collectable = zcopy.collectable;
2019
2020		zn++;
2021		zi++;
2022	}
2023
2024	/*
2025	 * loop through the fake zones and fill them using the specialized
2026	 * functions
2027	 */
2028	for (i = 0; i < num_fake_zones; i++) {
2029		strncpy(zn->zn_name, fake_zones[i].name, sizeof zn->zn_name);
2030		zn->zn_name[sizeof zn->zn_name - 1] = '\0';
2031		fake_zones[i].func(&zi->zi_count, &zi->zi_cur_size,
2032				   &zi->zi_max_size, &zi->zi_elem_size,
2033				   &zi->zi_alloc_size, &zi->zi_collectable,
2034				   &zi->zi_exhaustible);
2035		zn++;
2036		zi++;
2037	}
2038
2039	if (names != *namesp) {
2040		vm_size_t used;
2041		vm_map_copy_t copy;
2042
2043		used = max_zones * sizeof *names;
2044
2045		if (used != names_size)
2046			bzero((char *) (names_addr + used), names_size - used);
2047
2048		kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
2049				   (vm_map_size_t)names_size, TRUE, &copy);
2050		assert(kr == KERN_SUCCESS);
2051
2052		*namesp = (zone_name_t *) copy;
2053	}
2054	*namesCntp = max_zones;
2055
2056	if (info != *infop) {
2057		vm_size_t used;
2058		vm_map_copy_t copy;
2059
2060		used = max_zones * sizeof *info;
2061
2062		if (used != info_size)
2063			bzero((char *) (info_addr + used), info_size - used);
2064
2065		kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
2066				   (vm_map_size_t)info_size, TRUE, &copy);
2067		assert(kr == KERN_SUCCESS);
2068
2069		*infop = (zone_info_t *) copy;
2070	}
2071	*infoCntp = max_zones;
2072
2073	return KERN_SUCCESS;
2074}
2075
2076#if	MACH_KDB
2077#include <ddb/db_command.h>
2078#include <ddb/db_output.h>
2079#include <kern/kern_print.h>
2080
2081const char *zone_labels =
2082"ENTRY       COUNT   TOT_SZ   MAX_SZ ELT_SZ ALLOC_SZ NAME";
2083
2084/* Forwards */
2085void	db_print_zone(
2086		zone_t		addr);
2087
2088#if	ZONE_DEBUG
2089void	db_zone_check_active(
2090		zone_t		zone);
2091void	db_zone_print_active(
2092		zone_t		zone);
2093#endif	/* ZONE_DEBUG */
2094void	db_zone_print_free(
2095		zone_t		zone);
2096void
2097db_print_zone(
2098	zone_t		addr)
2099{
2100	struct zone zcopy;
2101
2102	zcopy = *addr;
2103
2104	db_printf("%8x %8x %8x %8x %6x %8x %s ",
2105		  addr, zcopy.count, zcopy.cur_size,
2106		  zcopy.max_size, zcopy.elem_size,
2107		  zcopy.alloc_size, zcopy.zone_name);
2108	if (zcopy.exhaustible)
2109	  	db_printf("H");
2110	if (zcopy.collectable)
2111	  	db_printf("C");
2112	if (zcopy.expandable)
2113	  	db_printf("X");
2114	db_printf("\n");
2115}
2116
2117/*ARGSUSED*/
2118void
2119db_show_one_zone(db_expr_t addr, boolean_t have_addr,
2120		 __unused db_expr_t count, __unused char *modif)
2121{
2122	struct zone *z = (zone_t)((char *)0 + addr);
2123
2124	if (z == ZONE_NULL || !have_addr){
2125		db_error("No Zone\n");
2126		/*NOTREACHED*/
2127	}
2128
2129	db_printf("%s\n", zone_labels);
2130	db_print_zone(z);
2131}
2132
2133/*ARGSUSED*/
2134void
2135db_show_all_zones(__unused db_expr_t addr, boolean_t have_addr, db_expr_t count,
2136		  __unused char *modif)
2137{
2138	zone_t		z;
2139	unsigned total = 0;
2140
2141	/*
2142	 * Don't risk hanging by unconditionally locking,
2143	 * risk of incoherent data is small (zones aren't freed).
2144	 */
2145	have_addr = simple_lock_try(&all_zones_lock);
2146	count = num_zones;
2147	z = first_zone;
2148	if (have_addr) {
2149		simple_unlock(&all_zones_lock);
2150	}
2151
2152	db_printf("%s\n", zone_labels);
2153	for (  ; count > 0; count--) {
2154		if (!z) {
2155			db_error("Mangled Zone List\n");
2156			/*NOTREACHED*/
2157		}
2158		db_print_zone(z);
		total += z->cur_size;
2160
2161		have_addr = simple_lock_try(&all_zones_lock);
2162		z = z->next_zone;
2163		if (have_addr) {
2164			simple_unlock(&all_zones_lock);
2165		}
2166	}
2167	db_printf("\nTotal              %8x", total);
2168	db_printf("\n\nzone_gc() has reclaimed %d pages\n", zgc_stats.pgs_freed);
2169}
2170
2171#if	ZONE_DEBUG
2172void
2173db_zone_check_active(
2174	zone_t	zone)
2175{
2176	int count = 0;
2177	queue_t	tmp_elem;
2178
2179	if (!zone_debug_enabled(zone) || !zone_check)
2180		return;
2181	tmp_elem = queue_first(&zone->active_zones);
2182	while (count < zone->count) {
2183		count++;
2184		if (tmp_elem == 0) {
2185			printf("unexpected zero element, zone=%p, count=%d\n",
2186				zone, count);
2187			assert(FALSE);
2188			break;
2189		}
2190		if (queue_end(tmp_elem, &zone->active_zones)) {
2191			printf("unexpected queue_end, zone=%p, count=%d\n",
2192				zone, count);
2193			assert(FALSE);
2194			break;
2195		}
2196		tmp_elem = queue_next(tmp_elem);
2197	}
2198	if (!queue_end(tmp_elem, &zone->active_zones)) {
2199		printf("not at queue_end, zone=%p, tmp_elem=%p\n",
2200			zone, tmp_elem);
2201		assert(FALSE);
2202	}
2203}
2204
2205void
2206db_zone_print_active(
2207	zone_t	zone)
2208{
2209	int count = 0;
2210	queue_t	tmp_elem;
2211
2212	if (!zone_debug_enabled(zone)) {
2213		printf("zone %p debug not enabled\n", zone);
2214		return;
2215	}
2216	if (!zone_check) {
2217		printf("zone_check FALSE\n");
2218		return;
2219	}
2220
2221	printf("zone %p, active elements %d\n", zone, zone->count);
2222	printf("active list:\n");
2223	tmp_elem = queue_first(&zone->active_zones);
2224	while (count < zone->count) {
2225		printf("  %p", tmp_elem);
2226		count++;
2227		if ((count % 6) == 0)
2228			printf("\n");
2229		if (tmp_elem == 0) {
2230			printf("\nunexpected zero element, count=%d\n", count);
2231			break;
2232		}
2233		if (queue_end(tmp_elem, &zone->active_zones)) {
2234			printf("\nunexpected queue_end, count=%d\n", count);
2235			break;
2236		}
2237		tmp_elem = queue_next(tmp_elem);
2238	}
2239	if (!queue_end(tmp_elem, &zone->active_zones))
2240		printf("\nnot at queue_end, tmp_elem=%p\n", tmp_elem);
2241	else
2242		printf("\n");
2243}
2244#endif	/* ZONE_DEBUG */
2245
2246void
2247db_zone_print_free(
2248	zone_t	zone)
2249{
2250	int count = 0;
2251	int freecount;
2252	vm_offset_t elem;
2253
2254	freecount = zone_free_count(zone);
2255	printf("zone %p, free elements %d\n", zone, freecount);
2256	printf("free list:\n");
2257	elem = zone->free_elements;
2258	while (count < freecount) {
2259		printf("  0x%x", elem);
2260		count++;
2261		if ((count % 6) == 0)
2262			printf("\n");
2263		if (elem == 0) {
2264			printf("\nunexpected zero element, count=%d\n", count);
2265			break;
2266		}
2267		elem = *((vm_offset_t *)elem);
2268	}
2269	if (elem != 0)
2270		printf("\nnot at end of free list, elem=0x%x\n", elem);
2271	else
2272		printf("\n");
2273}
2274
2275#endif /* MACH_KDB */
2276
2277
2278#if	ZONE_DEBUG
2279
/* Should we care about locks here? */
2281
2282#if	MACH_KDB
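/*
 * With zone debugging enabled, each allocated element is preceded by
 * ZONE_DEBUG_OFFSET bytes of queue linkage chaining it onto the zone's
 * active_zones list; these helpers walk that list and return pointers
 * just past the linkage (the addresses zalloc hands out).
 */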
2283void *
2284next_element(
2285	zone_t		z,
2286	void		*prev)
2287{
2288	char		*elt = (char *)prev;
2289
2290	if (!zone_debug_enabled(z))
2291		return(NULL);
2292	elt -= ZONE_DEBUG_OFFSET;
2293	elt = (char *) queue_next((queue_t) elt);
2294	if ((queue_t) elt == &z->active_zones)
2295		return(NULL);
2296	elt += ZONE_DEBUG_OFFSET;
2297	return(elt);
2298}
2299
2300void *
2301first_element(
2302	zone_t		z)
2303{
2304	char 		*elt;
2305
2306	if (!zone_debug_enabled(z))
2307		return(NULL);
2308	if (queue_empty(&z->active_zones))
2309		return(NULL);
2310	elt = (char *)queue_first(&z->active_zones);
2311	elt += ZONE_DEBUG_OFFSET;
2312	return(elt);
2313}
2314
2315/*
2316 * Second arg controls how many zone elements are printed:
2317 *   0 => none
2318 *   n, n < 0 => all
2319 *   n, n > 0 => last n on active list
2320 */
2321int
2322zone_count(
2323	zone_t		z,
2324	int		tail)
2325{
2326	void		*elt;
2327	int		count = 0;
2328	boolean_t	print = (tail != 0);
2329
2330	if (tail < 0)
2331		tail = z->count;
2332	if (z->count < tail)
2333		tail = 0;
2334	tail = z->count - tail;
2335	for (elt = first_element(z); elt; elt = next_element(z, elt)) {
2336		if (print && tail <= count)
2337			db_printf("%8x\n", elt);
2338		count++;
2339	}
2340	assert(count == z->count);
2341	return(count);
2342}
2343#endif /* MACH_KDB */
2344
#define zone_in_use(z)	((z)->count || (z)->free_elements)
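
/*
 * Zone debugging can only be toggled while a zone is completely unused
 * (no allocated and no free elements), because it changes the effective
 * element size by ZONE_DEBUG_OFFSET.
 */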
2346
2347void
2348zone_debug_enable(
2349	zone_t		z)
2350{
2351	if (zone_debug_enabled(z) || zone_in_use(z) ||
2352	    z->alloc_size < (z->elem_size + ZONE_DEBUG_OFFSET))
2353		return;
2354	queue_init(&z->active_zones);
2355	z->elem_size += ZONE_DEBUG_OFFSET;
2356}
2357
2358void
2359zone_debug_disable(
2360	zone_t		z)
2361{
2362	if (!zone_debug_enabled(z) || zone_in_use(z))
2363		return;
2364	z->elem_size -= ZONE_DEBUG_OFFSET;
2365	z->active_zones.next = z->active_zones.prev = NULL;
2366}
2367#endif	/* ZONE_DEBUG */
2368