/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 *	File:	kern/gzalloc.c
 *	Author:	Derek Kumar
 *
 *	"Guard mode" zone allocator, used to trap use-after-free errors,
 *	overruns, underruns, mismatched allocations/frees, uninitialized
 *	zone element use, timing-dependent races etc.
 *
 *	The allocator is configured by these boot-args:
 *	gzalloc_size=<size>: target all zones with elements of <size> bytes
 *	gzalloc_min=<size>: target zones with elements >= size
 *	gzalloc_max=<size>: target zones with elements <= size
 *	gzalloc_min and gzalloc_max can be specified together to target a
 *	range of sizes
 *	gzalloc_fc_size=<size>: number of zone elements (effectively page
 *	multiple sized) to retain in the free VA cache. This cache is evicted
 *	(backing pages and VA released) in a least-recently-freed fashion.
 *	Larger free VA caches allow for a longer window of opportunity to trap
 *	delayed use-after-free operations, but use more memory.
 *	-gzalloc_wp: Write protect, rather than unmap, freed allocations
 *	lingering in the free VA cache. Useful to disambiguate between
 *	read-after-frees/read overruns and writes. Also permits direct inspection
 *	of the freed element in the cache via the kernel debugger. As each
 *	element has a "header" (trailer in underflow detection mode), the zone
 *	of origin of the element can be easily determined in this mode.
 *	-gzalloc_uf_mode: Underflow detection mode, where the guard page
 *	adjoining each element is placed *before* the element page rather than
 *	after. The element is also located at the top of the page, rather than
 *	abutting the bottom as with the standard overflow detection mode.
 *	-gzalloc_noconsistency: disable consistency checks that flag mismatched
 *	frees, corruptions of the header/trailer signatures etc.
 *	-nogzalloc_mode: Disables the guard mode allocator. The DEBUG kernel
 *	enables the guard allocator for zones sized 8K-16K (if present) by
 *	default; this option disables that behaviour.
 */
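/*
 *	Illustrative boot-arg combinations (example values only, not defaults):
 *	gzalloc_size=1024			guard only zones with 1024-byte elements
 *	gzalloc_min=1024 gzalloc_max=4096	guard zones with element sizes between
 *						1024 and 4096 bytes inclusive
 *	gzalloc_min=2048 -gzalloc_wp		guard zones with elements >= 2048 bytes,
 *						write protecting rather than unmapping
 *						freed elements
 */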

#include <zone_debug.h>
#include <zone_alias_addr.h>

#include <mach/mach_types.h>
#include <mach/vm_param.h>
#include <mach/kern_return.h>
#include <mach/machine/vm_types.h>
#include <mach_debug/zone_info.h>
#include <mach/vm_map.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/sched.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#include <pexpert/pexpert.h>

#include <machine/machparam.h>

#include <libkern/OSDebug.h>
#include <libkern/OSAtomic.h>
#include <sys/kdebug.h>

extern boolean_t vm_kernel_ready, kmem_ready;
boolean_t gzalloc_mode = FALSE;
uint32_t pdzalloc_count, pdzfree_count;

#define	GZALLOC_MIN_DEFAULT (1024)
#define GZDEADZONE ((zone_t) 0xDEAD201E)
#define GZALLOC_SIGNATURE (0xABADCAFE)
#define GZALLOC_RESERVE_SIZE_DEFAULT (2 * 1024 * 1024)
#define GZFC_DEFAULT_SIZE (1024)

char gzalloc_fill_pattern = 0x67; /* 'g' */

uint32_t gzalloc_min = ~0U;
uint32_t gzalloc_max = 0;
uint32_t gzalloc_size = 0;
uint64_t gzalloc_allocated, gzalloc_freed, gzalloc_early_alloc, gzalloc_early_free, gzalloc_wasted;
boolean_t gzalloc_uf_mode = FALSE, gzalloc_consistency_checks = TRUE;
vm_prot_t gzalloc_prot = VM_PROT_NONE;
uint32_t gzalloc_guard = KMA_GUARD_LAST;
uint32_t gzfc_size = GZFC_DEFAULT_SIZE;

vm_map_t gzalloc_map;
vm_offset_t gzalloc_map_min, gzalloc_map_max;
vm_offset_t gzalloc_reserve;
vm_size_t gzalloc_reserve_size;
typedef struct gzalloc_header {
	zone_t gzone;
	uint32_t  gzsize;
	uint32_t  gzsig;
} gzhdr_t;

#define GZHEADER_SIZE (sizeof(gzhdr_t))

extern zone_t vm_page_zone;

void gzalloc_reconfigure(__unused zone_t z) {
	/* Nothing for now */
}

boolean_t gzalloc_enabled(void) {
	return gzalloc_mode;
}

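/*
 * Per-zone setup: if the zone's element size falls within the configured
 * guard range and the zone is not exempt, allocate its free element (VA)
 * cache, carving it from the early gzalloc_reserve before kmem is ready
 * and from kernel_memory_allocate() afterwards.
 */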
void gzalloc_zone_init(zone_t z) {
	if (gzalloc_mode) {
		bzero(&z->gz, sizeof(z->gz));

		if (gzfc_size && (z->elem_size >= gzalloc_min) && (z->elem_size <= gzalloc_max) && (z->gzalloc_exempt == FALSE)) {
			vm_size_t gzfcsz = round_page(sizeof(*z->gz.gzfc) * gzfc_size);

			/* If the VM/kmem systems aren't yet configured, carve
			 * out the free element cache structure directly from the
			 * gzalloc_reserve supplied by the pmap layer.
			 */
			if (!kmem_ready) {
				if (gzalloc_reserve_size < gzfcsz)
					panic("gzalloc reserve exhausted");

				z->gz.gzfc = (vm_offset_t *)gzalloc_reserve;
				gzalloc_reserve += gzfcsz;
				gzalloc_reserve_size -= gzfcsz;
			} else {
				kern_return_t kr;

				if ((kr = kernel_memory_allocate(kernel_map, (vm_offset_t *)&z->gz.gzfc, gzfcsz, 0, KMA_KOBJECT)) != KERN_SUCCESS) {
					panic("zinit/gzalloc: kernel_memory_allocate failed (%d) for 0x%lx bytes", kr, (unsigned long) gzfcsz);
				}
			}
			bzero((void *)z->gz.gzfc, gzfcsz);
		}
	}
}

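/*
 * Parse the gzalloc boot-args described in the header comment, derive the
 * target element size range, protection for freed elements and guard page
 * placement, and steal the early-boot reserve from the pmap layer if guard
 * mode ends up enabled.
 */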
void gzalloc_configure(void) {
	char temp_buf[16];

	if (PE_parse_boot_argn("-gzalloc_mode", temp_buf, sizeof (temp_buf))) {
		gzalloc_mode = TRUE;
		gzalloc_min = GZALLOC_MIN_DEFAULT;
#if	ZONE_DEBUG
		gzalloc_min += (typeof(gzalloc_min))ZONE_DEBUG_OFFSET;
#endif
		gzalloc_max = ~0U;
	}

	if (PE_parse_boot_argn("gzalloc_min", &gzalloc_min, sizeof(gzalloc_min))) {
#if	ZONE_DEBUG
		gzalloc_min += (typeof(gzalloc_min))ZONE_DEBUG_OFFSET;
#endif
		gzalloc_mode = TRUE;
		gzalloc_max = ~0U;
	}

	if (PE_parse_boot_argn("gzalloc_max", &gzalloc_max, sizeof(gzalloc_max))) {
#if	ZONE_DEBUG
		gzalloc_max += (typeof(gzalloc_max))ZONE_DEBUG_OFFSET;
#endif
		gzalloc_mode = TRUE;
		if (gzalloc_min == ~0U)
			gzalloc_min = 0;
	}

	if (PE_parse_boot_argn("gzalloc_size", &gzalloc_size, sizeof(gzalloc_size))) {
#if	ZONE_DEBUG
		gzalloc_size += (typeof(gzalloc_size))ZONE_DEBUG_OFFSET;
#endif
		gzalloc_min = gzalloc_max = gzalloc_size;
		gzalloc_mode = TRUE;
	}

	(void)PE_parse_boot_argn("gzalloc_fc_size", &gzfc_size, sizeof(gzfc_size));

	if (PE_parse_boot_argn("-gzalloc_wp", temp_buf, sizeof (temp_buf))) {
		gzalloc_prot = VM_PROT_READ;
	}

	if (PE_parse_boot_argn("-gzalloc_uf_mode", temp_buf, sizeof (temp_buf))) {
		gzalloc_uf_mode = TRUE;
		gzalloc_guard = KMA_GUARD_FIRST;
	}

	if (PE_parse_boot_argn("-gzalloc_noconsistency", temp_buf, sizeof (temp_buf))) {
		gzalloc_consistency_checks = FALSE;
	}
#if	DEBUG
	if (gzalloc_mode == FALSE) {
		gzalloc_min = 8192;
		gzalloc_max = 16384;
		gzalloc_prot = VM_PROT_READ;
		gzalloc_mode = TRUE;
	}
#endif
	if (PE_parse_boot_argn("-nogzalloc_mode", temp_buf, sizeof (temp_buf)))
		gzalloc_mode = FALSE;

	if (gzalloc_mode) {
		gzalloc_reserve_size = GZALLOC_RESERVE_SIZE_DEFAULT;
		gzalloc_reserve = (vm_offset_t) pmap_steal_memory(gzalloc_reserve_size);
	}
}

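/*
 * Carve the gzalloc submap out of the kernel map, sized at 4x the maximum
 * zone map size to cover per-element page rounding, guard pages and the
 * lingering free VA cache.
 */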
void gzalloc_init(vm_size_t max_zonemap_size) {
	kern_return_t retval;

	if (gzalloc_mode) {
		retval = kmem_suballoc(kernel_map, &gzalloc_map_min, (max_zonemap_size << 2),
		    FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
		    &gzalloc_map);

		if (retval != KERN_SUCCESS)
			panic("zone_init: kmem_suballoc(gzalloc) failed");
		gzalloc_map_max = gzalloc_map_min + (max_zonemap_size << 2);
	}
}

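/*
 * Guard mode allocation path. Each covered element is given its own
 * page-rounded VA range plus a guard page, is filled with
 * gzalloc_fill_pattern to trap uninitialized use, and carries a gzhdr_t
 * recording the zone of origin, element size and a signature for
 * validation at free time. Returns 0 for zones not covered by guard mode.
 */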
vm_offset_t
gzalloc_alloc(zone_t zone, boolean_t canblock) {
	vm_offset_t addr = 0;

	if (__improbable(gzalloc_mode &&
		(((zone->elem_size >= gzalloc_min) &&
		    (zone->elem_size <= gzalloc_max))) &&
		(zone->gzalloc_exempt == 0))) {

		if (get_preemption_level() != 0) {
			if (canblock == TRUE) {
				pdzalloc_count++;
			} else {
				return 0;
			}
		}

		vm_offset_t rounded_size = round_page(zone->elem_size + GZHEADER_SIZE);
		vm_offset_t residue = rounded_size - zone->elem_size;
		vm_offset_t gzaddr = 0;
		gzhdr_t *gzh;

		if (!kmem_ready || (vm_page_zone == ZONE_NULL)) {
			/* Early allocations are supplied directly from the
			 * reserve.
			 */
			if (gzalloc_reserve_size < rounded_size)
				panic("gzalloc reserve exhausted");
			gzaddr = gzalloc_reserve;
			/* No guard page for these early allocations, just
			 * waste an additional page.
			 */
			gzalloc_reserve += rounded_size + PAGE_SIZE;
			gzalloc_reserve_size -= rounded_size + PAGE_SIZE;
			OSAddAtomic64((SInt32) (rounded_size), &gzalloc_early_alloc);
		} else {
			kern_return_t kr = kernel_memory_allocate(gzalloc_map,
			    &gzaddr, rounded_size + (1*PAGE_SIZE),
			    0, KMA_KOBJECT | gzalloc_guard);
			if (kr != KERN_SUCCESS)
				panic("gzalloc: kernel_memory_allocate for size 0x%llx failed with %d", (uint64_t)rounded_size, kr);
		}

		if (gzalloc_uf_mode) {
			gzaddr += PAGE_SIZE;
			/* The "header" becomes a "footer" in underflow
			 * mode.
			 */
			gzh = (gzhdr_t *) (gzaddr + zone->elem_size);
			addr = gzaddr;
		} else {
			gzh = (gzhdr_t *) (gzaddr + residue - GZHEADER_SIZE);
			addr = (gzaddr + residue);
		}

		/* Fill with a pattern on allocation to trap uninitialized
		 * data use. Since the element size may be "rounded up"
		 * by higher layers such as the kalloc layer, this may
		 * also identify overruns between the originally requested
		 * size and the rounded size via visual inspection.
		 * TBD: plumb through the originally requested size,
		 * prior to rounding by kalloc/IOMalloc etc.
		 * We also add a signature and the zone of origin in a header
		 * prefixed to the allocation.
		 */
		memset((void *)gzaddr, gzalloc_fill_pattern, rounded_size);

		gzh->gzone = (kmem_ready && vm_page_zone) ? zone : GZDEADZONE;
		gzh->gzsize = (uint32_t) zone->elem_size;
		gzh->gzsig = GZALLOC_SIGNATURE;

		lock_zone(zone);
		zone->count++;
		zone->sum_count++;
		zone->cur_size += rounded_size;
		unlock_zone(zone);

		OSAddAtomic64((SInt32) rounded_size, &gzalloc_allocated);
		OSAddAtomic64((SInt32) (rounded_size - zone->elem_size), &gzalloc_wasted);
	}
	return addr;
}

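/*
 * Guard mode free path. Validates the element's header (signature, zone of
 * origin, recorded size) when consistency checks are enabled, then either
 * write protects or unmaps the freed allocation and rotates it through the
 * per-zone free VA cache, removing the least-recently-freed entry from the
 * gzalloc map. Returns TRUE if the element was handled here.
 */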
boolean_t gzalloc_free(zone_t zone, void *addr) {
	boolean_t gzfreed = FALSE;
	kern_return_t kr;

	if (__improbable(gzalloc_mode &&
		(((zone->elem_size >= gzalloc_min) &&
		    (zone->elem_size <= gzalloc_max))) &&
		(zone->gzalloc_exempt == 0))) {
		gzhdr_t *gzh;
		vm_offset_t rounded_size = round_page(zone->elem_size + GZHEADER_SIZE);
		vm_offset_t residue = rounded_size - zone->elem_size;
		vm_offset_t saddr;
		vm_offset_t free_addr = 0;

		if (gzalloc_uf_mode) {
			gzh = (gzhdr_t *)((vm_offset_t)addr + zone->elem_size);
			saddr = (vm_offset_t) addr - PAGE_SIZE;
		} else {
			gzh = (gzhdr_t *)((vm_offset_t)addr - GZHEADER_SIZE);
			saddr = ((vm_offset_t)addr) - residue;
		}

		assert((saddr & PAGE_MASK) == 0);

		if (gzalloc_consistency_checks) {
			if (gzh->gzsig != GZALLOC_SIGNATURE) {
				panic("GZALLOC signature mismatch for element %p, expected 0x%x, found 0x%x", addr, GZALLOC_SIGNATURE, gzh->gzsig);
			}

			if (gzh->gzone != zone && (gzh->gzone != GZDEADZONE))
				panic("%s: Mismatched zone or under/overflow, current zone: %p, recorded zone: %p, address: %p", __FUNCTION__, zone, gzh->gzone, (void *)addr);
			/* Partially redundant given the zone check, but may flag header corruption */
			if (gzh->gzsize != zone->elem_size) {
				panic("Mismatched zfree or under/overflow for zone %p, recorded size: 0x%x, element size: 0x%x, address: %p\n", zone, gzh->gzsize, (uint32_t) zone->elem_size, (void *)addr);
			}
		}

		if (!kmem_ready || gzh->gzone == GZDEADZONE) {
			/* For now, just leak frees of early allocations
			 * performed before kmem is fully configured.
			 * They don't seem to get freed currently;
			 * consider ml_static_mfree in the future.
			 */
			OSAddAtomic64((SInt32) (rounded_size), &gzalloc_early_free);
			return TRUE;
		}

		if (get_preemption_level() != 0) {
			pdzfree_count++;
		}

		if (gzfc_size) {
			/* Either write protect or unmap the newly freed
			 * allocation
			 */
			kr = vm_map_protect(
				gzalloc_map,
				saddr,
				saddr + rounded_size + (1 * PAGE_SIZE),
				gzalloc_prot,
				FALSE);
			if (kr != KERN_SUCCESS)
				panic("%s: vm_map_protect: %p, 0x%x", __FUNCTION__, (void *)saddr, kr);
		} else {
			free_addr = saddr;
		}

		lock_zone(zone);

		/* Insert newly freed element into the protected free element
		 * cache, and rotate out the LRU element.
		 */
		if (gzfc_size) {
			if (zone->gz.gzfc_index >= gzfc_size) {
				zone->gz.gzfc_index = 0;
			}
			free_addr = zone->gz.gzfc[zone->gz.gzfc_index];
			zone->gz.gzfc[zone->gz.gzfc_index++] = saddr;
		}

		if (free_addr) {
			zone->count--;
			zone->cur_size -= rounded_size;
		}

		unlock_zone(zone);

		if (free_addr) {
			kr = vm_map_remove(
				gzalloc_map,
				free_addr,
				free_addr + rounded_size + (1 * PAGE_SIZE),
				VM_MAP_REMOVE_KUNWIRE);
			if (kr != KERN_SUCCESS)
				panic("gzfree: vm_map_remove: %p, 0x%x", (void *)free_addr, kr);

			OSAddAtomic64((SInt32)rounded_size, &gzalloc_freed);
			OSAddAtomic64(-((SInt32) (rounded_size - zone->elem_size)), &gzalloc_wasted);
		}

		gzfreed = TRUE;
	}
	return gzfreed;
}