/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_kern.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Kernel memory management.
 */

#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/lock.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <kern/misc_protos.h>
#include <vm/cpm.h>

#include <string.h>

#include <libkern/OSDebug.h>
#include <sys/kdebug.h>

/*
 *	Variables exported by this module.
 */

vm_map_t	kernel_map;
vm_map_t	kernel_pageable_map;

extern boolean_t vm_kernel_ready;

/*
 * Forward declarations for internal functions.
 */
extern kern_return_t kmem_alloc_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size);

extern void kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection);

kern_return_t
kmem_alloc_contig(
	vm_map_t		map,
	vm_offset_t		*addrp,
	vm_size_t		size,
	vm_offset_t 		mask,
	ppnum_t			max_pnum,
	ppnum_t			pnum_mask,
	int 			flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		map_addr;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, i;
	vm_map_entry_t		entry;
	vm_page_t		m, pages;
	kern_return_t		kr;

	if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT)))
		return KERN_INVALID_ARGUMENT;

	map_size = vm_map_round_page(size);
	map_mask = (vm_map_offset_t)mask;

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Allocate a new object (if necessary) and the reference we
	 *	will be donating to the map entry.  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		return kr;
	}

	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object) ?
		        map_addr : 0;

	/* Take an extra object ref in case the map entry gets deleted */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);

	if (kr != KERN_SUCCESS) {
		vm_map_remove(map, vm_map_trunc_page(map_addr),
			      vm_map_round_page(map_addr + map_size), 0);
		vm_object_deallocate(object);
		*addrp = 0;
		return kr;
	}

	vm_object_lock(object);
	for (i = 0; i < map_size; i += PAGE_SIZE) {
		m = pages;
		pages = NEXT_PAGE(m);
		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
		m->busy = FALSE;
		vm_page_insert(m, object, offset + i);
	}
	vm_object_unlock(object);

	if ((kr = vm_map_wire(map, vm_map_trunc_page(map_addr),
			      vm_map_round_page(map_addr + map_size), VM_PROT_DEFAULT, FALSE))
		!= KERN_SUCCESS) {
		if (object == kernel_object) {
			vm_object_lock(object);
			vm_object_page_remove(object, offset, offset + map_size);
			vm_object_unlock(object);
		}
		vm_map_remove(map, vm_map_trunc_page(map_addr),
			      vm_map_round_page(map_addr + map_size), 0);
		vm_object_deallocate(object);
		return kr;
	}
	vm_object_deallocate(object);

	if (object == kernel_object)
		vm_map_simplify(map, map_addr);

	*addrp = (vm_offset_t) map_addr;
	assert((vm_map_offset_t) *addrp == map_addr);
	return KERN_SUCCESS;
}
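
/*
 * Illustrative usage sketch (not part of the original source): a driver
 * needing a physically contiguous, wired buffer restricted to the low
 * 4GB of physical memory might call kmem_alloc_contig() as shown below.
 * The 64KB size and the 0xFFFFF page-number bound (4K pages below 4GB)
 * are example values only.
 *
 *	vm_offset_t	addr;
 *	kern_return_t	kr;
 *
 *	kr = kmem_alloc_contig(kernel_map, &addr, 64 * 1024,
 *			       0, (ppnum_t) 0xFFFFF, 0, 0);
 *	if (kr == KERN_SUCCESS) {
 *		... hand the buffer to the device ...
 *		kmem_free(kernel_map, addr, 64 * 1024);
 *	}
 */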

/*
 * Master entry point for allocating kernel memory.
 * NOTE: this routine is _never_ interrupt safe.
 *
 * map		: map to allocate into
 * addrp	: pointer to start address of new memory
 * size		: size of memory requested
 * flags	: options
 *		  KMA_HERE		*addrp is base address, else "anywhere"
 *		  KMA_NOPAGEWAIT	don't wait for pages if unavailable
 *		  KMA_KOBJECT		use kernel_object
 *		  KMA_LOMEM		support for 32 bit devices in a 64 bit world
 *					if set and a lomemory pool is available
 *					grab pages from it... this also implies
 *					KMA_NOPAGEWAIT
 */
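
/*
 * Illustrative usage sketch (not part of the original source): a wired,
 * kernel_object-backed allocation of 16 pages, released with kmem_free().
 * The size and flag choice are example values only.
 *
 *	vm_offset_t	addr;
 *	kern_return_t	kr;
 *
 *	kr = kernel_memory_allocate(kernel_map, &addr, 16 * PAGE_SIZE,
 *				    0, KMA_KOBJECT);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	...
 *	kmem_free(kernel_map, addr, 16 * PAGE_SIZE);
 */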

kern_return_t
kernel_memory_allocate(
	register vm_map_t	map,
	register vm_offset_t	*addrp,
	register vm_size_t	size,
	register vm_offset_t	mask,
	int			flags)
{
	vm_object_t 		object;
	vm_object_offset_t 	offset;
	vm_object_offset_t 	pg_offset;
	vm_map_entry_t 		entry = NULL;
	vm_map_offset_t 	map_addr, fill_start;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, fill_size;
	kern_return_t 		kr;
	vm_page_t		mem;
	vm_page_t		guard_page_list = NULL;
	vm_page_t		wired_page_list = NULL;
	int			guard_page_count = 0;
	int			wired_page_count = 0;
	int			i;
	int			vm_alloc_flags;
	vm_prot_t		kma_prot;

	if (! vm_kernel_ready) {
		panic("kernel_memory_allocate: VM is not ready");
	}

	map_size = vm_map_round_page(size);
	map_mask = (vm_map_offset_t) mask;
	vm_alloc_flags = 0;

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * limit the size of a single extent of wired memory
	 * to try and limit the damage to the system if
	 * too many pages get wired down
	 */
        if (map_size > (1 << 30)) {
                return KERN_RESOURCE_SHORTAGE;
        }

	/*
	 * Guard pages:
	 *
	 * Guard pages are implemented as fictitious pages.  By placing guard pages
	 * on either end of a stack, they can help detect cases where a thread walks
	 * off either end of its stack.  They are allocated and set up here and attempts
	 * to access those pages are trapped in vm_fault_page().
	 *
	 * The map_size we were passed may include extra space for
	 * guard pages.  If those were requested, then back it out of fill_size
	 * since vm_map_find_space() takes just the actual size not including
	 * guard pages.  Similarly, fill_start indicates where the actual pages
	 * will begin in the range.
	 */

	fill_start = 0;
	fill_size = map_size;

	if (flags & KMA_GUARD_FIRST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE;
		fill_start += PAGE_SIZE_64;
		fill_size -= PAGE_SIZE_64;
		if (map_size < fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	if (flags & KMA_GUARD_LAST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_AFTER;
		fill_size -= PAGE_SIZE_64;
		if (map_size <= fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	wired_page_count = (int) (fill_size / PAGE_SIZE_64);
	assert(wired_page_count * PAGE_SIZE_64 == fill_size);

	for (i = 0; i < guard_page_count; i++) {
		for (;;) {
			mem = vm_page_grab_guard();

			if (mem != VM_PAGE_NULL)
				break;
			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			vm_page_more_fictitious();
		}
		mem->pageq.next = (queue_entry_t)guard_page_list;
		guard_page_list = mem;
	}

	for (i = 0; i < wired_page_count; i++) {
		uint64_t	unavailable;

		for (;;) {
		        if (flags & KMA_LOMEM)
			        mem = vm_page_grablo();
			else
			        mem = vm_page_grab();

		        if (mem != VM_PAGE_NULL)
			        break;

			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;

			if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			VM_PAGE_WAIT();
		}
		mem->pageq.next = (queue_entry_t)wired_page_list;
		wired_page_list = mem;
	}

	/*
	 *	Allocate a new object (if necessary).  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr,
			       fill_size, map_mask,
			       vm_alloc_flags, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		goto out;
	}

	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object) ?
		        map_addr : 0;

	entry->wired_count++;

	if (flags & KMA_PERMANENT)
		entry->permanent = TRUE;

	if (object != kernel_object)
		vm_object_reference(object);

	vm_object_lock(object);
	vm_map_unlock(map);

	pg_offset = 0;

	if (fill_start) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		pg_offset += PAGE_SIZE_64;
	}

	kma_prot = VM_PROT_READ | VM_PROT_WRITE;

	for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
		if (wired_page_list == NULL)
			panic("kernel_memory_allocate: wired_page_list == NULL");

		mem = wired_page_list;
		wired_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;
		mem->wire_count++;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
			   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);

		if (flags & KMA_NOENCRYPT) {
			bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);

			pmap_set_noencrypt(mem->phys_page);
		}
	}
	if ((fill_start + fill_size) < map_size) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
	}
	if (guard_page_list || wired_page_list)
		panic("kernel_memory_allocate: non empty list\n");

	vm_page_lockspin_queues();
	vm_page_wire_count += wired_page_count;
	vm_page_unlock_queues();

	vm_object_unlock(object);

	/*
	 * now that the pages are wired, we no longer have to fear coalescing
	 */
	if (object == kernel_object)
		vm_map_simplify(map, map_addr);
	else
		vm_object_deallocate(object);

	/*
	 *	Return the memory, not zeroed.
	 */
	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;

out:
	if (guard_page_list)
		vm_page_free_list(guard_page_list, FALSE);

	if (wired_page_list)
		vm_page_free_list(wired_page_list, FALSE);

	return kr;
}

/*
 *	kmem_alloc:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 */

kern_return_t
kmem_alloc(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, 0);
	TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
	return kr;
}
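
/*
 * Illustrative usage sketch (not part of the original source): the usual
 * pairing of kmem_alloc() with kmem_free() for a temporary wired buffer;
 * `bufsize' is a hypothetical caller-supplied size.  Note that the memory
 * is not zero-filled.
 *
 *	vm_offset_t	buf;
 *
 *	if (kmem_alloc(kernel_map, &buf, bufsize) != KERN_SUCCESS)
 *		return KERN_RESOURCE_SHORTAGE;
 *	bzero((void *) buf, bufsize);
 *	...
 *	kmem_free(kernel_map, buf, bufsize);
 */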

/*
 *	kmem_realloc:
 *
 *	Reallocate wired-down memory in the kernel's address map
 *	or a submap.  Newly allocated pages are not zeroed.
 *	This can only be used on regions allocated with kmem_alloc.
 *
 *	If successful, the pages in the old region are mapped twice.
 *	The old region is unchanged.  Use kmem_free to get rid of it.
 */
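
/*
 * Illustrative usage sketch (not part of the original source): growing a
 * region obtained with kmem_alloc().  On success the existing pages are
 * also visible at the start of the new region, so the caller typically
 * just drops the old mapping; `oldsize' and `newsize' are hypothetical.
 *
 *	vm_offset_t	newaddr;
 *
 *	kr = kmem_realloc(kernel_map, oldaddr, oldsize, &newaddr, newsize);
 *	if (kr == KERN_SUCCESS)
 *		kmem_free(kernel_map, oldaddr, oldsize);
 */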
kern_return_t
kmem_realloc(
	vm_map_t		map,
	vm_offset_t		oldaddr,
	vm_size_t		oldsize,
	vm_offset_t		*newaddrp,
	vm_size_t		newsize)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		oldmapmin;
	vm_map_offset_t		oldmapmax;
	vm_map_offset_t		newmapaddr;
	vm_map_size_t		oldmapsize;
	vm_map_size_t		newmapsize;
	vm_map_entry_t		oldentry;
	vm_map_entry_t		newentry;
	vm_page_t		mem;
	kern_return_t		kr;

	oldmapmin = vm_map_trunc_page(oldaddr);
	oldmapmax = vm_map_round_page(oldaddr + oldsize);
	oldmapsize = oldmapmax - oldmapmin;
	newmapsize = vm_map_round_page(newsize);


	/*
	 *	Find the VM object backing the old region.
	 */

	vm_map_lock(map);

	if (!vm_map_lookup_entry(map, oldmapmin, &oldentry))
		panic("kmem_realloc");
	object = oldentry->object.vm_object;

	/*
	 *	Increase the size of the object and
	 *	fill in the new region.
	 */

	vm_object_reference(object);
	/* by grabbing the object lock before unlocking the map */
	/* we guarantee that we will panic if more than one     */
	/* attempt is made to realloc a kmem_alloc'd area       */
	vm_object_lock(object);
	vm_map_unlock(map);
	if (object->vo_size != oldmapsize)
		panic("kmem_realloc");
	object->vo_size = newmapsize;
	vm_object_unlock(object);

	/* allocate the new pages while expanded portion of the */
	/* object is still not mapped */
	kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
			 vm_object_round_page(newmapsize-oldmapsize));

	/*
	 *	Find space for the new region.
	 */

	kr = vm_map_find_space(map, &newmapaddr, newmapsize,
			       (vm_map_offset_t) 0, 0, &newentry);
	if (kr != KERN_SUCCESS) {
		vm_object_lock(object);
		for(offset = oldmapsize;
		    offset < newmapsize; offset += PAGE_SIZE) {
	    		if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
				VM_PAGE_FREE(mem);
			}
		}
		object->vo_size = oldmapsize;
		vm_object_unlock(object);
		vm_object_deallocate(object);
		return kr;
	}
	newentry->object.vm_object = object;
	newentry->offset = 0;
	assert (newentry->wired_count == 0);


	/* add an extra reference in case we have someone doing an */
	/* unexpected deallocate */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = vm_map_wire(map, newmapaddr, newmapaddr + newmapsize, VM_PROT_DEFAULT, FALSE);
	if (KERN_SUCCESS != kr) {
		vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, 0);
		vm_object_lock(object);
		for(offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
	    		if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
				VM_PAGE_FREE(mem);
			}
		}
		object->vo_size = oldmapsize;
		vm_object_unlock(object);
		vm_object_deallocate(object);
		return (kr);
	}
	vm_object_deallocate(object);

	*newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
	return KERN_SUCCESS;
}

/*
 *	kmem_alloc_kobject:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 *
 *	The memory is allocated in the kernel_object.
 *	It may not be copied with vm_map_copy, and
 *	it may not be reallocated with kmem_realloc.
 */

kern_return_t
kmem_alloc_kobject(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT);
}

/*
 *	kmem_alloc_aligned:
 *
 *	Like kmem_alloc_kobject, except that the memory is aligned.
 *	The size must be a power of 2; the routine panics otherwise.
 */

kern_return_t
kmem_alloc_aligned(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	if ((size & (size - 1)) != 0)
		panic("kmem_alloc_aligned: size not aligned");
	return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT);
}
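
/*
 * Illustrative usage sketch (not part of the original source): a 32KB
 * allocation that comes back aligned to a 32KB boundary because the mask
 * passed to kernel_memory_allocate() is size - 1.
 *
 *	vm_offset_t	addr;
 *
 *	if (kmem_alloc_aligned(kernel_map, &addr, 32 * 1024) == KERN_SUCCESS)
 *		assert((addr & (32 * 1024 - 1)) == 0);
 */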

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory in the kernel's address map.
 */

kern_return_t
kmem_alloc_pageable(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	vm_map_offset_t map_addr;
	vm_map_size_t	map_size;
	kern_return_t kr;

#ifndef normal
	map_addr = (vm_map_min(map)) + 0x1000;
#else
	map_addr = vm_map_min(map);
#endif
	map_size = vm_map_round_page(size);

	kr = vm_map_enter(map, &map_addr, map_size,
			  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kr != KERN_SUCCESS)
		return kr;

	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;
}

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
 *	and return the physical pages associated with that region.
 */

void
kmem_free(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size)
{
	kern_return_t kr;

	assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);

	TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);

	if(size == 0) {
#if MACH_ASSERT
		printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n",map,(uint64_t)addr);
#endif
		return;
	}

	kr = vm_map_remove(map, vm_map_trunc_page(addr),
				vm_map_round_page(addr + size),
				VM_MAP_REMOVE_KUNWIRE);
	if (kr != KERN_SUCCESS)
		panic("kmem_free");
}

/*
 *	Allocate new pages in an object.
 */

kern_return_t
kmem_alloc_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size)
{
	vm_object_size_t		alloc_size;

	alloc_size = vm_object_round_page(size);
        vm_object_lock(object);
	while (alloc_size) {
	    register vm_page_t	mem;


	    /*
	     *	Allocate a page
	     */
	    while (VM_PAGE_NULL ==
		  (mem = vm_page_alloc(object, offset))) {
		vm_object_unlock(object);
		VM_PAGE_WAIT();
		vm_object_lock(object);
	    }
	    mem->busy = FALSE;

	    alloc_size -= PAGE_SIZE;
	    offset += PAGE_SIZE;
	}
	vm_object_unlock(object);
	return KERN_SUCCESS;
}

/*
 *	Remap wired pages in an object into a new region.
 *	The object is assumed to be mapped into the kernel map or
 *	a submap.
 */
void
kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection)
{

	vm_map_offset_t			map_start;
	vm_map_offset_t			map_end;

	/*
	 *	Mark the pmap region as not pageable.
	 */
	map_start = vm_map_trunc_page(start);
	map_end = vm_map_round_page(end);

	pmap_pageable(kernel_pmap, map_start, map_end, FALSE);

	while (map_start < map_end) {
	    register vm_page_t	mem;

	    vm_object_lock(object);

	    /*
	     *	Find a page
	     */
	    if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
		panic("kmem_remap_pages");

	    /*
	     *	Wire it down (again)
	     */
	    vm_page_lockspin_queues();
	    vm_page_wire(mem);
	    vm_page_unlock_queues();
	    vm_object_unlock(object);

	    /*
	     * ENCRYPTED SWAP:
	     * The page is supposed to be wired now, so it
	     * shouldn't be encrypted at this point.  It can
	     * safely be entered in the page table.
	     */
	    ASSERT_PAGE_DECRYPTED(mem);

	    /*
	     *	Enter it in the kernel pmap.  The page isn't busy,
	     *	but this shouldn't be a problem because it is wired.
	     */

	    mem->pmapped = TRUE;
	    mem->wpmapped = TRUE;

	    PMAP_ENTER(kernel_pmap, map_start, mem, protection, VM_PROT_NONE, 0, TRUE);

	    map_start += PAGE_SIZE;
	    offset += PAGE_SIZE;
	}
}

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	addr		Address of start of range (IN/OUT)
 *	size		Size of range to find
 *	pageable	Can region be paged
 *	flags		VM allocation flags (e.g. VM_FLAGS_ANYWHERE)
 *	new_map		Pointer to new submap
 */
kern_return_t
kmem_suballoc(
	vm_map_t	parent,
	vm_offset_t	*addr,
	vm_size_t	size,
	boolean_t	pageable,
	int		flags,
	vm_map_t	*new_map)
{
	vm_map_t	map;
	vm_map_offset_t	map_addr;
	vm_map_size_t	map_size;
	kern_return_t	kr;

	map_size = vm_map_round_page(size);

	/*
	 *	Need reference on submap object because it is internal
	 *	to the vm_system.  vm_object_enter will never be called
	 *	on it (usual source of reference for vm_map_enter).
	 */
	vm_object_reference(vm_submap_object);

	map_addr = (flags & VM_FLAGS_ANYWHERE) ?
	           vm_map_min(parent) : vm_map_trunc_page(*addr);

	kr = vm_map_enter(parent, &map_addr, map_size,
			  (vm_map_offset_t) 0, flags,
			  vm_submap_object, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(vm_submap_object);
		return (kr);
	}

	pmap_reference(vm_map_pmap(parent));
	map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
	if (map == VM_MAP_NULL)
		panic("kmem_suballoc: vm_map_create failed");	/* "can't happen" */

	kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
	if (kr != KERN_SUCCESS) {
		/*
		 * See comment preceding vm_map_submap().
		 */
		vm_map_remove(parent, map_addr, map_addr + map_size, VM_MAP_NO_FLAGS);
		vm_map_deallocate(map);	/* also removes ref to pmap */
		vm_object_deallocate(vm_submap_object);
		return (kr);
	}
	*addr = CAST_DOWN(vm_offset_t, map_addr);
	*new_map = map;
	return (KERN_SUCCESS);
}
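
/*
 * Illustrative usage sketch (not part of the original source): carving a
 * pageable submap out of the kernel map at bootstrap, in the style of the
 * zone and IPC submaps.  The 64MB size is an example value only.
 *
 *	vm_offset_t	base = 0;
 *	vm_map_t	my_submap;
 *
 *	kr = kmem_suballoc(kernel_map, &base, 64 * 1024 * 1024,
 *			   TRUE, VM_FLAGS_ANYWHERE, &my_submap);
 */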

/*
 *	kmem_init:
 *
 *	Initialize the kernel's virtual memory map, taking
 *	into account all memory allocated up to this time.
 */
void
kmem_init(
	vm_offset_t	start,
	vm_offset_t	end)
{
	vm_map_offset_t map_start;
	vm_map_offset_t map_end;

	map_start = vm_map_trunc_page(start);
	map_end = vm_map_round_page(end);

	kernel_map = vm_map_create(pmap_kernel(),VM_MIN_KERNEL_AND_KEXT_ADDRESS,
			    map_end, FALSE);

#if 0
	printf("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx)\n",
	       (uint64_t) map_start, (uint64_t) map_end,
	       (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
	       (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS));
#endif

	/*
	 *	Reserve virtual memory allocated up to this time.
	 */
	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
		vm_map_offset_t map_addr;
		kern_return_t kr;

		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
		kr = vm_map_enter(kernel_map,
			&map_addr,
		    	(vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
			(vm_map_offset_t) 0,
			VM_FLAGS_FIXED | VM_FLAGS_NO_PMAP_CHECK,
			VM_OBJECT_NULL,
			(vm_object_offset_t) 0, FALSE,
			VM_PROT_NONE, VM_PROT_NONE,
			VM_INHERIT_DEFAULT);

		if (kr != KERN_SUCCESS) {
			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
			      (uint64_t) start, (uint64_t) end,
			      (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
			      (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
			      kr);
		}
	}

	/*
	 * Set the default global user wire limit which limits the amount of
	 * memory that can be locked via mlock().  We set this to the total
	 * amount of memory that is potentially usable by a user app (max_mem)
	 * minus a certain amount.  This can be overridden via a sysctl.
	 */
	vm_global_no_user_wire_amount = MIN(max_mem*20/100,
					    VM_NOT_USER_WIREABLE);
	vm_global_user_wire_limit = max_mem - vm_global_no_user_wire_amount;

	/* the default per user limit is the same as the global limit */
	vm_user_wire_limit = vm_global_user_wire_limit;
}


/*
 *	Routine:	copyinmap
 *	Purpose:
 *		Like copyin, except that fromaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
kern_return_t
copyinmap(
	vm_map_t		map,
	vm_map_offset_t		fromaddr,
	void			*todata,
	vm_size_t		length)
{
	kern_return_t	kr = KERN_SUCCESS;
	vm_map_t oldmap;

	if (vm_map_pmap(map) == pmap_kernel())
	{
		/* assume a correct copy */
		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
	}
	else if (current_map() == map)
	{
		if (copyin(fromaddr, todata, length) != 0)
			kr = KERN_INVALID_ADDRESS;
	}
	else
	{
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(fromaddr, todata, length) != 0)
			kr = KERN_INVALID_ADDRESS;
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}

/*
 *	Routine:	copyoutmap
 *	Purpose:
 *		Like copyout, except that toaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
kern_return_t
copyoutmap(
	vm_map_t		map,
	void			*fromdata,
	vm_map_address_t	toaddr,
	vm_size_t		length)
{
	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
		return KERN_SUCCESS;
	}

	if (current_map() != map)
		return KERN_NOT_SUPPORTED;

	if (copyout(fromdata, toaddr, length) != 0)
		return KERN_INVALID_ADDRESS;

	return KERN_SUCCESS;
}
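
/*
 * Illustrative usage sketch (not part of the original source): reading and
 * then updating a word at a hypothetical address `uaddr' in `map'.  Note
 * that copyoutmap() above only supports the kernel map and the current
 * map, so the write half assumes `map' is one of those.
 *
 *	uint32_t	value;
 *
 *	if (copyinmap(map, uaddr, &value, sizeof (value)) != KERN_SUCCESS)
 *		return KERN_INVALID_ADDRESS;
 *	value |= 0x1;
 *	if (copyoutmap(map, &value, uaddr, sizeof (value)) != KERN_SUCCESS)
 *		return KERN_INVALID_ADDRESS;
 */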


kern_return_t
vm_conflict_check(
	vm_map_t		map,
	vm_map_offset_t	off,
	vm_map_size_t		len,
	memory_object_t	pager,
	vm_object_offset_t	file_off)
{
	vm_map_entry_t		entry;
	vm_object_t		obj;
	vm_object_offset_t	obj_off;
	vm_map_t		base_map;
	vm_map_offset_t		base_offset;
	vm_map_offset_t		original_offset;
	kern_return_t		kr;
	vm_map_size_t		local_len;

	base_map = map;
	base_offset = off;
	original_offset = off;
	kr = KERN_SUCCESS;
	vm_map_lock(map);
	while(vm_map_lookup_entry(map, off, &entry)) {
		local_len = len;

		if (entry->object.vm_object == VM_OBJECT_NULL) {
			vm_map_unlock(map);
			return KERN_SUCCESS;
		}
		if (entry->is_sub_map) {
			vm_map_t	old_map;

			old_map = map;
			vm_map_lock(entry->object.sub_map);
			map = entry->object.sub_map;
			off = entry->offset + (off - entry->vme_start);
			vm_map_unlock(old_map);
			continue;
		}
		obj = entry->object.vm_object;
		obj_off = (off - entry->vme_start) + entry->offset;
		while(obj->shadow) {
			obj_off += obj->vo_shadow_offset;
			obj = obj->shadow;
		}
		if((obj->pager_created) && (obj->pager == pager)) {
			if(((obj->paging_offset) + obj_off) == file_off) {
				if(off != base_offset) {
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				kr = KERN_ALREADY_WAITING;
			} else {
			       	vm_object_offset_t	obj_off_aligned;
				vm_object_offset_t	file_off_aligned;

				obj_off_aligned = obj_off & ~PAGE_MASK;
				file_off_aligned = file_off & ~PAGE_MASK;

				if (file_off_aligned == (obj->paging_offset + obj_off_aligned)) {
				        /*
					 * the target map and the file offset start in the same page
					 * but are not identical...
					 */
				        vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off < (obj->paging_offset + obj_off_aligned)) &&
				    ((file_off + len) > (obj->paging_offset + obj_off_aligned))) {
				        /*
					 * some portion of the tail of the I/O will fall
					 * within the encompass of the target map
					 */
				        vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off_aligned > (obj->paging_offset + obj_off)) &&
				    (file_off_aligned < (obj->paging_offset + obj_off) + len)) {
				        /*
					 * the beginning page of the file offset falls within
					 * the target map's encompass
					 */
				        vm_map_unlock(map);
					return KERN_FAILURE;
				}
			}
		} else if(kr != KERN_SUCCESS) {
		        vm_map_unlock(map);
			return KERN_FAILURE;
		}

		if(len <= ((entry->vme_end - entry->vme_start) -
						(off - entry->vme_start))) {
			vm_map_unlock(map);
			return kr;
		} else {
			len -= (entry->vme_end - entry->vme_start) -
						(off - entry->vme_start);
		}
		base_offset = base_offset + (local_len - len);
		file_off = file_off + (local_len - len);
		off = base_offset;
		if(map != base_map) {
			vm_map_unlock(map);
			vm_map_lock(base_map);
			map = base_map;
		}
	}

	vm_map_unlock(map);
	return kr;
}