/*
 * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 *	Kernel stack management routines.
 */

#include <mach/mach_host.h>
#include <mach/mach_types.h>
#include <mach/processor_set.h>

#include <kern/kern_types.h>
#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <mach_debug.h>

/*
 *	We allocate stacks from generic kernel VM.
 *
 *	The stack_free_list can only be accessed at splsched,
 *	because stack_alloc_try/thread_invoke operate at splsched.
 */

decl_simple_lock_data(static,stack_lock_data)
#define stack_lock()		simple_lock(&stack_lock_data)
#define stack_unlock()		simple_unlock(&stack_lock_data)

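/*
 *	Depth of the per-processor free-stack cache; overflow
 *	spills onto the global stack_free_list.
 */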
#define STACK_CACHE_SIZE	2

static vm_offset_t		stack_free_list;

static unsigned int		stack_free_count, stack_free_hiwat;		/* free list count */
static unsigned int		stack_hiwat;				/* stack_total high water mark */
unsigned int			stack_total;				/* current total count */
unsigned long long		stack_allocs;				/* total count of allocations */

static int			stack_fake_zone_index = -1;	/* index in zone_info array */

static unsigned int		stack_free_target;			/* target free list count */
static int				stack_free_delta;			/* free list activity since last target update */

static unsigned int		stack_new_count;						/* total new stack allocations */

static vm_offset_t		stack_addr_mask;			/* alignment mask for stack allocations */

/* Kernel stack geometry; kernel_stack_pages may be overridden by boot-arg. */
unsigned int			kernel_stack_pages;
vm_offset_t			kernel_stack_size;
vm_offset_t			kernel_stack_mask;
vm_offset_t			kernel_stack_depth_max;

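/*
 *	Accounting helpers: charge or credit kernel stack memory
 *	against the owning thread's ledger and the kernel-stack
 *	fake zone statistics.
 */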
static inline void
STACK_ZINFO_PALLOC(thread_t thread)
{
	task_t task;
	zinfo_usage_t zinfo;

	ledger_credit(thread->t_ledger, task_ledgers.tkm_private, kernel_stack_size);

	if (stack_fake_zone_index != -1 &&
	    (task = thread->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
		OSAddAtomic64(kernel_stack_size,
			      (int64_t *)&zinfo[stack_fake_zone_index].alloc);
}

static inline void
STACK_ZINFO_PFREE(thread_t thread)
{
	task_t task;
	zinfo_usage_t zinfo;

	ledger_debit(thread->t_ledger, task_ledgers.tkm_private, kernel_stack_size);

	if (stack_fake_zone_index != -1 &&
	    (task = thread->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
		OSAddAtomic64(kernel_stack_size,
			      (int64_t *)&zinfo[stack_fake_zone_index].free);
}

static inline void
STACK_ZINFO_HANDOFF(thread_t from, thread_t to)
{
	ledger_debit(from->t_ledger, task_ledgers.tkm_private, kernel_stack_size);
	ledger_credit(to->t_ledger, task_ledgers.tkm_private, kernel_stack_size);

	if (stack_fake_zone_index != -1) {
		task_t task;
		zinfo_usage_t zinfo;

		if ((task = from->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
			OSAddAtomic64(kernel_stack_size,
				      (int64_t *)&zinfo[stack_fake_zone_index].free);

		if ((task = to->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
			OSAddAtomic64(kernel_stack_size,
				      (int64_t *)&zinfo[stack_fake_zone_index].alloc);
	}
}

/*
 *	The next field is at the base of the stack,
 *	so the low end is left unsullied.
 */
#define stack_next(stack)	\
	(*((vm_offset_t *)((stack) + kernel_stack_size) - 1))

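/*
 *	Helpers used by stack_init() to compute stack_addr_mask and
 *	kernel_stack_mask.  Note that log2() here returns the bit
 *	length of its argument (floor(log2(size)) + 1 for nonzero size).
 */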
static inline int
log2(vm_offset_t size)
{
	int	result;
	for (result = 0; size > 0; result++)
		size >>= 1;
	return result;
}

static inline vm_offset_t
roundup_pow2(vm_offset_t size)
{
	return 1UL << (log2(size - 1) + 1);
}

static vm_offset_t stack_alloc_internal(void);
static void stack_free_stack(vm_offset_t);

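/*
 *	stack_init:
 *
 *	Initialize kernel stack geometry (optionally from the
 *	"kernel_stack_pages" boot-arg) and the stack free list lock.
 */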
void
stack_init(void)
{
	simple_lock_init(&stack_lock_data, 0);

	kernel_stack_pages = KERNEL_STACK_SIZE / PAGE_SIZE;
	kernel_stack_size = KERNEL_STACK_SIZE;
	kernel_stack_mask = -KERNEL_STACK_SIZE;
	kernel_stack_depth_max = 0;

	if (PE_parse_boot_argn("kernel_stack_pages",
			       &kernel_stack_pages,
			       sizeof (kernel_stack_pages))) {
		kernel_stack_size = kernel_stack_pages * PAGE_SIZE;
		printf("stack_init: kernel_stack_pages=%d kernel_stack_size=%p\n",
			kernel_stack_pages, (void *) kernel_stack_size);
	}

	if (kernel_stack_size < round_page(kernel_stack_size))
		panic("stack_init: stack size %p not a multiple of page size %d\n",
			(void *) kernel_stack_size, PAGE_SIZE);

	stack_addr_mask = roundup_pow2(kernel_stack_size) - 1;
	kernel_stack_mask = ~stack_addr_mask;
}

/*
 *	stack_alloc:
 *
 *	Allocate a stack for a thread, may
 *	block.
 */

static vm_offset_t
stack_alloc_internal(void)
{
	vm_offset_t		stack;
	spl_t			s;
	int			guard_flags;

	s = splsched();
	stack_lock();
	stack_allocs++;
	stack = stack_free_list;
	if (stack != 0) {
		stack_free_list = stack_next(stack);
		stack_free_count--;
	}
	else {
		if (++stack_total > stack_hiwat)
			stack_hiwat = stack_total;
		stack_new_count++;
	}
	stack_free_delta--;
	stack_unlock();
	splx(s);

	if (stack == 0) {

		/*
		 * Request guard pages on either side of the stack.  Ask
		 * kernel_memory_allocate() for two extra pages to account
		 * for these.
		 */

		guard_flags = KMA_GUARD_FIRST | KMA_GUARD_LAST;
		if (kernel_memory_allocate(kernel_map, &stack,
					   kernel_stack_size + (2*PAGE_SIZE),
					   stack_addr_mask,
					   KMA_KSTACK | KMA_KOBJECT | guard_flags)
		    != KERN_SUCCESS)
			panic("stack_alloc: kernel_memory_allocate");

		/*
		 * The stack address that comes back is the address of the lower
		 * guard page.  Skip past it to get the actual stack base address.
		 */

		stack += PAGE_SIZE;
	}
	return stack;
}

void
stack_alloc(
	thread_t	thread)
{

	assert(thread->kernel_stack == 0);
	machine_stack_attach(thread, stack_alloc_internal());
	STACK_ZINFO_PALLOC(thread);
}

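/*
 *	stack_handoff:
 *
 *	Pass the current thread's stack directly to another
 *	thread at context switch, avoiding a free/allocate pair.
 */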
void
stack_handoff(thread_t from, thread_t to)
{
	assert(from == current_thread());
	machine_stack_handoff(from, to);
	STACK_ZINFO_HANDOFF(from, to);
}

/*
 *	stack_free:
 *
 *	Detach and free the stack for a thread.
 */
void
stack_free(
	thread_t	thread)
{
	vm_offset_t		stack = machine_stack_detach(thread);

	assert(stack);
	if (stack != thread->reserved_stack) {
		STACK_ZINFO_PFREE(thread);
		stack_free_stack(stack);
	}
}

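/*
 *	stack_free_reserved:
 *
 *	Release a thread's reserved stack if it is not
 *	also the thread's current kernel stack.
 */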
void
stack_free_reserved(
	thread_t	thread)
{
	if (thread->reserved_stack != thread->kernel_stack) {
		stack_free_stack(thread->reserved_stack);
		STACK_ZINFO_PFREE(thread);
	}
}

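/*
 *	stack_free_stack:
 *
 *	Return a stack to the per-processor cache, or to the
 *	global free list once the cache is full.
 */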
static void
stack_free_stack(
	vm_offset_t		stack)
{
	struct stack_cache	*cache;
	spl_t				s;

	s = splsched();
	cache = &PROCESSOR_DATA(current_processor(), stack_cache);
	if (cache->count < STACK_CACHE_SIZE) {
		stack_next(stack) = cache->free;
		cache->free = stack;
		cache->count++;
	}
	else {
		stack_lock();
		stack_next(stack) = stack_free_list;
		stack_free_list = stack;
		if (++stack_free_count > stack_free_hiwat)
			stack_free_hiwat = stack_free_count;
		stack_free_delta++;
		stack_unlock();
	}
	splx(s);
}

/*
 *	stack_alloc_try:
 *
 *	Non-blocking attempt to allocate a
 *	stack for a thread.
 *
 *	Returns TRUE on success.
 *
 *	Called at splsched.
 */
boolean_t
stack_alloc_try(
	thread_t		thread)
{
	struct stack_cache	*cache;
	vm_offset_t			stack;

	cache = &PROCESSOR_DATA(current_processor(), stack_cache);
	stack = cache->free;
	if (stack != 0) {
		STACK_ZINFO_PALLOC(thread);
		cache->free = stack_next(stack);
		cache->count--;
	}
	else {
		if (stack_free_list != 0) {
			stack_lock();
			stack = stack_free_list;
			if (stack != 0) {
				STACK_ZINFO_PALLOC(thread);
				stack_free_list = stack_next(stack);
				stack_free_count--;
				stack_free_delta--;
			}
			stack_unlock();
		}
	}

	if (stack != 0 || (stack = thread->reserved_stack) != 0) {
		machine_stack_attach(thread, stack);
		return (TRUE);
	}

	return (FALSE);
}

static unsigned int		stack_collect_tick, last_stack_tick;

/*
 *	stack_collect:
 *
 *	Free excess kernel stacks, may
 *	block.
 */
void
stack_collect(void)
{
	if (stack_collect_tick != last_stack_tick) {
		unsigned int	target;
		vm_offset_t		stack;
		spl_t			s;

		s = splsched();
		stack_lock();

		target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
		target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

		while (stack_free_count > target) {
			stack = stack_free_list;
			stack_free_list = stack_next(stack);
			stack_free_count--; stack_total--;
			stack_unlock();
			splx(s);

			/*
			 * Get the stack base address, then decrement by one page
			 * to account for the lower guard page.  Add two extra pages
			 * to the size to account for the guard pages on both ends
			 * that were originally requested when the stack was allocated
			 * back in stack_alloc().
			 */

			stack = (vm_offset_t)vm_map_trunc_page(
				stack,
				VM_MAP_PAGE_MASK(kernel_map));
			stack -= PAGE_SIZE;
			if (vm_map_remove(
				    kernel_map,
				    stack,
				    stack + kernel_stack_size+(2*PAGE_SIZE),
				    VM_MAP_REMOVE_KUNWIRE)
			    != KERN_SUCCESS)
				panic("stack_collect: vm_map_remove");
			stack = 0;

			s = splsched();
			stack_lock();

			target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
			target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;
		}

		last_stack_tick = stack_collect_tick;

		stack_unlock();
		splx(s);
	}
}

/*
 *	compute_stack_target:
 *
 *	Computes a new target free list count
 *	based on recent alloc / free activity.
 *
 *	Limits stack collection to once per
 *	computation period.
 */
void
compute_stack_target(
__unused void		*arg)
{
	spl_t		s;

	s = splsched();
	stack_lock();

	if (stack_free_target > 5)
		stack_free_target = (4 * stack_free_target) / 5;
	else
	if (stack_free_target > 0)
		stack_free_target--;

	stack_free_target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

	stack_free_delta = 0;
	stack_collect_tick++;

	stack_unlock();
	splx(s);
}

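/*
 *	stack_fake_zone_init:
 *
 *	Record the index assigned to the kernel-stack fake zone so
 *	that per-task zinfo accounting can be updated on alloc/free.
 */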
void
stack_fake_zone_init(int zone_index)
{
	stack_fake_zone_index = zone_index;
}

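/*
 *	stack_fake_zone_info:
 *
 *	Report zone-style statistics (element size, counts, high
 *	water marks) for kernel stacks to the zone_info interface.
 */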
void
stack_fake_zone_info(int *count,
		     vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
		     uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
{
	unsigned int	total, hiwat, free;
	unsigned long long all;
	spl_t			s;

	s = splsched();
	stack_lock();
	all = stack_allocs;
	total = stack_total;
	hiwat = stack_hiwat;
	free = stack_free_count;
	stack_unlock();
	splx(s);

	*count      = total - free;
	*cur_size   = kernel_stack_size * total;
	*max_size   = kernel_stack_size * hiwat;
	*elem_size  = kernel_stack_size;
	*alloc_size = kernel_stack_size;
	*sum_size = all * kernel_stack_size;

	*collectable = 1;
	*exhaustable = 0;
	*caller_acct = 1;
}

/* OBSOLETE */
void	stack_privilege(
			thread_t	thread);

void
stack_privilege(
	__unused thread_t	thread)
{
	/* OBSOLETE */
}

/*
 * Return info on stack usage for threads in a specific processor set
 */
kern_return_t
processor_set_stack_usage(
	processor_set_t	pset,
	unsigned int	*totalp,
	vm_size_t	*spacep,
	vm_size_t	*residentp,
	vm_size_t	*maxusagep,
	vm_offset_t	*maxstackp)
{
#if !MACH_DEBUG
	return KERN_NOT_SUPPORTED;
#else
	unsigned int total;
	vm_size_t maxusage;
	vm_offset_t maxstack;

	register thread_t *thread_list;
	register thread_t thread;

	unsigned int actual;	/* this many thread pointers in the list */
	unsigned int i;

	vm_size_t size, size_needed;
	void *addr;

	if (pset == PROCESSOR_SET_NULL || pset != &pset0)
		return KERN_INVALID_ARGUMENT;

	size = 0;
	addr = NULL;

	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		actual = threads_count;

		/* do we have the memory we need? */

		size_needed = actual * sizeof(thread_t);
		if (size_needed <= size)
			break;

		lck_mtx_unlock(&tasks_threads_lock);

		if (size != 0)
			kfree(addr, size);

		assert(size_needed > 0);
		size = size_needed;

		addr = kalloc(size);
		if (addr == 0)
			return KERN_RESOURCE_SHORTAGE;
	}

	/* OK, have memory and list is locked */
	thread_list = (thread_t *) addr;
	for (i = 0, thread = (thread_t)(void *) queue_first(&threads);
					!queue_end(&threads, (queue_entry_t) thread);
					thread = (thread_t)(void *) queue_next(&thread->threads)) {
		thread_reference_internal(thread);
		thread_list[i++] = thread;
	}
	assert(i <= actual);

	lck_mtx_unlock(&tasks_threads_lock);

	/* count threads with kernel stacks and release the thread references */

	total = 0;
	maxusage = 0;
	maxstack = 0;
	while (i > 0) {
		thread_t threadref = thread_list[--i];

		if (threadref->kernel_stack != 0)
			total++;

		thread_deallocate(threadref);
	}

	if (size != 0)
		kfree(addr, size);

	*totalp = total;
	*residentp = *spacep = total * round_page(kernel_stack_size);
	*maxusagep = maxusage;
	*maxstackp = maxstack;
	return KERN_SUCCESS;

#endif	/* MACH_DEBUG */
}

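/*
 *	Return the lowest and highest addresses in the kernel map;
 *	any valid kernel stack lies within these bounds.
 */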
vm_offset_t min_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_min(kernel_map);
}

vm_offset_t max_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_max(kernel_map);
}