180021Sjasone/*
280021Sjasone * Copyright (c) 2001 Daniel Eischen <deischen@freebsd.org>
380021Sjasone * Copyright (c) 2000-2001 Jason Evans <jasone@freebsd.org>
480021Sjasone * All rights reserved.
580021Sjasone *
680021Sjasone * Redistribution and use in source and binary forms, with or without
780021Sjasone * modification, are permitted provided that the following conditions
880021Sjasone * are met:
980021Sjasone * 1. Redistributions of source code must retain the above copyright
1080021Sjasone *    notice, this list of conditions and the following disclaimer.
1180021Sjasone * 2. Redistributions in binary form must reproduce the above copyright
1280021Sjasone *    notice, this list of conditions and the following disclaimer in the
1380021Sjasone *    documentation and/or other materials provided with the distribution.
1480021Sjasone *
1580021Sjasone * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
1680021Sjasone * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1780021Sjasone * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1880021Sjasone * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
1980021Sjasone * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2080021Sjasone * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2180021Sjasone * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2280021Sjasone * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2380021Sjasone * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2480021Sjasone * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2580021Sjasone * SUCH DAMAGE.
2680021Sjasone *
2780021Sjasone * $FreeBSD$
2880021Sjasone */
29174112Sdeischen
30174112Sdeischen#include "namespace.h"
3180021Sjasone#include <sys/types.h>
3280021Sjasone#include <sys/mman.h>
3380021Sjasone#include <sys/queue.h>
3480021Sjasone#include <stdlib.h>
3580021Sjasone#include <pthread.h>
36174112Sdeischen#include "un-namespace.h"
37103388Smini#include "thr_private.h"
3880021Sjasone
/*
 * Spare thread stack.
 *
 * Bookkeeping record for a cached, currently unused thread stack.
 * _thr_stack_free() writes this record into the stack's own memory, so
 * caching a stack needs no separate allocation.
 */
struct stack {
	LIST_ENTRY(stack)	qe;		/* Stack queue linkage. */
	size_t			stacksize;	/* Stack size (rounded up). */
	size_t			guardsize;	/* Guard size (rounded up). */
	void			*stackaddr;	/* Stack address. */
};
4680021Sjasone
/*
 * Spare stack queue for stacks whose rounded stack size and guard size
 * both equal the defaults.  Stacks are cached rather than unmapped to
 * avoid additional complexity managing mmap()ed stack regions.  Entries
 * are inserted and removed at the head of the list, so spare stacks are
 * reused in LIFO order to increase cache locality.
 */
static LIST_HEAD(, stack)	dstackq = LIST_HEAD_INITIALIZER(dstackq);
5380021Sjasone
/*
 * Spare stack queue for miscellaneous sizes (non-default stack and/or
 * guard size).  Stacks are cached rather than unmapped to avoid
 * additional complexity managing mmap()ed stack regions.  This list is
 * unordered, since ordering on both stack size and guard size would be
 * more trouble than it's worth.  A stack is taken from this cache on the
 * first entry whose stack size and guard size both match exactly.
 */
static LIST_HEAD(, stack)	mstackq = LIST_HEAD_INITIALIZER(mstackq);
6280021Sjasone
/*
 * Base address of the last stack allocated (including its red zone, if
 * there is one).  Stacks are allocated contiguously at successively lower
 * addresses, starting beyond the top of the main stack.  When a new stack
 * is created, a red zone (a guard region mapped with PROT_NONE) is
 * typically placed at the low-address end of the stack, beyond the limit
 * of its growth, so that the stack will not be able to grow all the way
 * into the next stack.  This isn't fool-proof.  It is possible for a
 * stack to grow by a large amount at once, such that it lands in the next
 * stack, and as long as the memory within the red zone is never accessed,
 * nothing will prevent one thread stack from trouncing all over the next.
 *
 * The value is NULL until the first stack is allocated, at which point
 * _thr_stack_alloc() seeds it from _usrstack.
 *
 * low memory
 *     . . . . . . . . . . . . . . . . . .
 *    |                                   |
 *    |             stack 3               | start of 3rd thread stack
 *    +-----------------------------------+
 *    |                                   |
 *    |       Red Zone (guard page)       | red zone for 2nd thread
 *    |                                   |
 *    +-----------------------------------+
 *    |  stack 2 - PTHREAD_STACK_DEFAULT  | top of 2nd thread stack
 *    |                                   |
 *    |                                   |
 *    |                                   |
 *    |                                   |
 *    |             stack 2               |
 *    +-----------------------------------+ <-- start of 2nd thread stack
 *    |                                   |
 *    |       Red Zone                    | red zone for 1st thread
 *    |                                   |
 *    +-----------------------------------+
 *    |  stack 1 - PTHREAD_STACK_DEFAULT  | top of 1st thread stack
 *    |                                   |
 *    |                                   |
 *    |                                   |
 *    |                                   |
 *    |             stack 1               |
 *    +-----------------------------------+ <-- start of 1st thread stack
 *    |                                   |   (initial value of last_stack)
 *    |       Red Zone                    |
 *    |                                   | red zone for main thread
 *    +-----------------------------------+
 *    | USRSTACK - PTHREAD_STACK_INITIAL  | top of main thread stack
 *    |                                   | ^
 *    |                                   | |
 *    |                                   | |
 *    |                                   | | stack growth
 *    |                                   |
 *    +-----------------------------------+ <-- start of main thread stack
 *                                              (USRSTACK)
 * high memory
 *
 */
static void *last_stack = NULL;
11880021Sjasone
119120072Sdavidxu/*
120120072Sdavidxu * Round size up to the nearest multiple of
121120072Sdavidxu * _thr_page_size.
122120072Sdavidxu */
123120072Sdavidxustatic inline size_t
124120072Sdavidxuround_up(size_t size)
125120072Sdavidxu{
126120072Sdavidxu	if (size % _thr_page_size != 0)
127120072Sdavidxu		size = ((size / _thr_page_size) + 1) *
128120072Sdavidxu		    _thr_page_size;
129174112Sdeischen	return (size);
130120072Sdavidxu}
131120072Sdavidxu
132113658Sdeischenint
133113658Sdeischen_thr_stack_alloc(struct pthread_attr *attr)
13480021Sjasone{
135113658Sdeischen	struct stack *spare_stack;
136113658Sdeischen	struct kse *curkse;
137113658Sdeischen	kse_critical_t crit;
138113658Sdeischen	size_t stacksize;
139113658Sdeischen	size_t guardsize;
140136190Sdavidxu	char *stackaddr;
14180021Sjasone
14280021Sjasone	/*
143113658Sdeischen	 * Round up stack size to nearest multiple of _thr_page_size so
144113658Sdeischen	 * that mmap() * will work.  If the stack size is not an even
145113658Sdeischen	 * multiple, we end up initializing things such that there is
146113658Sdeischen	 * unused space above the beginning of the stack, so the stack
147113658Sdeischen	 * sits snugly against its guard.
14880021Sjasone	 */
149120072Sdavidxu	stacksize = round_up(attr->stacksize_attr);
150120072Sdavidxu	guardsize = round_up(attr->guardsize_attr);
151120072Sdavidxu
152113658Sdeischen	attr->stackaddr_attr = NULL;
153113658Sdeischen	attr->flags &= ~THR_STACK_USER;
15480021Sjasone
15580021Sjasone	/*
156113658Sdeischen	 * Use the garbage collector lock for synchronization of the
157113658Sdeischen	 * spare stack lists and allocations from usrstack.
158113658Sdeischen	 */
159113658Sdeischen	crit = _kse_critical_enter();
160113658Sdeischen	curkse = _get_curkse();
161113658Sdeischen	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
162113658Sdeischen	/*
16380021Sjasone	 * If the stack and guard sizes are default, try to allocate a stack
16480021Sjasone	 * from the default-size stack cache:
16580021Sjasone	 */
166141822Sdeischen	if ((stacksize == _thr_stack_default) &&
167113658Sdeischen	    (guardsize == _thr_guard_default)) {
168113658Sdeischen		if ((spare_stack = LIST_FIRST(&dstackq)) != NULL) {
169113658Sdeischen			/* Use the spare stack. */
17080021Sjasone			LIST_REMOVE(spare_stack, qe);
171113658Sdeischen			attr->stackaddr_attr = spare_stack->stackaddr;
17280021Sjasone		}
17380021Sjasone	}
17480021Sjasone	/*
17580021Sjasone	 * The user specified a non-default stack and/or guard size, so try to
17680021Sjasone	 * allocate a stack from the non-default size stack cache, using the
17780021Sjasone	 * rounded up stack size (stack_size) in the search:
17880021Sjasone	 */
17980021Sjasone	else {
180113658Sdeischen		LIST_FOREACH(spare_stack, &mstackq, qe) {
181113658Sdeischen			if (spare_stack->stacksize == stacksize &&
18280021Sjasone			    spare_stack->guardsize == guardsize) {
18380021Sjasone				LIST_REMOVE(spare_stack, qe);
184113658Sdeischen				attr->stackaddr_attr = spare_stack->stackaddr;
18580021Sjasone				break;
18680021Sjasone			}
18780021Sjasone		}
18880021Sjasone	}
189113658Sdeischen	if (attr->stackaddr_attr != NULL) {
190113658Sdeischen		/* A cached stack was found.  Release the lock. */
191113658Sdeischen		KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
192113658Sdeischen		_kse_critical_leave(crit);
193113658Sdeischen	}
194113658Sdeischen	else {
195113658Sdeischen		/* Allocate a stack from usrstack. */
19685567Speter		if (last_stack == NULL)
197174112Sdeischen			last_stack = (void *)((uintptr_t)_usrstack -
198174112Sdeischen			    (uintptr_t)_thr_stack_initial -
199174112Sdeischen			    (uintptr_t)_thr_guard_default);
20085567Speter
20180021Sjasone		/* Allocate a new stack. */
202174112Sdeischen		stackaddr = (void *)((uintptr_t)last_stack -
203174112Sdeischen		    (uintptr_t)stacksize - (uintptr_t)guardsize);
20480021Sjasone
20580021Sjasone		/*
206113658Sdeischen		 * Even if stack allocation fails, we don't want to try to
207113658Sdeischen		 * use this location again, so unconditionally decrement
20880021Sjasone		 * last_stack.  Under normal operating conditions, the most
209113658Sdeischen		 * likely reason for an mmap() error is a stack overflow of
210113658Sdeischen		 * the adjacent thread stack.
21180021Sjasone		 */
212174112Sdeischen		last_stack = (void *)((uintptr_t)last_stack -
213174112Sdeischen		    (uintptr_t)(stacksize + guardsize));
21480021Sjasone
215113658Sdeischen		/* Release the lock before mmap'ing it. */
216113658Sdeischen		KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
217113658Sdeischen		_kse_critical_leave(crit);
218113658Sdeischen
219136190Sdavidxu		/* Map the stack and guard page together, and split guard
220136190Sdavidxu		   page from allocated space: */
221136190Sdavidxu		if ((stackaddr = mmap(stackaddr, stacksize+guardsize,
222136190Sdavidxu		     PROT_READ | PROT_WRITE, MAP_STACK,
223136190Sdavidxu		     -1, 0)) != MAP_FAILED &&
224136190Sdavidxu		    (guardsize == 0 ||
225136190Sdavidxu		     mprotect(stackaddr, guardsize, PROT_NONE) == 0)) {
226136190Sdavidxu			stackaddr += guardsize;
227136190Sdavidxu		} else {
228136190Sdavidxu			if (stackaddr != MAP_FAILED)
229136190Sdavidxu				munmap(stackaddr, stacksize + guardsize);
230136190Sdavidxu			stackaddr = NULL;
231136190Sdavidxu		}
232136190Sdavidxu		attr->stackaddr_attr = stackaddr;
23380021Sjasone	}
234113658Sdeischen	if (attr->stackaddr_attr != NULL)
235113658Sdeischen		return (0);
236113658Sdeischen	else
237113658Sdeischen		return (-1);
23880021Sjasone}
23980021Sjasone
240113658Sdeischen/* This function must be called with _thread_list_lock held. */
24180021Sjasonevoid
242113658Sdeischen_thr_stack_free(struct pthread_attr *attr)
24380021Sjasone{
244113658Sdeischen	struct stack *spare_stack;
24580021Sjasone
246113658Sdeischen	if ((attr != NULL) && ((attr->flags & THR_STACK_USER) == 0)
247113658Sdeischen	    && (attr->stackaddr_attr != NULL)) {
248174112Sdeischen		spare_stack = (struct stack *)((uintptr_t)attr->stackaddr_attr
249174112Sdeischen		    + (uintptr_t)attr->stacksize_attr - sizeof(struct stack));
250120072Sdavidxu		spare_stack->stacksize = round_up(attr->stacksize_attr);
251120072Sdavidxu		spare_stack->guardsize = round_up(attr->guardsize_attr);
252113658Sdeischen		spare_stack->stackaddr = attr->stackaddr_attr;
25380021Sjasone
254141822Sdeischen		if (spare_stack->stacksize == _thr_stack_default &&
255113658Sdeischen		    spare_stack->guardsize == _thr_guard_default) {
256113658Sdeischen			/* Default stack/guard size. */
257113658Sdeischen			LIST_INSERT_HEAD(&dstackq, spare_stack, qe);
258113658Sdeischen		} else {
259113658Sdeischen			/* Non-default stack/guard size. */
260113658Sdeischen			LIST_INSERT_HEAD(&mstackq, spare_stack, qe);
261113658Sdeischen		}
262113658Sdeischen		attr->stackaddr_attr = NULL;
26380021Sjasone	}
26480021Sjasone}
265