1112918Sjeff/*
2112918Sjeff * Copyright (c) 2001 Daniel Eischen <deischen@freebsd.org>
3112918Sjeff * Copyright (c) 2000-2001 Jason Evans <jasone@freebsd.org>
4112918Sjeff * All rights reserved.
5112918Sjeff *
6112918Sjeff * Redistribution and use in source and binary forms, with or without
7112918Sjeff * modification, are permitted provided that the following conditions
8112918Sjeff * are met:
9112918Sjeff * 1. Redistributions of source code must retain the above copyright
10112918Sjeff *    notice, this list of conditions and the following disclaimer.
11112918Sjeff * 2. Redistributions in binary form must reproduce the above copyright
12112918Sjeff *    notice, this list of conditions and the following disclaimer in the
13112918Sjeff *    documentation and/or other materials provided with the distribution.
14112918Sjeff *
15112918Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16112918Sjeff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17112918Sjeff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18112918Sjeff * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19112918Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20112918Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21112918Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22112918Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23112918Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24112918Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25112918Sjeff * SUCH DAMAGE.
26112918Sjeff *
27112918Sjeff * $FreeBSD$
28112918Sjeff */
29144518Sdavidxu
30112918Sjeff#include <sys/types.h>
31112918Sjeff#include <sys/mman.h>
32112918Sjeff#include <sys/queue.h>
33217224Skib#include <sys/resource.h>
34217224Skib#include <sys/sysctl.h>
35112918Sjeff#include <stdlib.h>
36112918Sjeff#include <pthread.h>
37217191Skib#include <link.h>
38144518Sdavidxu
39112918Sjeff#include "thr_private.h"
40112918Sjeff
/*
 * Bookkeeping record for one cached (spare) thread stack.  The record is
 * written into the idle stack's own memory when the stack is freed, so
 * caching a stack needs no separate allocation.
 */
struct stack {
	LIST_ENTRY(stack)	qe;		/* Linkage on dstackq or mstackq. */
	size_t			stacksize;	/* Stack size, rounded up to pages. */
	size_t			guardsize;	/* Guard size, rounded up to pages. */
	void			*stackaddr;	/* Saved attr stackaddr_attr value. */
};
48112918Sjeff
/*
 * Cache of spare stacks whose stack size and guard size are both the
 * defaults.  Freed stacks stay mapped and are reused instead of being
 * unmapped, which avoids the extra complexity of managing mmap()ed stack
 * regions.  Entries are inserted and taken at the head (LIFO order) to
 * increase cache locality.
 */
static LIST_HEAD(, stack)	dstackq = LIST_HEAD_INITIALIZER(dstackq);
55112918Sjeff
/*
 * Cache of spare stacks with a non-default stack size and/or guard size.
 * As with the default-size cache, freed stacks stay mapped so that
 * mmap()ed stack regions need no further management.  The list is kept
 * unordered, because ordering on both stack size and guard size would be
 * more trouble than it is worth; lookups take the first entry whose stack
 * and guard sizes both match the request.
 */
static LIST_HEAD(, stack)	mstackq = LIST_HEAD_INITIALIZER(mstackq);
64112918Sjeff
/*
 * Base address of the most recently allocated stack region, including its
 * red zone if it has one.  NULL until the first stack is allocated.
 *
 * Stacks are allocated contiguously, each new one directly below the
 * previous one, starting beyond the top of the main stack.  When a new
 * stack is created it typically gets a red zone (a range mapped with
 * PROT_NONE) above the top of the stack, so that the stack cannot grow all
 * the way to the bottom of the next stack.  This is not fool-proof: a stack
 * may grow by a large amount in one step and end up inside the next stack,
 * and as long as the memory within the red zone is never accessed, nothing
 * prevents one thread's stack from trampling another's.
 *
 * low memory
 *     . . . . . . . . . . . . . . . . . .
 *    |                                   |
 *    |             stack 3               | start of 3rd thread stack
 *    +-----------------------------------+
 *    |                                   |
 *    |       Red Zone (guard page)       | red zone for 2nd thread
 *    |                                   |
 *    +-----------------------------------+
 *    |  stack 2 - _thr_stack_default     | top of 2nd thread stack
 *    |                                   |
 *    |                                   |
 *    |                                   |
 *    |                                   |
 *    |             stack 2               |
 *    +-----------------------------------+ <-- start of 2nd thread stack
 *    |                                   |
 *    |       Red Zone                    | red zone for 1st thread
 *    |                                   |
 *    +-----------------------------------+
 *    |  stack 1 - _thr_stack_default     | top of 1st thread stack
 *    |                                   |
 *    |                                   |
 *    |                                   |
 *    |                                   |
 *    |             stack 1               |
 *    +-----------------------------------+ <-- start of 1st thread stack
 *    |                                   |   (initial value of last_stack)
 *    |       Red Zone                    |
 *    |                                   | red zone for main thread
 *    +-----------------------------------+
 *    | USRSTACK - _thr_stack_initial     | top of main thread stack
 *    |                                   | ^
 *    |                                   | |
 *    |                                   | |
 *    |                                   | | stack growth
 *    |                                   |
 *    +-----------------------------------+ <-- start of main thread stack
 *                                              (USRSTACK)
 * high memory
 *
 */
static char *last_stack = NULL;
120112918Sjeff
121144518Sdavidxu/*
122144518Sdavidxu * Round size up to the nearest multiple of
123144518Sdavidxu * _thr_page_size.
124144518Sdavidxu */
125144518Sdavidxustatic inline size_t
126144518Sdavidxuround_up(size_t size)
127112918Sjeff{
128144518Sdavidxu	if (size % _thr_page_size != 0)
129144518Sdavidxu		size = ((size / _thr_page_size) + 1) *
130144518Sdavidxu		    _thr_page_size;
131144518Sdavidxu	return size;
132144518Sdavidxu}
133112918Sjeff
134217191Skibvoid
135217191Skib_thr_stack_fix_protection(struct pthread *thrd)
136217191Skib{
137217191Skib
138217191Skib	mprotect((char *)thrd->attr.stackaddr_attr +
139217191Skib	    round_up(thrd->attr.guardsize_attr),
140217191Skib	    round_up(thrd->attr.stacksize_attr),
141217191Skib	    _rtld_get_stack_prot());
142217191Skib}
143217191Skib
/*
 * Apply the stack protection reported by _rtld_get_stack_prot() to the
 * process stack.  Used when the thread library is not yet initialized.
 *
 * The stack is taken to be the RLIMIT_STACK soft limit's worth of bytes
 * ending at the address returned by the CTL_KERN / KERN_USRSTACK sysctl.
 * If either query fails, nothing is changed.  The mprotect() result is not
 * checked.
 */
static void
singlethread_map_stacks_exec(void)
{
	struct rlimit stack_limit;
	u_long stack_top;
	size_t top_len = sizeof(stack_top);
	int name[2] = { CTL_KERN, KERN_USRSTACK };

	if (sysctl(name, sizeof(name) / sizeof(name[0]), &stack_top, &top_len,
	    NULL, 0) == -1 ||
	    getrlimit(RLIMIT_STACK, &stack_limit) == -1)
		return;

	mprotect((void *)(uintptr_t)(stack_top - stack_limit.rlim_cur),
	    stack_limit.rlim_cur, _rtld_get_stack_prot());
}
163217224Skib
164217191Skibvoid __pthread_map_stacks_exec(void);
165217191Skibvoid
166217191Skib__pthread_map_stacks_exec(void)
167217191Skib{
168217191Skib	struct pthread *curthread, *thrd;
169217191Skib	struct stack *st;
170217191Skib
171217224Skib	if (!_thr_is_inited()) {
172217224Skib		singlethread_map_stacks_exec();
173217224Skib		return;
174217224Skib	}
175217191Skib	curthread = _get_curthread();
176217191Skib	THREAD_LIST_RDLOCK(curthread);
177217191Skib	LIST_FOREACH(st, &mstackq, qe)
178217191Skib		mprotect((char *)st->stackaddr + st->guardsize, st->stacksize,
179217191Skib		    _rtld_get_stack_prot());
180217191Skib	LIST_FOREACH(st, &dstackq, qe)
181217191Skib		mprotect((char *)st->stackaddr + st->guardsize, st->stacksize,
182217191Skib		    _rtld_get_stack_prot());
183217191Skib	TAILQ_FOREACH(thrd, &_thread_gc_list, gcle)
184217191Skib		_thr_stack_fix_protection(thrd);
185217191Skib	TAILQ_FOREACH(thrd, &_thread_list, tle)
186217191Skib		_thr_stack_fix_protection(thrd);
187217191Skib	THREAD_LIST_UNLOCK(curthread);
188217191Skib}
189217191Skib
190144518Sdavidxuint
191144518Sdavidxu_thr_stack_alloc(struct pthread_attr *attr)
192144518Sdavidxu{
193144518Sdavidxu	struct pthread *curthread = _get_curthread();
194144518Sdavidxu	struct stack *spare_stack;
195144518Sdavidxu	size_t stacksize;
196144518Sdavidxu	size_t guardsize;
197144518Sdavidxu	char *stackaddr;
198144518Sdavidxu
199112918Sjeff	/*
200144518Sdavidxu	 * Round up stack size to nearest multiple of _thr_page_size so
201144518Sdavidxu	 * that mmap() * will work.  If the stack size is not an even
202144518Sdavidxu	 * multiple, we end up initializing things such that there is
203144518Sdavidxu	 * unused space above the beginning of the stack, so the stack
204144518Sdavidxu	 * sits snugly against its guard.
205112918Sjeff	 */
206144518Sdavidxu	stacksize = round_up(attr->stacksize_attr);
207144518Sdavidxu	guardsize = round_up(attr->guardsize_attr);
208112918Sjeff
209144518Sdavidxu	attr->stackaddr_attr = NULL;
210144518Sdavidxu	attr->flags &= ~THR_STACK_USER;
211144518Sdavidxu
212112918Sjeff	/*
213144518Sdavidxu	 * Use the garbage collector lock for synchronization of the
214144518Sdavidxu	 * spare stack lists and allocations from usrstack.
215144518Sdavidxu	 */
216212536Sdavidxu	THREAD_LIST_WRLOCK(curthread);
217144518Sdavidxu	/*
218112918Sjeff	 * If the stack and guard sizes are default, try to allocate a stack
219112918Sjeff	 * from the default-size stack cache:
220112918Sjeff	 */
221144518Sdavidxu	if ((stacksize == THR_STACK_DEFAULT) &&
222144518Sdavidxu	    (guardsize == _thr_guard_default)) {
223144518Sdavidxu		if ((spare_stack = LIST_FIRST(&dstackq)) != NULL) {
224144518Sdavidxu			/* Use the spare stack. */
225112918Sjeff			LIST_REMOVE(spare_stack, qe);
226144518Sdavidxu			attr->stackaddr_attr = spare_stack->stackaddr;
227112918Sjeff		}
228112918Sjeff	}
229112918Sjeff	/*
230112918Sjeff	 * The user specified a non-default stack and/or guard size, so try to
231112918Sjeff	 * allocate a stack from the non-default size stack cache, using the
232112918Sjeff	 * rounded up stack size (stack_size) in the search:
233112918Sjeff	 */
234112918Sjeff	else {
235144518Sdavidxu		LIST_FOREACH(spare_stack, &mstackq, qe) {
236144518Sdavidxu			if (spare_stack->stacksize == stacksize &&
237112918Sjeff			    spare_stack->guardsize == guardsize) {
238112918Sjeff				LIST_REMOVE(spare_stack, qe);
239144518Sdavidxu				attr->stackaddr_attr = spare_stack->stackaddr;
240112918Sjeff				break;
241112918Sjeff			}
242112918Sjeff		}
243112918Sjeff	}
244144518Sdavidxu	if (attr->stackaddr_attr != NULL) {
245144518Sdavidxu		/* A cached stack was found.  Release the lock. */
246144518Sdavidxu		THREAD_LIST_UNLOCK(curthread);
247144518Sdavidxu	}
248144518Sdavidxu	else {
249144518Sdavidxu		/* Allocate a stack from usrstack. */
250112918Sjeff		if (last_stack == NULL)
251144518Sdavidxu			last_stack = _usrstack - _thr_stack_initial -
252144518Sdavidxu			    _thr_guard_default;
253112918Sjeff
254112918Sjeff		/* Allocate a new stack. */
255144518Sdavidxu		stackaddr = last_stack - stacksize - guardsize;
256112918Sjeff
257112918Sjeff		/*
258144518Sdavidxu		 * Even if stack allocation fails, we don't want to try to
259144518Sdavidxu		 * use this location again, so unconditionally decrement
260112918Sjeff		 * last_stack.  Under normal operating conditions, the most
261144518Sdavidxu		 * likely reason for an mmap() error is a stack overflow of
262144518Sdavidxu		 * the adjacent thread stack.
263112918Sjeff		 */
264144518Sdavidxu		last_stack -= (stacksize + guardsize);
265112918Sjeff
266144518Sdavidxu		/* Release the lock before mmap'ing it. */
267144518Sdavidxu		THREAD_LIST_UNLOCK(curthread);
268144518Sdavidxu
269144518Sdavidxu		/* Map the stack and guard page together, and split guard
270144518Sdavidxu		   page from allocated space: */
271144518Sdavidxu		if ((stackaddr = mmap(stackaddr, stacksize+guardsize,
272217191Skib		     _rtld_get_stack_prot(), MAP_STACK,
273144518Sdavidxu		     -1, 0)) != MAP_FAILED &&
274144518Sdavidxu		    (guardsize == 0 ||
275144518Sdavidxu		     mprotect(stackaddr, guardsize, PROT_NONE) == 0)) {
276144518Sdavidxu			stackaddr += guardsize;
277144518Sdavidxu		} else {
278144518Sdavidxu			if (stackaddr != MAP_FAILED)
279144518Sdavidxu				munmap(stackaddr, stacksize + guardsize);
280144518Sdavidxu			stackaddr = NULL;
281144518Sdavidxu		}
282144518Sdavidxu		attr->stackaddr_attr = stackaddr;
283112918Sjeff	}
284144518Sdavidxu	if (attr->stackaddr_attr != NULL)
285144518Sdavidxu		return (0);
286144518Sdavidxu	else
287144518Sdavidxu		return (-1);
288112918Sjeff}
289112918Sjeff
290144518Sdavidxu/* This function must be called with _thread_list_lock held. */
291112918Sjeffvoid
292144518Sdavidxu_thr_stack_free(struct pthread_attr *attr)
293112918Sjeff{
294144518Sdavidxu	struct stack *spare_stack;
295112918Sjeff
296144518Sdavidxu	if ((attr != NULL) && ((attr->flags & THR_STACK_USER) == 0)
297144518Sdavidxu	    && (attr->stackaddr_attr != NULL)) {
298157457Sdavidxu		spare_stack = (struct stack *)
299157457Sdavidxu			((char *)attr->stackaddr_attr +
300157457Sdavidxu			attr->stacksize_attr - sizeof(struct stack));
301144518Sdavidxu		spare_stack->stacksize = round_up(attr->stacksize_attr);
302144518Sdavidxu		spare_stack->guardsize = round_up(attr->guardsize_attr);
303144518Sdavidxu		spare_stack->stackaddr = attr->stackaddr_attr;
304112918Sjeff
305144518Sdavidxu		if (spare_stack->stacksize == THR_STACK_DEFAULT &&
306144518Sdavidxu		    spare_stack->guardsize == _thr_guard_default) {
307144518Sdavidxu			/* Default stack/guard size. */
308144518Sdavidxu			LIST_INSERT_HEAD(&dstackq, spare_stack, qe);
309144518Sdavidxu		} else {
310144518Sdavidxu			/* Non-default stack/guard size. */
311144518Sdavidxu			LIST_INSERT_HEAD(&mstackq, spare_stack, qe);
312144518Sdavidxu		}
313144518Sdavidxu		attr->stackaddr_attr = NULL;
314112918Sjeff	}
315112918Sjeff}
316