/*
 * Copyright (c) 2001 Daniel Eischen <deischen@freebsd.org>
 * Copyright (c) 2000-2001 Jason Evans <jasone@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/resource.h>
#include <sys/sysctl.h>
#include <stdlib.h>
#include <pthread.h>
#include <link.h>

#include "thr_private.h"

/* Spare thread stack. */
struct stack {
	LIST_ENTRY(stack)	qe;		/* Stack queue linkage. */
	size_t			stacksize;	/* Stack size (rounded up). */
	size_t			guardsize;	/* Guard size. */
	void			*stackaddr;	/* Stack address. */
};

/*
 * Default sized (stack and guard) spare stack queue.  Stacks are cached
 * to avoid additional complexity managing mmap()ed stack regions.  Spare
 * stacks are used in LIFO order to increase cache locality.
 */
static LIST_HEAD(, stack)	dstackq = LIST_HEAD_INITIALIZER(dstackq);

/*
 * Miscellaneous sized (non-default stack and/or guard) spare stack queue.
 * Stacks are cached to avoid additional complexity managing mmap()ed
 * stack regions.  This list is unordered, since ordering on both stack
 * size and guard size would be more trouble than it's worth.  Stacks are
 * allocated from this cache on a first size match basis.
 */
static LIST_HEAD(, stack)	mstackq = LIST_HEAD_INITIALIZER(mstackq);
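/*
 * For example: once a terminated thread that used the default stack and
 * guard sizes has been garbage collected, its stack sits at the head of
 * dstackq and the next pthread_create() with default attributes is handed
 * that same mapping back.  A stack with a non-default size goes onto
 * mstackq instead and is only reused by a later request whose rounded
 * stack and guard sizes match it exactly.
 */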
/**
 * Base address of the last stack allocated (including its red zone, if
 * there is one).  Stacks are allocated contiguously, starting beyond the
 * top of the main stack.  When a new stack is created, a red zone is
 * typically created (actually, the red zone is mapped with PROT_NONE) above
 * the top of the stack, such that the stack will not be able to grow all
 * the way to the bottom of the next stack.  This isn't fool-proof.  It is
 * possible for a stack to grow by a large amount, such that it grows into
 * the next stack, and as long as the memory within the red zone is never
 * accessed, nothing will prevent one thread stack from trouncing all over
 * the next.
 *
 * low memory
 *     . . . . . . . . . . . . . . . . . .
 *    |                                   |
 *    |             stack 3               | start of 3rd thread stack
 *    +-----------------------------------+
 *    |                                   |
 *    |       Red Zone (guard page)       | red zone for 2nd thread
 *    |                                   |
 *    +-----------------------------------+
 *    |  stack 2 - _thr_stack_default     | top of 2nd thread stack
 *    |                                   |
 *    |                                   |
 *    |                                   |
 *    |                                   |
 *    |             stack 2               |
 *    +-----------------------------------+ <-- start of 2nd thread stack
 *    |                                   |
 *    |       Red Zone                    | red zone for 1st thread
 *    |                                   |
 *    +-----------------------------------+
 *    |  stack 1 - _thr_stack_default     | top of 1st thread stack
 *    |                                   |
 *    |                                   |
 *    |                                   |
 *    |                                   |
 *    |             stack 1               |
 *    +-----------------------------------+ <-- start of 1st thread stack
 *    |                                   |   (initial value of last_stack)
 *    |       Red Zone                    |
 *    |                                   | red zone for main thread
 *    +-----------------------------------+
 *    | USRSTACK - _thr_stack_initial     | top of main thread stack
 *    |                                   | ^
 *    |                                   | |
 *    |                                   | |
 *    |                                   | | stack growth
 *    |                                   |
 *    +-----------------------------------+ <-- start of main thread stack
 *                                              (USRSTACK)
 * high memory
 *
 */
static char *last_stack = NULL;

/*
 * Round size up to the nearest multiple of
 * _thr_page_size.
 */
static inline size_t
round_up(size_t size)
{
	if (size % _thr_page_size != 0)
		size = ((size / _thr_page_size) + 1) *
		    _thr_page_size;
	return size;
}
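/*
 * For instance, assuming the common 4096-byte page size, round_up(1) and
 * round_up(4096) both return 4096, while round_up(4097) returns 8192;
 * sizes that are already page-aligned are returned unchanged.
 */

/*
 * Re-apply the stack protection currently requested by the dynamic linker
 * (which may include PROT_EXEC) to the usable portion of a thread's stack,
 * leaving its guard region untouched.
 */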
void
_thr_stack_fix_protection(struct pthread *thrd)
{

	mprotect((char *)thrd->attr.stackaddr_attr +
	    round_up(thrd->attr.guardsize_attr),
	    round_up(thrd->attr.stacksize_attr),
	    _rtld_get_stack_prot());
}

static void
singlethread_map_stacks_exec(void)
{
	int mib[2];
	struct rlimit rlim;
	u_long usrstack;
	size_t len;

	mib[0] = CTL_KERN;
	mib[1] = KERN_USRSTACK;
	len = sizeof(usrstack);
	if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), &usrstack, &len, NULL, 0)
	    == -1)
		return;
	if (getrlimit(RLIMIT_STACK, &rlim) == -1)
		return;
	mprotect((void *)(uintptr_t)(usrstack - rlim.rlim_cur),
	    rlim.rlim_cur, _rtld_get_stack_prot());
}
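/*
 * Re-protect every thread stack with the protection the dynamic linker
 * currently wants, so that rtld can invoke this entry point when a newly
 * loaded object requires executable stacks.  Both spare-stack caches, the
 * threads awaiting garbage collection, and all live threads are covered.
 * Before the library is initialized only the main thread's stack exists,
 * and that single-threaded case is handled above.
 */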
void __pthread_map_stacks_exec(void);
void
__pthread_map_stacks_exec(void)
{
	struct pthread *curthread, *thrd;
	struct stack *st;

	if (!_thr_is_inited()) {
		singlethread_map_stacks_exec();
		return;
	}
	curthread = _get_curthread();
	THREAD_LIST_RDLOCK(curthread);
	LIST_FOREACH(st, &mstackq, qe)
		mprotect((char *)st->stackaddr + st->guardsize, st->stacksize,
		    _rtld_get_stack_prot());
	LIST_FOREACH(st, &dstackq, qe)
		mprotect((char *)st->stackaddr + st->guardsize, st->stacksize,
		    _rtld_get_stack_prot());
	TAILQ_FOREACH(thrd, &_thread_gc_list, gcle)
		_thr_stack_fix_protection(thrd);
	TAILQ_FOREACH(thrd, &_thread_list, tle)
		_thr_stack_fix_protection(thrd);
	THREAD_LIST_UNLOCK(curthread);
}

int
_thr_stack_alloc(struct pthread_attr *attr)
{
	struct pthread *curthread = _get_curthread();
	struct stack *spare_stack;
	size_t stacksize;
	size_t guardsize;
	char *stackaddr;

	/*
	 * Round up stack size to nearest multiple of _thr_page_size so
	 * that mmap() will work.  If the stack size is not an even
	 * multiple, we end up initializing things such that there is
	 * unused space above the beginning of the stack, so the stack
	 * sits snugly against its guard.
	 */
	stacksize = round_up(attr->stacksize_attr);
	guardsize = round_up(attr->guardsize_attr);

	attr->stackaddr_attr = NULL;
	attr->flags &= ~THR_STACK_USER;

	/*
	 * Use the garbage collector lock for synchronization of the
	 * spare stack lists and allocations from usrstack.
	 */
	THREAD_LIST_WRLOCK(curthread);
	/*
	 * If the stack and guard sizes are default, try to allocate a stack
	 * from the default-size stack cache:
	 */
	if ((stacksize == THR_STACK_DEFAULT) &&
	    (guardsize == _thr_guard_default)) {
		if ((spare_stack = LIST_FIRST(&dstackq)) != NULL) {
			/* Use the spare stack. */
			LIST_REMOVE(spare_stack, qe);
			attr->stackaddr_attr = spare_stack->stackaddr;
		}
	}
	/*
	 * The user specified a non-default stack and/or guard size, so try to
	 * allocate a stack from the non-default size stack cache, using the
	 * rounded up stack size (stacksize) in the search:
	 */
	else {
		LIST_FOREACH(spare_stack, &mstackq, qe) {
			if (spare_stack->stacksize == stacksize &&
			    spare_stack->guardsize == guardsize) {
				LIST_REMOVE(spare_stack, qe);
				attr->stackaddr_attr = spare_stack->stackaddr;
				break;
			}
		}
	}
	if (attr->stackaddr_attr != NULL) {
		/* A cached stack was found.  Release the lock. */
		THREAD_LIST_UNLOCK(curthread);
	}
	else {
		/* Allocate a stack from usrstack. */
		if (last_stack == NULL)
			last_stack = _usrstack - _thr_stack_initial -
			    _thr_guard_default;

		/* Allocate a new stack. */
		stackaddr = last_stack - stacksize - guardsize;

		/*
		 * Even if stack allocation fails, we don't want to try to
		 * use this location again, so unconditionally decrement
		 * last_stack.  Under normal operating conditions, the most
		 * likely reason for an mmap() error is a stack overflow of
		 * the adjacent thread stack.
		 */
		last_stack -= (stacksize + guardsize);

		/* Release the lock before mmap'ing it. */
		THREAD_LIST_UNLOCK(curthread);

		/*
		 * Map the stack and guard page together, and split the
		 * guard page from the allocated space:
		 */
		if ((stackaddr = mmap(stackaddr, stacksize + guardsize,
		     _rtld_get_stack_prot(), MAP_STACK,
		     -1, 0)) != MAP_FAILED &&
		    (guardsize == 0 ||
		     mprotect(stackaddr, guardsize, PROT_NONE) == 0)) {
			stackaddr += guardsize;
		} else {
			if (stackaddr != MAP_FAILED)
				munmap(stackaddr, stacksize + guardsize);
			stackaddr = NULL;
		}
		attr->stackaddr_attr = stackaddr;
	}
	if (attr->stackaddr_attr != NULL)
		return (0);
	else
		return (-1);
}
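/*
 * Return a stack to the appropriate spare-stack cache rather than
 * unmapping it.  The struct stack bookkeeping record is written into the
 * last few bytes at the top of the stack being freed, so caching a stack
 * requires no additional allocation.
 */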
/* This function must be called with _thread_list_lock held. */
void
_thr_stack_free(struct pthread_attr *attr)
{
	struct stack *spare_stack;

	if ((attr != NULL) && ((attr->flags & THR_STACK_USER) == 0)
	    && (attr->stackaddr_attr != NULL)) {
		spare_stack = (struct stack *)
			((char *)attr->stackaddr_attr +
			attr->stacksize_attr - sizeof(struct stack));
		spare_stack->stacksize = round_up(attr->stacksize_attr);
		spare_stack->guardsize = round_up(attr->guardsize_attr);
		spare_stack->stackaddr = attr->stackaddr_attr;

		if (spare_stack->stacksize == THR_STACK_DEFAULT &&
		    spare_stack->guardsize == _thr_guard_default) {
			/* Default stack/guard size. */
			LIST_INSERT_HEAD(&dstackq, spare_stack, qe);
		} else {
			/* Non-default stack/guard size. */
			LIST_INSERT_HEAD(&mstackq, spare_stack, qe);
		}
		attr->stackaddr_attr = NULL;
	}
}