// guard.cc -- FreeBSD contrib/libcxxrt, revision 278724
/*
 * Copyright 2010-2012 PathScale, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * guard.cc: Functions for thread-safe static initialisation.
 *
 * Static values in C++ can be initialised lazily their first use.  This file
 * contains functions that are used to ensure that two threads attempting to
 * initialize the same static do not call the constructor twice.
This is 330SN/A * important because constructors can have side effects, so calling the 340SN/A * constructor twice may be very bad. 354475SN/A * 360SN/A * Statics that require initialisation are protected by a 64-bit value. Any 370SN/A * platform that can do 32-bit atomic test and set operations can use this 380SN/A * value as a low-overhead lock. Because statics (in most sane code) are 390SN/A * accessed far more times than they are initialised, this lock implementation 404475SN/A * is heavily optimised towards the case where the static has already been 414475SN/A * initialised. 420SN/A */ 439749SN/A#include <stdint.h> 449749SN/A#include <stdlib.h> 450SN/A#include <stdio.h> 460SN/A#include <pthread.h> 470SN/A#include <assert.h> 480SN/A#include "atomic.h" 490SN/A 500SN/A// Older GCC doesn't define __LITTLE_ENDIAN__ 514475SN/A#ifndef __LITTLE_ENDIAN__ 524475SN/A // If __BYTE_ORDER__ is defined, use that instead 534475SN/A# ifdef __BYTE_ORDER__ 540SN/A# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 550SN/A# define __LITTLE_ENDIAN__ 560SN/A# endif 570SN/A // x86 and ARM are the most common little-endian CPUs, so let's have a 580SN/A // special case for them (ARM is already special cased). Assume everything 590SN/A // else is big endian. 600SN/A# elif defined(__x86_64) || defined(__i386) 619749SN/A# define __LITTLE_ENDIAN__ 629749SN/A# endif 639749SN/A#endif 649749SN/A 659749SN/A 669749SN/A/* 679749SN/A * The least significant bit of the guard variable indicates that the object 689749SN/A * has been initialised, the most significant bit is used for a spinlock. 699749SN/A */ 709749SN/A#ifdef __arm__ 710SN/A// ARM ABI - 32-bit guards. 
720SN/Atypedef uint32_t guard_t; 730SN/Atypedef uint32_t guard_lock_t; 740SN/Astatic const uint32_t LOCKED = static_cast<guard_t>(1) << 31; 750SN/Astatic const uint32_t INITIALISED = 1; 760SN/A#define LOCK_PART(guard) (guard) 770SN/A#define INIT_PART(guard) (guard) 780SN/A#elif defined(_LP64) 790SN/Atypedef uint64_t guard_t; 800SN/Atypedef uint64_t guard_lock_t; 810SN/A# if defined(__LITTLE_ENDIAN__) 820SN/Astatic const guard_t LOCKED = static_cast<guard_t>(1) << 63; 830SN/Astatic const guard_t INITIALISED = 1; 840SN/A# else 850SN/Astatic const guard_t LOCKED = 1; 860SN/Astatic const guard_t INITIALISED = static_cast<guard_t>(1) << 56; 870SN/A# endif 880SN/A#define LOCK_PART(guard) (guard) 890SN/A#define INIT_PART(guard) (guard) 900SN/A#else 910SN/Atypedef uint32_t guard_lock_t; 920SN/A# if defined(__LITTLE_ENDIAN__) 930SN/Atypedef struct { 940SN/A uint32_t init_half; 950SN/A uint32_t lock_half; 960SN/A} guard_t; 970SN/Astatic const uint32_t LOCKED = static_cast<guard_lock_t>(1) << 31; 980SN/Astatic const uint32_t INITIALISED = 1; 990SN/A# else 1000SN/Atypedef struct { 1010SN/A uint32_t init_half; 1020SN/A uint32_t lock_half; 1030SN/A} guard_t; 1040SN/A_Static_assert(sizeof(guard_t) == sizeof(uint64_t), ""); 1050SN/Astatic const uint32_t LOCKED = 1; 1064475SN/Astatic const uint32_t INITIALISED = static_cast<guard_lock_t>(1) << 24; 10715065Srobm# endif 1084475SN/A#define LOCK_PART(guard) (&(guard)->lock_half) 1090SN/A#define INIT_PART(guard) (&(guard)->init_half) 11015065Srobm#endif 1110SN/Astatic const guard_lock_t INITIAL = 0; 1120SN/A 113/** 114 * Acquires a lock on a guard, returning 0 if the object has already been 115 * initialised, and 1 if it has not. If the object is already constructed then 116 * this function just needs to read a byte from memory and return. 
117 */ 118extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object) 119{ 120 guard_lock_t old; 121 // Not an atomic read, doesn't establish a happens-before relationship, but 122 // if one is already established and we end up seeing an initialised state 123 // then it's a fast path, otherwise we'll do something more expensive than 124 // this test anyway... 125 if (INITIALISED == *INIT_PART(guard_object)) 126 return 0; 127 // Spin trying to do the initialisation 128 for (;;) 129 { 130 // Loop trying to move the value of the guard from 0 (not 131 // locked, not initialised) to the locked-uninitialised 132 // position. 133 old = __sync_val_compare_and_swap(LOCK_PART(guard_object), 134 INITIAL, LOCKED); 135 if (old == INITIAL) { 136 // Lock obtained. If lock and init bit are 137 // in separate words, check for init race. 138 if (INIT_PART(guard_object) == LOCK_PART(guard_object)) 139 return 1; 140 if (INITIALISED != *INIT_PART(guard_object)) 141 return 1; 142 143 // No need for a memory barrier here, 144 // see first comment. 145 *LOCK_PART(guard_object) = INITIAL; 146 return 0; 147 } 148 // If lock and init bit are in the same word, check again 149 // if we are done. 150 if (INIT_PART(guard_object) == LOCK_PART(guard_object) && 151 old == INITIALISED) 152 return 0; 153 154 assert(old == LOCKED); 155 // Another thread holds the lock. 156 // If lock and init bit are in different words, check 157 // if we are done before yielding and looping. 158 if (INIT_PART(guard_object) != LOCK_PART(guard_object) && 159 INITIALISED == *INIT_PART(guard_object)) 160 return 0; 161 sched_yield(); 162 } 163} 164 165/** 166 * Releases the lock without marking the object as initialised. This function 167 * is called if initialising a static causes an exception to be thrown. 
168 */ 169extern "C" void __cxa_guard_abort(volatile guard_t *guard_object) 170{ 171 __attribute__((unused)) 172 bool reset = __sync_bool_compare_and_swap(LOCK_PART(guard_object), 173 LOCKED, INITIAL); 174 assert(reset); 175} 176/** 177 * Releases the guard and marks the object as initialised. This function is 178 * called after successful initialisation of a static. 179 */ 180extern "C" void __cxa_guard_release(volatile guard_t *guard_object) 181{ 182 guard_lock_t old; 183 if (INIT_PART(guard_object) == LOCK_PART(guard_object)) 184 old = LOCKED; 185 else 186 old = INITIAL; 187 __attribute__((unused)) 188 bool reset = __sync_bool_compare_and_swap(INIT_PART(guard_object), 189 old, INITIALISED); 190 assert(reset); 191 if (INIT_PART(guard_object) != LOCK_PART(guard_object)) 192 *LOCK_PART(guard_object) = INITIAL; 193} 194