/*
 * Copyright 2010-2012 PathScale, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * guard.cc: Functions for thread-safe static initialisation.
 *
 * Static values in C++ can be initialised lazily on their first use.  This
 * file contains functions that are used to ensure that two threads attempting
 * to initialise the same static do not call the constructor twice.  This is
 * important because constructors can have side effects, so calling the
 * constructor twice may be very bad.
 *
 * Statics that require initialisation are protected by a 64-bit value.  Any
 * platform that can do 32-bit atomic test and set operations can use this
 * value as a low-overhead lock.  Because statics (in most sane code) are
 * accessed far more times than they are initialised, this lock implementation
 * is heavily optimised towards the case where the static has already been
 * initialised.
 */
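
/*
 * For orientation, a rough sketch (illustrative pseudo-C++, not the exact
 * code any particular compiler emits) of how a function-local static such
 * as `static Foo foo;` sits on top of this API; `Foo` is a hypothetical
 * type used only for illustration:
 *
 *   static guard_t guard;                  // zero-initialised guard
 *   if (guard's initialised bit not set)   // inline fast-path check
 *   {
 *       if (__cxa_guard_acquire(&guard))   // 1 => we must construct
 *       {
 *           try { construct foo; }         // run the constructor once
 *           catch (...) {
 *               __cxa_guard_abort(&guard); // unlock, stay uninitialised
 *               throw;
 *           }
 *           __cxa_guard_release(&guard);   // mark initialised and unlock
 *       }
 *   }
 */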
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
// sched.h declares sched_yield(), used in the spin loop below.
#include <sched.h>
#include <assert.h>
#include "atomic.h"

// Older GCC doesn't define __LITTLE_ENDIAN__
#ifndef __LITTLE_ENDIAN__
	// If __BYTE_ORDER__ is defined, use that instead
#	ifdef __BYTE_ORDER__
#		if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#			define __LITTLE_ENDIAN__
#		endif
	// x86 and ARM are the most common little-endian CPUs, so let's have a
	// special case for them (ARM is already special cased).  Assume everything
	// else is big endian.
#	elif defined(__x86_64) || defined(__i386)
#		define __LITTLE_ENDIAN__
#	endif
#endif


/*
 * One bit of the guard variable indicates that the object has been
 * initialised and another is used as a spinlock.  On little-endian targets
 * the initialised flag is the least significant bit and the lock is the most
 * significant bit; on big-endian targets the constants below are chosen so
 * that the initialised flag still lands in the first byte of the guard.
 */
#ifdef __arm__
// ARM ABI - 32-bit guards.
typedef uint32_t guard_t;
typedef uint32_t guard_lock_t;
static const uint32_t LOCKED = static_cast<guard_t>(1) << 31;
static const uint32_t INITIALISED = 1;
#define LOCK_PART(guard) (guard)
#define INIT_PART(guard) (guard)
#elif defined(_LP64)
typedef uint64_t guard_t;
typedef uint64_t guard_lock_t;
#	if defined(__LITTLE_ENDIAN__)
static const guard_t LOCKED = static_cast<guard_t>(1) << 63;
static const guard_t INITIALISED = 1;
#	else
static const guard_t LOCKED = 1;
static const guard_t INITIALISED = static_cast<guard_t>(1) << 56;
#	endif
#define LOCK_PART(guard) (guard)
#define INIT_PART(guard) (guard)
#else
typedef uint32_t guard_lock_t;
#	if defined(__LITTLE_ENDIAN__)
typedef struct {
	uint32_t init_half;
	uint32_t lock_half;
} guard_t;
static const uint32_t LOCKED = static_cast<guard_lock_t>(1) << 31;
static const uint32_t INITIALISED = 1;
#	else
typedef struct {
	uint32_t init_half;
	uint32_t lock_half;
} guard_t;
static_assert(sizeof(guard_t) == sizeof(uint64_t), "");
static const uint32_t LOCKED = 1;
static const uint32_t INITIALISED = static_cast<guard_lock_t>(1) << 24;
#	endif
#define LOCK_PART(guard) (&(guard)->lock_half)
#define INIT_PART(guard) (&(guard)->init_half)
#endif
static const guard_lock_t INITIAL = 0;

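/*
 * Summary of the resulting layouts (an informal note, not part of the
 * original interface): in every configuration the constants are chosen so
 * that the initialised flag lives in the first byte of the guard, which is
 * what the Itanium and ARM C++ ABIs require:
 *
 *   ARM:      one 32-bit word; bit 0 = initialised, bit 31 = lock.
 *   LP64 LE:  one 64-bit word; bit 0 = initialised, bit 63 = lock.
 *   LP64 BE:  one 64-bit word; bit 56 = initialised, bit 0 = lock.
 *   ILP32:    two 32-bit halves; init_half carries the initialised
 *             flag and lock_half is a separate spinlock word.
 */
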
/**
 * Acquires a lock on a guard, returning 0 if the object has already been
 * initialised, and 1 if it has not.  If the object is already constructed
 * then this function just needs to read a byte from memory and return.
 */
extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object)
{
	guard_lock_t old;
	// Not an atomic read and it doesn't establish a happens-before
	// relationship, but if one is already established and we end up seeing
	// an initialised state then it's a fast path; otherwise we'll do
	// something more expensive than this test anyway...
	if (INITIALISED == *INIT_PART(guard_object))
		return 0;
	// Spin trying to do the initialisation
	for (;;)
	{
		// Loop trying to move the value of the guard from 0 (not
		// locked, not initialised) to the locked-uninitialised
		// position.
		old = __sync_val_compare_and_swap(LOCK_PART(guard_object),
		    INITIAL, LOCKED);
		if (old == INITIAL) {
			// Lock obtained.  If lock and init bit are
			// in separate words, check for init race.
			if (INIT_PART(guard_object) == LOCK_PART(guard_object))
				return 1;
			if (INITIALISED != *INIT_PART(guard_object))
				return 1;

			// No need for a memory barrier here,
			// see first comment.
			*LOCK_PART(guard_object) = INITIAL;
			return 0;
		}
		// If lock and init bit are in the same word, check again
		// whether we are done.
		if (INIT_PART(guard_object) == LOCK_PART(guard_object) &&
		    old == INITIALISED)
			return 0;

		assert(old == LOCKED);
		// Another thread holds the lock.
		// If lock and init bit are in different words, check
		// whether we are done before yielding and looping.
		if (INIT_PART(guard_object) != LOCK_PART(guard_object) &&
		    INITIALISED == *INIT_PART(guard_object))
			return 0;
		sched_yield();
	}
}

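/*
 * A minimal sketch of the same single-word protocol expressed with C++11
 * <atomic>, purely for illustration (this file deliberately sticks to the
 * __sync builtins; the function name below is hypothetical):
 *
 *   int guard_acquire_sketch(std::atomic<guard_lock_t> *g)
 *   {
 *       if (g->load(std::memory_order_acquire) == INITIALISED)
 *           return 0;                    // fast path: already constructed
 *       for (;;)
 *       {
 *           guard_lock_t expected = INITIAL;
 *           if (g->compare_exchange_strong(expected, LOCKED))
 *               return 1;                // lock taken; caller runs the ctor
 *           if (expected == INITIALISED)
 *               return 0;                // another thread finished it
 *           sched_yield();               // another thread holds the lock
 *       }
 *   }
 */
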
/**
 * Releases the lock without marking the object as initialised.  This function
 * is called if initialising a static causes an exception to be thrown.
 */
extern "C" void __cxa_guard_abort(volatile guard_t *guard_object)
{
	__attribute__((unused))
	bool reset = __sync_bool_compare_and_swap(LOCK_PART(guard_object),
	    LOCKED, INITIAL);
	assert(reset);
}

/**
 * Releases the guard and marks the object as initialised.  This function is
 * called after successful initialisation of a static.
 */
extern "C" void __cxa_guard_release(volatile guard_t *guard_object)
{
	guard_lock_t old;
	if (INIT_PART(guard_object) == LOCK_PART(guard_object))
		old = LOCKED;
	else
		old = INITIAL;
	__attribute__((unused))
	bool reset = __sync_bool_compare_and_swap(INIT_PART(guard_object),
	    old, INITIALISED);
	assert(reset);
	if (INIT_PART(guard_object) != LOCK_PART(guard_object))
		*LOCK_PART(guard_object) = INITIAL;
}
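
/*
 * For reference, the guard state machine implemented above (single-word
 * case; an informal summary, not additional code):
 *
 *   INITIAL --acquire--> LOCKED --release--> INITIALISED   (ctor succeeded)
 *   INITIAL --acquire--> LOCKED --abort----> INITIAL       (ctor threw)
 *
 * Once a guard reaches INITIALISED it never changes again, which is what
 * makes the unsynchronised fast-path read in __cxa_guard_acquire acceptable.
 */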