1/*
2 * Copyright 2010-2012 PathScale, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice,
8 *    this list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 *    this list of conditions and the following disclaimer in the documentation
12 *    and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
15 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27/**
28 * guard.cc: Functions for thread-safe static initialisation.
29 *
 * Static values in C++ can be initialised lazily on their first use.  This file
31 * contains functions that are used to ensure that two threads attempting to
32 * initialize the same static do not call the constructor twice.  This is
33 * important because constructors can have side effects, so calling the
34 * constructor twice may be very bad.
35 *
 * Statics that require initialisation are protected by a guard variable: a
 * 64-bit value on most platforms, or a 32-bit value under the ARM ABI.  Any
 * platform that can do 32-bit atomic test and set operations can use this
 * value as a low-overhead lock.  Because statics (in most sane code) are
39 * accessed far more times than they are initialised, this lock implementation
40 * is heavily optimised towards the case where the static has already been
41 * initialised.
42 */
43#include <stdint.h>
44#include <stdlib.h>
45#include <stdio.h>
46#include <pthread.h>
47#include <assert.h>
48#include "atomic.h"
49
// Ensure __LITTLE_ENDIAN__ is defined on little-endian targets even when the
// compiler (for example an older GCC) does not predefine it.
#if !defined(__LITTLE_ENDIAN__)
#	if defined(__BYTE_ORDER__)
	// The compiler reports its byte order directly, so trust that.
#		if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#			define __LITTLE_ENDIAN__
#		endif
#	else
	// No byte-order macro is available.  x86 and ARM are the most common
	// little-endian CPUs; ARM is special cased elsewhere, so only x86 is
	// recognised here.  Everything else is assumed to be big endian.
#		if defined(__x86_64) || defined(__i386)
#			define __LITTLE_ENDIAN__
#		endif
#	endif
#endif
64
65
/*
 * Guard layout.
 *
 * Every guard carries two flags: "initialised", and "locked" (used as a
 * spinlock while the initialiser runs).  On ARM and on little-endian targets
 * the initialised flag is the least significant bit of the guard and the lock
 * is the most significant bit.  On the remaining big-endian targets the
 * constants are mirrored, so that the initialised flag is still the lowest bit
 * of the first byte of the guard in memory.
 *
 * LOCK_PART() and INIT_PART() take a pointer to a guard and yield a pointer to
 * the word holding the respective flag.  Both yield the same pointer when a
 * single word holds both flags, and different pointers when the guard is
 * split into two 32-bit halves; the guard functions compare the two results
 * to tell these layouts apart.
 */
#if defined(__arm__)
// ARM ABI: the guard is a single 32-bit word.
typedef uint32_t guard_t;
typedef uint32_t guard_lock_t;
static const guard_lock_t LOCKED = static_cast<guard_lock_t>(1) << 31;
static const guard_lock_t INITIALISED = 1;
#	define LOCK_PART(guard) (guard)
#	define INIT_PART(guard) (guard)
#elif defined(_LP64)
// LP64 targets: one 64-bit word holds both the flag and the lock.
typedef uint64_t guard_t;
typedef uint64_t guard_lock_t;
#	if defined(__LITTLE_ENDIAN__)
static const guard_lock_t LOCKED = static_cast<guard_lock_t>(1) << 63;
static const guard_lock_t INITIALISED = 1;
#	else
static const guard_lock_t LOCKED = 1;
static const guard_lock_t INITIALISED = static_cast<guard_lock_t>(1) << 56;
#	endif
#	define LOCK_PART(guard) (guard)
#	define INIT_PART(guard) (guard)
#else
// Other targets: the guard is split into two 32-bit halves so that the lock
// only needs 32-bit atomic operations.
typedef uint32_t guard_lock_t;
typedef struct {
	uint32_t init_half;
	uint32_t lock_half;
} guard_t;
#	if defined(__LITTLE_ENDIAN__)
static const guard_lock_t LOCKED = static_cast<guard_lock_t>(1) << 31;
static const guard_lock_t INITIALISED = 1;
#	else
static_assert(sizeof(guard_t) == sizeof(uint64_t), "");
static const guard_lock_t LOCKED = 1;
static const guard_lock_t INITIALISED = static_cast<guard_lock_t>(1) << 24;
#	endif
#	define LOCK_PART(guard) (&(guard)->lock_half)
#	define INIT_PART(guard) (&(guard)->init_half)
#endif
// Value of the lock word when the guard is neither locked nor initialised.
static const guard_lock_t INITIAL = 0;
112
/**
 * Acquires a lock on a guard, returning 0 if the object has already been
 * initialised, and 1 if it has not.  If the object is already constructed then
 * this function just needs to read one word from memory and return.
 *
 * When 1 is returned the guard's lock is held by the caller, and it stays held
 * until the lock word is reset by `__cxa_guard_release()` or
 * `__cxa_guard_abort()`.  When 0 is returned the lock is not held.
 *
 * While another thread holds the lock this function spins, calling
 * `sched_yield()` between attempts.
 *
 * NOTE(review): the loop has no notion of which thread owns the lock, so a
 * thread that re-enters initialisation of the same static while holding the
 * lock would appear to spin forever - confirm whether callers can hit this.
 */
extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object)
{
	// Value observed in the lock word by the most recent compare-and-swap.
	guard_lock_t old;
	// Not an atomic read, doesn't establish a happens-before relationship, but
	// if one is already established and we end up seeing an initialised state
	// then it's a fast path, otherwise we'll do something more expensive than
	// this test anyway...
	// NOTE(review): this is a plain volatile load; on weakly ordered CPUs it
	// gives no acquire ordering for later reads of the guarded object -
	// confirm that callers establish the ordering themselves.
	if (INITIALISED == *INIT_PART(guard_object))
		return 0;
	// Spin trying to do the initialisation
	for (;;)
	{
		// Loop trying to move the value of the guard from 0 (not
		// locked, not initialised) to the locked-uninitialised
		// position.
		old = __sync_val_compare_and_swap(LOCK_PART(guard_object),
		    INITIAL, LOCKED);
		if (old == INITIAL) {
			// Lock obtained.  If lock and init bit are
			// in separate words, check for init race.
			// Same word: the swap just replaced 0, so the object cannot
			// have been initialised and the caller must initialise it.
			if (INIT_PART(guard_object) == LOCK_PART(guard_object))
				return 1;
			// Separate words: the caller initialises unless the init word
			// already says another thread finished.
			if (INITIALISED != *INIT_PART(guard_object))
				return 1;

			// Already initialised by someone else: give the lock back.
			// No need for a memory barrier here,
			// see first comment.
			*LOCK_PART(guard_object) = INITIAL;
			return 0;
		}
		// If lock and init bit are in the same word, check again
		// if we are done.
		if (INIT_PART(guard_object) == LOCK_PART(guard_object) &&
		    old == INITIALISED)
			return 0;

		// The only remaining value the lock word is expected to hold.
		assert(old == LOCKED);
		// Another thread holds the lock.
		// If lock and init bit are in different words, check
		// if we are done before yielding and looping.
		if (INIT_PART(guard_object) != LOCK_PART(guard_object) &&
		    INITIALISED == *INIT_PART(guard_object))
			return 0;
		// Let other threads (ideally the lock holder) run before retrying.
		sched_yield();
	}
}
164
165/**
166 * Releases the lock without marking the object as initialised.  This function
167 * is called if initialising a static causes an exception to be thrown.
168 */
169extern "C" void __cxa_guard_abort(volatile guard_t *guard_object)
170{
171	__attribute__((unused))
172	bool reset = __sync_bool_compare_and_swap(LOCK_PART(guard_object),
173	    LOCKED, INITIAL);
174	assert(reset);
175}
176/**
177 * Releases the guard and marks the object as initialised.  This function is
178 * called after successful initialisation of a static.
179 */
180extern "C" void __cxa_guard_release(volatile guard_t *guard_object)
181{
182	guard_lock_t old;
183	if (INIT_PART(guard_object) == LOCK_PART(guard_object))
184		old = LOCKED;
185	else
186		old = INITIAL;
187	__attribute__((unused))
188	bool reset = __sync_bool_compare_and_swap(INIT_PART(guard_object),
189	    old, INITIALISED);
190	assert(reset);
191	if (INIT_PART(guard_object) != LOCK_PART(guard_object))
192		*LOCK_PART(guard_object) = INITIAL;
193}
194