guard.cc (235798) | guard.cc (253222) |
---|---|
1/* 2 * Copyright 2010-2012 PathScale, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * 7 * 1. Redistributions of source code must retain the above copyright notice, 8 * this list of conditions and the following disclaimer. --- 27 unchanged lines hidden (view full) --- 36 * Statics that require initialisation are protected by a 64-bit value. Any 37 * platform that can do 32-bit atomic test and set operations can use this 38 * value as a low-overhead lock. Because statics (in most sane code) are 39 * accessed far more times than they are initialised, this lock implementation 40 * is heavily optimised towards the case where the static has already been 41 * initialised. 42 */ 43#include <stdint.h> | 1/* 2 * Copyright 2010-2012 PathScale, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * 7 * 1. Redistributions of source code must retain the above copyright notice, 8 * this list of conditions and the following disclaimer. --- 27 unchanged lines hidden (view full) --- 36 * Statics that require initialisation are protected by a 64-bit value. Any 37 * platform that can do 32-bit atomic test and set operations can use this 38 * value as a low-overhead lock. Because statics (in most sane code) are 39 * accessed far more times than they are initialised, this lock implementation 40 * is heavily optimised towards the case where the static has already been 41 * initialised. 42 */ 43#include <stdint.h> |
44#include <stdlib.h> 45#include <stdio.h> |
|
44#include <pthread.h> 45#include <assert.h> | 46#include <pthread.h> 47#include <assert.h> |
48#include "atomic.h" |
|
46 | 49 |
47#ifdef __arm__ 48// ARM ABI - 32-bit guards. | 50// Older GCC doesn't define __LITTLE_ENDIAN__ 51#ifndef __LITTLE_ENDIAN__ 52 // If __BYTE_ORDER__ is defined, use that instead 53# ifdef __BYTE_ORDER__ 54# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 55# define __LITTLE_ENDIAN__ 56# endif 57 // x86 and ARM are the most common little-endian CPUs, so let's have a 58 // special case for them (ARM is already special cased). Assume everything 59 // else is big endian. 60# elif defined(__x86_64) || defined(__i386) 61# define __LITTLE_ENDIAN__ 62# endif 63#endif |
49 | 64 |
/**
 * Acquires a lock on a guard, returning 0 if the object has already been
 * initialised, and 1 if it has not.  If the object is already constructed
 * then this function just needs to read a value from memory and return.
 *
 * Guard states (ARM ABI, 32-bit guard): 0 = unlocked and uninitialised,
 * 1 = locked while another thread runs the initialiser, top bit set =
 * initialised.
 */
extern "C" int __cxa_guard_acquire(volatile int32_t *guard_object)
{
	// 0x80000000 built without shifting into the sign bit (1<<31 is
	// signed-overflow UB).
	const int32_t initialised = (int32_t)((uint32_t)1 << 31);
	// Fast path: already initialised, nothing to do.
	if (initialised == *guard_object) { return 0; }
	// If we can atomically move the value from 0 -> 1, then this is
	// uninitialised and we now hold the lock.
	if (__sync_bool_compare_and_swap(guard_object, 0, 1))
	{
		return 1;
	}
	// Some other thread is running the initialiser.  Spin until the guard
	// reaches the initialised state.  Note: the CAS succeeds (and the loop
	// exits) only once the value equals `initialised`; while the other
	// thread still holds the lock we yield and retry.
	while (!__sync_bool_compare_and_swap(guard_object, initialised, initialised))
	{
		// If the other thread aborted its initialisation, grab the lock
		// and let the caller retry the initialiser.
		if (__sync_bool_compare_and_swap(guard_object, 0, 1))
		{
			return 1;
		}
		sched_yield();
	}
	return 0;
}
77 | 65 |
78/** 79 * Releases the lock without marking the object as initialised. This function 80 * is called if initialising a static causes an exception to be thrown. | 66/* 67 * The least significant bit of the guard variable indicates that the object 68 * has been initialised, the most significant bit is used for a spinlock. |
81 */ | 69 */ |
/**
 * Releases the lock without marking the object as initialised.  This function
 * is called if initialising a static causes an exception to be thrown.
 *
 * The compare-and-swap is performed outside of assert() so that the unlock
 * still happens when the library is built with NDEBUG (side effects inside
 * assert() are compiled away entirely).
 */
extern "C" void __cxa_guard_abort(int32_t *guard_object)
{
	__attribute__((unused))
	bool reset = __sync_bool_compare_and_swap(guard_object, 1, 0);
	assert(reset);
}
/**
 * Releases the guard and marks the object as initialised.  This function is
 * called after successful initialisation of a static.
 */
extern "C" void __cxa_guard_release(int32_t *guard_object)
{
	// 0x80000000 built without shifting into the sign bit (1<<31 is
	// signed-overflow UB).  As above, the CAS must not live inside assert().
	__attribute__((unused))
	bool reset = __sync_bool_compare_and_swap(guard_object, 1,
	    (int32_t)((uint32_t)1 << 31));
	assert(reset);
}
96#else | 75#else |
97// Itanium ABI: 64-bit guards | 76typedef uint64_t guard_t; 77# if defined(__LITTLE_ENDIAN__) 78static const guard_t LOCKED = ((guard_t)1) << 63; 79static const guard_t INITIALISED = 1; 80# else 81static const guard_t LOCKED = 1; 82static const guard_t INITIALISED = ((guard_t)1) << 56; 83# endif 84#endif |
98 99/** | 85 86/** |
/**
 * Returns a pointer to the 32 least-significant bits of a 64-bit value,
 * whatever the host byte order.
 */
static int32_t *low_32_bits(volatile int64_t *ptr)
{
	int32_t *words = (int32_t*)ptr;
	// Runtime endianness probe; constant propagation should fold this
	// away entirely at compile time.
	int probe = 1;
	if (*(char*)&probe != 1)
	{
		// Big endian: the low-order word is the second one.
		words++;
	}
	return words;
}
117 * Acquires a lock on a guard, returning 0 if the object has already been 118 * initialised, and 1 if it has not. If the object is already constructed then 119 * this function just needs to read a byte from memory and return. 120 */ | 87 * Acquires a lock on a guard, returning 0 if the object has already been 88 * initialised, and 1 if it has not. If the object is already constructed then 89 * this function just needs to read a byte from memory and return. 90 */ |
121extern "C" int __cxa_guard_acquire(volatile int64_t *guard_object) | 91extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object) |
122{ | 92{ |
123 char first_byte = (*guard_object) >> 56; 124 if (1 == first_byte) { return 0; } 125 int32_t *lock = low_32_bits(guard_object); 126 // Simple spin lock using the low 32 bits. We assume that concurrent 127 // attempts to initialize statics are very rare, so we don't need to 128 // optimise for the case where we have lots of threads trying to acquire 129 // the lock at the same time. 130 while (!__sync_bool_compare_and_swap_4(lock, 0, 1)) | 93 // Not an atomic read, doesn't establish a happens-before relationship, but 94 // if one is already established and we end up seeing an initialised state 95 // then it's a fast path, otherwise we'll do something more expensive than 96 // this test anyway... 97 if ((INITIALISED == *guard_object)) { return 0; } 98 // Spin trying to do the initialisation 99 while (1) |
131 { | 100 { |
132 if (1 == ((*guard_object) >> 56)) | 101 // Loop trying to move the value of the guard from 0 (not 102 // locked, not initialised) to the locked-uninitialised 103 // position. 104 switch (__sync_val_compare_and_swap(guard_object, 0, LOCKED)) |
133 { | 105 { |
134 break; | 106 // If the old value was 0, we succeeded, so continue 107 // initialising 108 case 0: 109 return 1; 110 // If this was already initialised, return and let the caller skip 111 // initialising it again. 112 case INITIALISED: 113 return 0; 114 // If it is locked by another thread, relinquish the CPU and try 115 // again later. 116 case LOCKED: 117 case LOCKED | INITIALISED: 118 sched_yield(); 119 break; 120 // If it is some other value, then something has gone badly wrong. 121 // Give up. 122 default: 123 fprintf(stderr, "Invalid state detected attempting to lock static initialiser.\n"); 124 abort(); |
135 } | 125 } |
136 sched_yield(); | |
137 } | 126 } |
138 // We have to test the guard again, in case another thread has performed 139 // the initialisation while we were trying to acquire the lock. 140 first_byte = (*guard_object) >> 56; 141 return (1 != first_byte); | 127 //__builtin_unreachable(); 128 return 0; |
142} 143 144/** 145 * Releases the lock without marking the object as initialised. This function 146 * is called if initialising a static causes an exception to be thrown. 147 */ | 129} 130 131/** 132 * Releases the lock without marking the object as initialised. This function 133 * is called if initialising a static causes an exception to be thrown. 134 */ |
148extern "C" void __cxa_guard_abort(int64_t *guard_object) | 135extern "C" void __cxa_guard_abort(volatile guard_t *guard_object) |
149{ | 136{ |
150 int32_t *lock = low_32_bits(guard_object); 151 *lock = 0; | 137 __attribute__((unused)) 138 bool reset = __sync_bool_compare_and_swap(guard_object, LOCKED, 0); 139 assert(reset); |
152} 153/** 154 * Releases the guard and marks the object as initialised. This function is 155 * called after successful initialisation of a static. 156 */ | 140} 141/** 142 * Releases the guard and marks the object as initialised. This function is 143 * called after successful initialisation of a static. 144 */ |
157extern "C" void __cxa_guard_release(int64_t *guard_object) | 145extern "C" void __cxa_guard_release(volatile guard_t *guard_object) |
158{ | 146{ |
159 // Set the first byte to 1 160 *guard_object |= ((int64_t)1) << 56; 161 __cxa_guard_abort(guard_object); | 147 __attribute__((unused)) 148 bool reset = __sync_bool_compare_and_swap(guard_object, LOCKED, INITIALISED); 149 assert(reset); |
162} 163 | 150} 151 |
164#endif | 152 |