1/* $NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $ */ 2 3/* epoch.c - epoch based memory reclamation */ 4/* $OpenLDAP$ */ 5/* This work is part of OpenLDAP Software <http://www.openldap.org/>. 6 * 7 * Copyright 2018-2021 The OpenLDAP Foundation. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted only as authorized by the OpenLDAP 12 * Public License. 13 * 14 * A copy of this license is available in the file LICENSE in the 15 * top-level directory of the distribution or, alternatively, at 16 * <http://www.OpenLDAP.org/license.html>. 17 */ 18 19/** @file epoch.c 20 * 21 * Implementation of epoch based memory reclamation, in principle 22 * similar to the algorithm presented in 23 * https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf 24 * 25 * Not completely lock-free at the moment. 26 * 27 * Also the problems with epoch based memory reclamation are still 28 * present - a thread actively observing an epoch getting stuck will 29 * prevent managed objects (in our case connections and operations) 30 * from being freed, potentially running out of memory. 31 */ 32 33#include <sys/cdefs.h> 34__RCSID("$NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $"); 35 36#include "portable.h" 37 38#include "lload.h" 39#include <epoch.h> 40 41/* Has to be >= 3 */ 42#define EPOCH_MASK ( 1 << 2 ) 43#define EPOCH_PREV(epoch) ( ( (epoch) + EPOCH_MASK - 1 ) % EPOCH_MASK ) 44#define EPOCH_NEXT(epoch) ( ( (epoch) + 1 ) % EPOCH_MASK ) 45 46struct pending_ref { 47 void *object; 48 dispose_cb *dispose; 49 struct pending_ref *next; 50}; 51 52ldap_pvt_thread_rdwr_t epoch_mutex; 53 54static epoch_t current_epoch; 55static uintptr_t epoch_threads[EPOCH_MASK]; 56static struct pending_ref *references[EPOCH_MASK]; 57 58void 59epoch_init( void ) 60{ 61 epoch_t epoch; 62 63 current_epoch = 0; 64 for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) { 65 assert( !epoch_threads[epoch] ); 66 assert( !references[epoch] ); 67 } 68 69 ldap_pvt_thread_rdwr_init( &epoch_mutex ); 70} 71 72void 73epoch_shutdown( void ) 74{ 75 epoch_t epoch; 76 struct pending_ref *old, *next; 77 78 for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) { 79 assert( !epoch_threads[epoch] ); 80 } 81 82 /* 83 * Even with the work in epoch_leave(), shutdown code doesn't currently 84 * observe any epoch, so there might still be references left to free. 85 */ 86 epoch = EPOCH_PREV(current_epoch); 87 next = references[epoch]; 88 references[epoch] = NULL; 89 for ( old = next; old; old = next ) { 90 next = old->next; 91 92 old->dispose( old->object ); 93 ch_free( old ); 94 } 95 96 epoch = current_epoch; 97 next = references[epoch]; 98 references[epoch] = NULL; 99 for ( old = next; old; old = next ) { 100 next = old->next; 101 102 old->dispose( old->object ); 103 ch_free( old ); 104 } 105 106 /* No references should exist anywhere now */ 107 for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) { 108 assert( !references[epoch] ); 109 } 110 111 ldap_pvt_thread_rdwr_destroy( &epoch_mutex ); 112} 113 114epoch_t 115epoch_join( void ) 116{ 117 epoch_t epoch; 118 struct pending_ref *old, *ref = NULL; 119 120retry: 121 /* TODO: make this completely lock-free */ 122 ldap_pvt_thread_rdwr_rlock( &epoch_mutex ); 123 epoch = current_epoch; 124 __atomic_add_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL ); 125 ldap_pvt_thread_rdwr_runlock( &epoch_mutex ); 126 127 if ( __atomic_load_n( 128 &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_ACQUIRE ) ) { 129 return epoch; 130 } 131 132 __atomic_exchange( 133 &references[EPOCH_PREV(epoch)], &ref, &ref, __ATOMIC_ACQ_REL ); 134 135 Debug( LDAP_DEBUG_TRACE, "epoch_join: " 136 "advancing epoch to %zu with %s objects to free\n", 137 EPOCH_NEXT(epoch), ref ? "some" : "no" ); 138 139 ldap_pvt_thread_rdwr_wlock( &epoch_mutex ); 140 current_epoch = EPOCH_NEXT(epoch); 141 ldap_pvt_thread_rdwr_wunlock( &epoch_mutex ); 142 143 if ( !ref ) { 144 return epoch; 145 } 146 147 /* 148 * The below is now safe to free outside epochs and we don't want to make 149 * the current epoch last any longer than necessary. 150 * 151 * Looks like there might be fairness issues in massively parallel 152 * environments but they haven't been observed on 32-core machines. 153 */ 154 epoch_leave( epoch ); 155 156 for ( old = ref; old; old = ref ) { 157 ref = old->next; 158 159 old->dispose( old->object ); 160 ch_free( old ); 161 } 162 163 goto retry; 164} 165 166void 167epoch_leave( epoch_t epoch ) 168{ 169 struct pending_ref *p, *next, *old_refs = NULL, *current_refs = NULL; 170 171 /* Are there other threads observing our epoch? */ 172 if ( __atomic_sub_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL ) ) { 173 return; 174 } 175 176 /* 177 * Optimisation for the case when we're mostly idle. Otherwise we won't 178 * release resources until another thread comes by and joins the epoch 179 * (twice), and there's no idea how soon (or late) that is going to happen. 180 * 181 * NB. There is no limit to the number of threads executing the following 182 * code in parallel. 183 */ 184 ldap_pvt_thread_rdwr_rlock( &epoch_mutex ); 185 /* 186 * Anything could happen between the subtract and the lock being acquired 187 * above, so check again. But once we hold this lock (and confirm no more 188 * threads still observe either prospective epoch), noone will be able to 189 * finish epoch_join until we've released epoch_mutex since it holds that: 190 * 191 * epoch_threads[EPOCH_PREV(current_epoch)] == 0 192 * 193 * and that leads epoch_join() to acquire a write lock on &epoch_mutex. 194 */ 195 if ( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) ) { 196 /* Epoch counter has run full circle */ 197 ldap_pvt_thread_rdwr_runlock( &epoch_mutex ); 198 return; 199 } else if ( epoch == current_epoch ) { 200 if ( __atomic_load_n( 201 &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_RELAXED ) ) { 202 /* There is another (older) thread still running */ 203 ldap_pvt_thread_rdwr_runlock( &epoch_mutex ); 204 return; 205 } 206 207 /* We're all alone, it's safe to claim all references and free them. */ 208 __atomic_exchange( &references[EPOCH_PREV(epoch)], &old_refs, 209 &old_refs, __ATOMIC_ACQ_REL ); 210 __atomic_exchange( &references[epoch], ¤t_refs, ¤t_refs, 211 __ATOMIC_ACQ_REL ); 212 } else if ( epoch == EPOCH_PREV(current_epoch) ) { 213 if ( __atomic_load_n( 214 &epoch_threads[EPOCH_NEXT(epoch)], __ATOMIC_RELAXED ) ) { 215 /* There is another (newer) thread still running */ 216 ldap_pvt_thread_rdwr_runlock( &epoch_mutex ); 217 return; 218 } 219 220 /* We're all alone, it's safe to claim all references and free them. */ 221 __atomic_exchange( 222 &references[epoch], &old_refs, &old_refs, __ATOMIC_ACQ_REL ); 223 __atomic_exchange( &references[EPOCH_NEXT(epoch)], ¤t_refs, 224 ¤t_refs, __ATOMIC_ACQ_REL ); 225 } 226 /* 227 * Else the current_epoch has moved far enough that no references remain to 228 * be freed. 229 */ 230 ldap_pvt_thread_rdwr_runlock( &epoch_mutex ); 231 232 /* 233 * Trigger a memory-independent read fence to make sure we're reading the 234 * state after all threads actually finished - which might have happened 235 * after we acquired epoch_mutex so ldap_pvt_thread_rdwr_rlock would not 236 * catch everything. 237 * 238 * TODO is to confirm the below: 239 * It might be that the tests and exchanges above only enforce a fence for 240 * the locations affected, so we could still read stale memory for 241 * unrelated locations? At least that's the only explanation I've been able 242 * to establish for repeated crashes that seem to have gone away with this 243 * in place. 244 * 245 * But then that's contrary to the second example in Acquire/Release 246 * section here: 247 * https://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync 248 */ 249 __atomic_thread_fence( __ATOMIC_ACQUIRE ); 250 251 for ( p = old_refs; p; p = next ) { 252 next = p->next; 253 254 p->dispose( p->object ); 255 ch_free( p ); 256 } 257 258 for ( p = current_refs; p; p = next ) { 259 next = p->next; 260 261 p->dispose( p->object ); 262 ch_free( p ); 263 } 264} 265 266/* 267 * Add the object to the "current global epoch", not the epoch our thread 268 * entered. 269 */ 270void 271epoch_append( void *ptr, dispose_cb *cb ) 272{ 273 struct pending_ref *new; 274 epoch_t epoch = __atomic_load_n( ¤t_epoch, __ATOMIC_ACQUIRE ); 275 276 /* 277 * BTW, the following is not appropriate here: 278 * assert( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) ); 279 * 280 * We might be a thread lagging behind in the "previous epoch" with no 281 * other threads executing at all. 282 */ 283 284 new = ch_malloc( sizeof(struct pending_ref) ); 285 new->object = ptr; 286 new->dispose = cb; 287 new->next = __atomic_load_n( &references[epoch], __ATOMIC_ACQUIRE ); 288 289 while ( !__atomic_compare_exchange( &references[epoch], &new->next, &new, 0, 290 __ATOMIC_RELEASE, __ATOMIC_RELAXED ) ) 291 /* iterate until we succeed */; 292} 293 294int 295acquire_ref( uintptr_t *refp ) 296{ 297 uintptr_t refcnt, new_refcnt; 298 299 refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE ); 300 301 /* 302 * If we just incremented the refcnt and checked for zero after, another 303 * thread might falsely believe the object was going to stick around. 304 * 305 * Checking whether the object is still dead at disposal time might not be 306 * able to distinguish it from being freed in a later epoch. 307 */ 308 do { 309 if ( !refcnt ) { 310 return refcnt; 311 } 312 313 new_refcnt = refcnt + 1; 314 } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0, 315 __ATOMIC_RELEASE, __ATOMIC_RELAXED ) ); 316 assert( new_refcnt == refcnt + 1 ); 317 318 return refcnt; 319} 320 321int 322try_release_ref( uintptr_t *refp, void *object, dispose_cb *cb ) 323{ 324 uintptr_t refcnt, new_refcnt; 325 326 refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE ); 327 328 /* We promise the caller that we won't decrease refcnt below 0 */ 329 do { 330 if ( !refcnt ) { 331 return refcnt; 332 } 333 334 new_refcnt = refcnt - 1; 335 } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0, 336 __ATOMIC_RELEASE, __ATOMIC_RELAXED ) ); 337 assert( new_refcnt == refcnt - 1 ); 338 339 if ( !new_refcnt ) { 340 epoch_append( object, cb ); 341 } 342 343 return refcnt; 344} 345