/* $NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $ */

/* epoch.c - epoch based memory reclamation */
/* $OpenLDAP$ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
 * Copyright 2018-2021 The OpenLDAP Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available in the file LICENSE in the
 * top-level directory of the distribution or, alternatively, at
 * <http://www.OpenLDAP.org/license.html>.
 */

/** @file epoch.c
 *
 * Implementation of epoch based memory reclamation, in principle
 * similar to the algorithm presented in
 * https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf
 *
 * Not completely lock-free at the moment.
 *
 * Also the problems with epoch based memory reclamation are still
 * present - a thread that gets stuck while actively observing an epoch
 * will prevent managed objects (in our case connections and operations)
 * from being freed, potentially exhausting memory.
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $");

#include "portable.h"

#include "lload.h"
#include <epoch.h>

/* Number of epoch slots, has to be >= 3 */
#define EPOCH_MASK ( 1 << 2 )
#define EPOCH_PREV(epoch) ( ( (epoch) + EPOCH_MASK - 1 ) % EPOCH_MASK )
#define EPOCH_NEXT(epoch) ( ( (epoch) + 1 ) % EPOCH_MASK )

struct pending_ref {
    void *object;
    dispose_cb *dispose;
    struct pending_ref *next;
};

ldap_pvt_thread_rdwr_t epoch_mutex;

static epoch_t current_epoch;
static uintptr_t epoch_threads[EPOCH_MASK];
static struct pending_ref *references[EPOCH_MASK];

void
epoch_init( void )
{
    epoch_t epoch;

    current_epoch = 0;
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_init( &epoch_mutex );
}
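/*
 * The intended calling pattern, sketched below with hypothetical names (obj
 * and obj_destroy are placeholders, not symbols defined elsewhere in lloadd):
 * a thread brackets its access to managed objects with epoch_join() and
 * epoch_leave(), and hands objects it has unlinked to epoch_append(), which
 * defers the dispose callback until no thread can still be observing an
 * epoch in which the object was reachable.
 *
 *     epoch_t epoch = epoch_join();
 *     ... use shared objects ...
 *     epoch_append( obj, obj_destroy );  // obj already unlinked, freed later
 *     epoch_leave( epoch );
 */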

void
epoch_shutdown( void )
{
    epoch_t epoch;
    struct pending_ref *old, *next;

    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
    }

    /*
     * Even with the work in epoch_leave(), shutdown code doesn't currently
     * observe any epoch, so there might still be references left to free.
     */
    epoch = EPOCH_PREV(current_epoch);
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    epoch = current_epoch;
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    /* No references should exist anywhere now */
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_destroy( &epoch_mutex );
}

epoch_t
epoch_join( void )
{
    epoch_t epoch;
    struct pending_ref *old, *ref = NULL;

retry:
    /* TODO: make this completely lock-free */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    epoch = current_epoch;
    __atomic_add_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL );
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    if ( __atomic_load_n(
                 &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_ACQUIRE ) ) {
        return epoch;
    }

    __atomic_exchange(
            &references[EPOCH_PREV(epoch)], &ref, &ref, __ATOMIC_ACQ_REL );

    Debug( LDAP_DEBUG_TRACE, "epoch_join: "
            "advancing epoch to %zu with %s objects to free\n",
            EPOCH_NEXT(epoch), ref ? "some" : "no" );

    ldap_pvt_thread_rdwr_wlock( &epoch_mutex );
    current_epoch = EPOCH_NEXT(epoch);
    ldap_pvt_thread_rdwr_wunlock( &epoch_mutex );

    if ( !ref ) {
        return epoch;
    }

    /*
     * The references claimed above are now safe to free outside of any epoch
     * and we don't want to make the current epoch last any longer than
     * necessary.
     *
     * Looks like there might be fairness issues in massively parallel
     * environments but they haven't been observed on 32-core machines.
     */
    epoch_leave( epoch );

    for ( old = ref; old; old = ref ) {
        ref = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    goto retry;
}
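/*
 * A worked example of the advance path above, assuming EPOCH_MASK == 4 and
 * no other threads active:
 *
 *     current_epoch == 2, epoch_threads[] == { 0, 0, 0, 0 }
 *     epoch_join():
 *         epoch_threads[2] -> 1                (we now observe epoch 2)
 *         epoch_threads[EPOCH_PREV(2)] == epoch_threads[1] == 0
 *         ref = references[1], references[1] = NULL
 *         current_epoch = EPOCH_NEXT(2) == 3   (epoch advanced)
 *         if ref == NULL: return 2             (caller still observes epoch 2)
 *         else: epoch_leave( 2 ), dispose of the claimed chain, retry the
 *               join, this time against epoch 3
 */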

void
epoch_leave( epoch_t epoch )
{
    struct pending_ref *p, *next, *old_refs = NULL, *current_refs = NULL;

    /* Are there other threads observing our epoch? */
    if ( __atomic_sub_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL ) ) {
        return;
    }

    /*
     * Optimisation for the case when we're mostly idle. Otherwise we wouldn't
     * release resources until another thread comes by and joins the epoch
     * (twice), and there is no telling how soon (or late) that is going to
     * happen.
     *
     * NB. There is no limit to the number of threads executing the following
     * code in parallel.
     */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    /*
     * Anything could happen between the subtract and the lock being acquired
     * above, so check again. But once we hold this lock (and confirm no more
     * threads still observe either prospective epoch), no one will be able to
     * finish epoch_join() until we've released epoch_mutex, since at that
     * point:
     *
     * epoch_threads[EPOCH_PREV(current_epoch)] == 0
     *
     * holds, and that forces epoch_join() to acquire a write lock on
     * &epoch_mutex before it can advance the epoch.
     */
    if ( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) ) {
        /* Epoch counter has run full circle */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    } else if ( epoch == current_epoch ) {
        if ( __atomic_load_n(
                     &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (older) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange( &references[EPOCH_PREV(epoch)], &old_refs,
                &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[epoch], &current_refs, &current_refs,
                __ATOMIC_ACQ_REL );
    } else if ( epoch == EPOCH_PREV(current_epoch) ) {
        if ( __atomic_load_n(
                     &epoch_threads[EPOCH_NEXT(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (newer) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange(
                &references[epoch], &old_refs, &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[EPOCH_NEXT(epoch)], &current_refs,
                &current_refs, __ATOMIC_ACQ_REL );
    }
    /*
     * Else the current_epoch has moved far enough that no references remain to
     * be freed.
     */
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    /*
     * Trigger a memory-independent read fence to make sure we're reading the
     * state after all threads actually finished - which might have happened
     * after we acquired epoch_mutex, so ldap_pvt_thread_rdwr_rlock would not
     * catch everything.
     *
     * TODO is to confirm the below:
     * It might be that the tests and exchanges above only enforce a fence for
     * the locations affected, so we could still read stale memory for
     * unrelated locations? At least that's the only explanation I've been able
     * to establish for repeated crashes that seem to have gone away with this
     * in place.
     *
     * But then that's contrary to the second example in the Acquire/Release
     * section here:
     * https://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync
     */
    __atomic_thread_fence( __ATOMIC_ACQUIRE );

    for ( p = old_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }

    for ( p = current_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }
}
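/*
 * To make the idle optimisation above concrete, a minimal single-threaded
 * timeline (obj and obj_destroy are hypothetical placeholders):
 *
 *     epoch_t e = epoch_join();          // we are the only observer
 *     epoch_append( obj, obj_destroy );  // obj queued on the current epoch
 *     epoch_leave( e );                  // our count drops to zero and the
 *                                        // neighbouring epoch is empty too,
 *                                        // so obj is disposed of right here
 *
 * Without this path, obj would stay queued until later epoch_join() calls
 * advanced the epoch past it (twice, as noted above).
 */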

/*
 * Add the object to the "current global epoch", not the epoch our thread
 * entered.
 */
void
epoch_append( void *ptr, dispose_cb *cb )
{
    struct pending_ref *new;
    epoch_t epoch = __atomic_load_n( &current_epoch, __ATOMIC_ACQUIRE );

    /*
     * BTW, the following is not appropriate here:
     * assert( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) );
     *
     * We might be a thread lagging behind in the "previous epoch" with no
     * other threads executing at all.
     */

    new = ch_malloc( sizeof(struct pending_ref) );
    new->object = ptr;
    new->dispose = cb;
    new->next = __atomic_load_n( &references[epoch], __ATOMIC_ACQUIRE );

    while ( !__atomic_compare_exchange( &references[epoch], &new->next, &new, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) )
        /* iterate until we succeed */;
}

int
acquire_ref( uintptr_t *refp )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /*
     * If we just incremented the refcnt and checked for zero after, another
     * thread might falsely believe the object was going to stick around.
     *
     * Checking whether the object is still dead at disposal time might not be
     * able to distinguish it from being freed in a later epoch.
     */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt + 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt + 1 );

    return refcnt;
}

int
try_release_ref( uintptr_t *refp, void *object, dispose_cb *cb )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /* We promise the caller that we won't decrease refcnt below 0 */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt - 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt - 1 );

    if ( !new_refcnt ) {
        epoch_append( object, cb );
    }

    return refcnt;
}
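/*
 * acquire_ref()/try_release_ref() are meant to be combined with the epoch
 * machinery above: the epoch keeps an unlinked object's memory valid long
 * enough to take a reference, and the reference keeps the object alive after
 * the epoch section ends. A hedged sketch of that pattern, with hypothetical
 * names (op, op->o_refcnt and operation_destroy are placeholders for whatever
 * managed object is being protected):
 *
 *     epoch_t e = epoch_join();
 *     if ( acquire_ref( &op->o_refcnt ) ) {
 *         epoch_leave( e );
 *         ... work with op, possibly blocking ...
 *         try_release_ref( &op->o_refcnt, op,
 *                 (dispose_cb *)operation_destroy );
 *     } else {
 *         // refcount already zero: the object is being torn down, leave it be
 *         epoch_leave( e );
 *     }
 *
 * When the last reference is dropped, try_release_ref() queues the object on
 * the current epoch via epoch_append() rather than freeing it immediately.
 */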