/*	$NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $	*/

/* epoch.c - epoch based memory reclamation */
/* $OpenLDAP$ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
 * Copyright 2018-2021 The OpenLDAP Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available in the file LICENSE in the
 * top-level directory of the distribution or, alternatively, at
 * <http://www.OpenLDAP.org/license.html>.
 */

/** @file epoch.c
 *
 * Implementation of epoch based memory reclamation, in principle
 * similar to the algorithm presented in
 * https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf
 *
 * Not completely lock-free at the moment.
 *
 * The usual problems with epoch based memory reclamation are also still
 * present: if a thread actively observing an epoch gets stuck, it prevents
 * managed objects (in our case connections and operations) from being
 * freed, potentially until the server runs out of memory.
 */
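
/*
 * Illustrative call pattern (the object `obj', its `refcnt' member and the
 * `obj_destroy' callback below are hypothetical examples, not defined in
 * this file):
 *
 *     epoch_t epoch = epoch_join();
 *     if ( acquire_ref( &obj->refcnt ) ) {
 *         ... use obj safely ...
 *         try_release_ref( &obj->refcnt, obj, obj_destroy );
 *     }
 *     epoch_leave( epoch );
 *
 * Objects whose refcount drops to zero are queued with epoch_append() and
 * only disposed of once no thread can still be observing an epoch in which
 * they were reachable.
 */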

#include <sys/cdefs.h>
__RCSID("$NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $");

#include "portable.h"

#include "lload.h"
#include <epoch.h>

/* Has to be >= 3 */
#define EPOCH_MASK ( 1 << 2 )
#define EPOCH_PREV(epoch) ( ( (epoch) + EPOCH_MASK - 1 ) % EPOCH_MASK )
#define EPOCH_NEXT(epoch) ( ( (epoch) + 1 ) % EPOCH_MASK )
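
/*
 * With EPOCH_MASK == 4 the epoch counter cycles 0 -> 1 -> 2 -> 3 -> 0, so
 * for example EPOCH_PREV(0) == 3 and EPOCH_NEXT(3) == 0. Threads only ever
 * join the current epoch and the epoch never advances past one that is
 * still observed, so at any moment observers can sit in at most the
 * current epoch and the one before it.
 */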

struct pending_ref {
    void *object;
    dispose_cb *dispose;
    struct pending_ref *next;
};

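/*
 * Global reclamation state: epoch_mutex guards advancing current_epoch,
 * epoch_threads[] counts the threads currently observing each epoch and
 * references[] holds the per-epoch lists of objects waiting to be disposed
 * of.
 */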
ldap_pvt_thread_rdwr_t epoch_mutex;

static epoch_t current_epoch;
static uintptr_t epoch_threads[EPOCH_MASK];
static struct pending_ref *references[EPOCH_MASK];

void
epoch_init( void )
{
    epoch_t epoch;

    current_epoch = 0;
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_init( &epoch_mutex );
}

void
epoch_shutdown( void )
{
    epoch_t epoch;
    struct pending_ref *old, *next;

    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
    }

    /*
     * Even with the work in epoch_leave(), shutdown code doesn't currently
     * observe any epoch, so there might still be references left to free.
     */
    epoch = EPOCH_PREV(current_epoch);
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    epoch = current_epoch;
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    /* No references should exist anywhere now */
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_destroy( &epoch_mutex );
}

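/*
 * Register the calling thread as an observer of the current epoch. If the
 * previous epoch has no observers left, also claim any references still
 * pending there, advance the global epoch and dispose of the claimed
 * references outside any epoch before re-joining. Returns the epoch
 * joined, to be handed to epoch_leave() later.
 */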
epoch_t
epoch_join( void )
{
    epoch_t epoch;
    struct pending_ref *old, *ref = NULL;

retry:
    /* TODO: make this completely lock-free */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    epoch = current_epoch;
    __atomic_add_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL );
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    if ( __atomic_load_n(
                 &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_ACQUIRE ) ) {
        return epoch;
    }

    __atomic_exchange(
            &references[EPOCH_PREV(epoch)], &ref, &ref, __ATOMIC_ACQ_REL );

    Debug( LDAP_DEBUG_TRACE, "epoch_join: "
            "advancing epoch to %zu with %s objects to free\n",
            EPOCH_NEXT(epoch), ref ? "some" : "no" );

    ldap_pvt_thread_rdwr_wlock( &epoch_mutex );
    current_epoch = EPOCH_NEXT(epoch);
    ldap_pvt_thread_rdwr_wunlock( &epoch_mutex );

    if ( !ref ) {
        return epoch;
    }

    /*
     * The references claimed above are now safe to free outside any epoch
     * and we don't want to make the current epoch last any longer than
     * necessary, so leave it before disposing of them.
     *
     * Looks like there might be fairness issues in massively parallel
     * environments but they haven't been observed on 32-core machines.
     */
    epoch_leave( epoch );

    for ( old = ref; old; old = ref ) {
        ref = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    goto retry;
}

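/*
 * Stop observing the given epoch. If this was the last thread observing it
 * and no thread observes the neighbouring epoch either, claim both pending
 * reference lists and dispose of them right away rather than waiting for a
 * later epoch_join() to do so.
 */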
void
epoch_leave( epoch_t epoch )
{
    struct pending_ref *p, *next, *old_refs = NULL, *current_refs = NULL;

    /* Are there other threads observing our epoch? */
    if ( __atomic_sub_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL ) ) {
        return;
    }

    /*
     * Optimisation for the case when we're mostly idle. Otherwise we won't
     * release resources until another thread comes by and joins the epoch
     * (twice), and there's no telling how soon (or late) that is going to
     * happen.
     *
     * NB. There is no limit to the number of threads executing the following
     * code in parallel.
     */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    /*
     * Anything could happen between the subtract and the lock being acquired
     * above, so check again. But once we hold this lock (and confirm no more
     * threads still observe either prospective epoch), no one will be able to
     * finish epoch_join() until we've released epoch_mutex, since at that
     * point:
     *
     * epoch_threads[EPOCH_PREV(current_epoch)] == 0
     *
     * holds, which forces epoch_join() to acquire a write lock on &epoch_mutex.
     */
    if ( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) ) {
        /* Epoch counter has run full circle */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    } else if ( epoch == current_epoch ) {
        if ( __atomic_load_n(
                     &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (older) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange( &references[EPOCH_PREV(epoch)], &old_refs,
                &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[epoch], &current_refs, &current_refs,
                __ATOMIC_ACQ_REL );
    } else if ( epoch == EPOCH_PREV(current_epoch) ) {
        if ( __atomic_load_n(
                     &epoch_threads[EPOCH_NEXT(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (newer) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange(
                &references[epoch], &old_refs, &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[EPOCH_NEXT(epoch)], &current_refs,
                &current_refs, __ATOMIC_ACQ_REL );
    }
    /*
     * Else the current_epoch has moved far enough that no references remain to
     * be freed.
     */
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    /*
     * Trigger a memory-independent read fence to make sure we're reading the
     * state after all threads have actually finished - which might have
     * happened after we acquired epoch_mutex, so ldap_pvt_thread_rdwr_rlock
     * alone would not catch everything.
     *
     * TODO: confirm the below:
     * It might be that the tests and exchanges above only enforce a fence for
     * the locations affected, so we could still read stale memory for
     * unrelated locations? At least that's the only explanation I've been able
     * to establish for repeated crashes that seem to have gone away with this
     * in place.
     *
     * But then that's contrary to the second example in the Acquire/Release
     * section here:
     * https://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync
     */
    __atomic_thread_fence( __ATOMIC_ACQUIRE );

    for ( p = old_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }

    for ( p = current_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }
}

/*
 * Add the object to the "current global epoch", not the epoch our thread
 * entered.
 */
void
epoch_append( void *ptr, dispose_cb *cb )
{
    struct pending_ref *new;
    epoch_t epoch = __atomic_load_n( &current_epoch, __ATOMIC_ACQUIRE );

    /*
     * BTW, the following is not appropriate here:
     * assert( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) );
     *
     * We might be a thread lagging behind in the "previous epoch" with no
     * other threads executing at all.
     */

    new = ch_malloc( sizeof(struct pending_ref) );
    new->object = ptr;
    new->dispose = cb;
    new->next = __atomic_load_n( &references[epoch], __ATOMIC_ACQUIRE );

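    /*
     * Lock-free push onto the head of the per-epoch list: on failure,
     * __atomic_compare_exchange refreshes new->next with the current head,
     * so we simply retry until no other thread races us in between.
     */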
    while ( !__atomic_compare_exchange( &references[epoch], &new->next, &new, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) )
        /* iterate until we succeed */;
}

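/*
 * Take a new reference unless the count has already dropped to zero (the
 * object is already being retired). Returns the value observed before the
 * increment, so a return of 0 means no reference was acquired.
 */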
int
acquire_ref( uintptr_t *refp )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /*
     * If we just incremented the refcnt and checked for zero after, another
     * thread might falsely believe the object was going to stick around.
     *
     * Checking whether the object is still dead at disposal time might not be
     * able to distinguish it from being freed in a later epoch.
     */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt + 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt + 1 );

    return refcnt;
}

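/*
 * Drop a reference, never letting the count go below zero. When the count
 * reaches zero, schedule the object for disposal via epoch_append() with
 * the supplied callback. Returns the value observed before the decrement.
 */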
int
try_release_ref( uintptr_t *refp, void *object, dispose_cb *cb )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /* We promise the caller that we won't decrease refcnt below 0 */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt - 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt - 1 );

    if ( !new_refcnt ) {
        epoch_append( object, cb );
    }

    return refcnt;
}