/*	$NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $	*/

/* epoch.c - epoch based memory reclamation */
/* $OpenLDAP$ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
 * Copyright 2018-2021 The OpenLDAP Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available in the file LICENSE in the
 * top-level directory of the distribution or, alternatively, at
 * <http://www.OpenLDAP.org/license.html>.
 */

/** @file epoch.c
 *
 * Implementation of epoch based memory reclamation, in principle
 * similar to the algorithm presented in
 * https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf
 *
 * Not completely lock-free at the moment.
 *
 * The usual problems with epoch based memory reclamation are also still
 * present - a thread that gets stuck while actively observing an epoch
 * will prevent managed objects (in our case connections and operations)
 * from being freed, potentially running the process out of memory.
 */
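/*
 * A minimal sketch of the intended usage, with hypothetical object and
 * callback names (conn, handle and connection_destroy are placeholders,
 * not part of this API):
 *
 *     epoch_t epoch = epoch_join();
 *     handle( conn );
 *     epoch_append( conn, (dispose_cb *)connection_destroy );
 *     epoch_leave( epoch );
 *
 * Objects handed to epoch_append() are only disposed of once no thread can
 * still be observing an epoch in which they were reachable.
 */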

#include <sys/cdefs.h>
__RCSID("$NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $");

#include "portable.h"

#include "lload.h"
#include <epoch.h>

/* Has to be >= 3 */
#define EPOCH_MASK ( 1 << 2 )
#define EPOCH_PREV(epoch) ( ( (epoch) + EPOCH_MASK - 1 ) % EPOCH_MASK )
#define EPOCH_NEXT(epoch) ( ( (epoch) + 1 ) % EPOCH_MASK )
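
/*
 * With EPOCH_MASK == 4 the epoch counter cycles 0 -> 1 -> 2 -> 3 -> 0;
 * for example, EPOCH_PREV(0) == 3 and EPOCH_NEXT(3) == 0.
 */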

struct pending_ref {
    void *object;
    dispose_cb *dispose;
    struct pending_ref *next;
};

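/*
 * epoch_mutex serialises epoch advancement against threads joining an epoch.
 * current_epoch is the epoch that new joiners observe, epoch_threads[e]
 * counts the threads currently observing epoch e, and references[e] holds
 * the objects retired during epoch e, waiting to be disposed of once that
 * is safe.
 */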
ldap_pvt_thread_rdwr_t epoch_mutex;

static epoch_t current_epoch;
static uintptr_t epoch_threads[EPOCH_MASK];
static struct pending_ref *references[EPOCH_MASK];

void
epoch_init( void )
{
    epoch_t epoch;

    current_epoch = 0;
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_init( &epoch_mutex );
}

void
epoch_shutdown( void )
{
    epoch_t epoch;
    struct pending_ref *old, *next;

    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
    }

    /*
     * Even with the work in epoch_leave(), shutdown code doesn't currently
     * observe any epoch, so there might still be references left to free.
     */
    epoch = EPOCH_PREV(current_epoch);
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    epoch = current_epoch;
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    /* No references should exist anywhere now */
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_destroy( &epoch_mutex );
}

epoch_t
epoch_join( void )
{
    epoch_t epoch;
    struct pending_ref *old, *ref = NULL;

retry:
    /* TODO: make this completely lock-free */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    epoch = current_epoch;
    __atomic_add_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL );
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

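    /*
     * If any thread still observes the previous epoch, it is not yet safe
     * to reclaim that epoch's references or advance, so just report the
     * epoch we joined.
     */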
    if ( __atomic_load_n(
                 &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_ACQUIRE ) ) {
        return epoch;
    }

    __atomic_exchange(
            &references[EPOCH_PREV(epoch)], &ref, &ref, __ATOMIC_ACQ_REL );

    Debug( LDAP_DEBUG_TRACE, "epoch_join: "
            "advancing epoch to %zu with %s objects to free\n",
            EPOCH_NEXT(epoch), ref ? "some" : "no" );

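    /*
     * Nobody observes the previous epoch and its references have been
     * claimed above, so advance the global epoch. Taking the write lock
     * excludes joiners that are between reading current_epoch and
     * registering themselves in epoch_threads.
     */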
    ldap_pvt_thread_rdwr_wlock( &epoch_mutex );
    current_epoch = EPOCH_NEXT(epoch);
    ldap_pvt_thread_rdwr_wunlock( &epoch_mutex );

    if ( !ref ) {
        return epoch;
    }

    /*
     * The references claimed above are now safe to free outside of any epoch
     * and we don't want to make the current epoch last any longer than
     * necessary.
     *
     * Looks like there might be fairness issues in massively parallel
     * environments but they haven't been observed on 32-core machines.
     */
    epoch_leave( epoch );

    for ( old = ref; old; old = ref ) {
        ref = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

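    /*
     * We have already left our epoch to dispose of the claimed references,
     * so rejoin - the caller must be registered in a live epoch when we
     * return.
     */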
    goto retry;
}

void
epoch_leave( epoch_t epoch )
{
    struct pending_ref *p, *next, *old_refs = NULL, *current_refs = NULL;

    /* Are there other threads observing our epoch? */
    if ( __atomic_sub_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL ) ) {
        return;
    }

    /*
     * Optimisation for the case when we're mostly idle. Otherwise we won't
     * release resources until another thread comes by and joins the epoch
     * (twice), and there is no telling how soon (or late) that is going to
     * happen.
     *
     * NB. There is no limit to the number of threads executing the following
     * code in parallel.
     */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    /*
     * Anything could happen between the subtract and the lock being acquired
     * above, so check again. But once we hold this lock (and confirm no more
     * threads still observe either prospective epoch), no one will be able to
     * finish epoch_join until we've released epoch_mutex, since it holds that:
     *
     * epoch_threads[EPOCH_PREV(current_epoch)] == 0
     *
     * and that leads epoch_join() to acquire a write lock on &epoch_mutex.
     */
    if ( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) ) {
        /* Epoch counter has run full circle */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    } else if ( epoch == current_epoch ) {
        if ( __atomic_load_n(
                     &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (older) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange( &references[EPOCH_PREV(epoch)], &old_refs,
                &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[epoch], &current_refs, &current_refs,
                __ATOMIC_ACQ_REL );
    } else if ( epoch == EPOCH_PREV(current_epoch) ) {
        if ( __atomic_load_n(
                     &epoch_threads[EPOCH_NEXT(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (newer) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange(
                &references[epoch], &old_refs, &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[EPOCH_NEXT(epoch)], &current_refs,
                &current_refs, __ATOMIC_ACQ_REL );
    }
    /*
     * Else the current_epoch has moved far enough that no references remain to
     * be freed.
     */
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    /*
     * Trigger a memory-independent read fence to make sure we're reading the
     * state after all threads actually finished - which might have happened
     * after we acquired epoch_mutex, so ldap_pvt_thread_rdwr_rlock would not
     * catch everything.
     *
     * TODO: confirm the following:
     * It might be that the tests and exchanges above only enforce a fence for
     * the locations affected, so we could still read stale memory for
     * unrelated locations? At least that's the only explanation I've been able
     * to establish for repeated crashes that seem to have gone away with this
     * in place.
     *
     * But then that's contrary to the second example in the Acquire/Release
     * section here:
     * https://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync
     */
    __atomic_thread_fence( __ATOMIC_ACQUIRE );

    for ( p = old_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }

    for ( p = current_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }
}

/*
 * Add the object to the "current global epoch", not the epoch our thread
 * entered.
 */
void
epoch_append( void *ptr, dispose_cb *cb )
{
    struct pending_ref *new;
    epoch_t epoch = __atomic_load_n( &current_epoch, __ATOMIC_ACQUIRE );

    /*
     * Note that the following assertion would not be appropriate here:
     * assert( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) );
     *
     * We might be a thread lagging behind in the "previous epoch" with no
     * other threads executing at all.
     */

    new = ch_malloc( sizeof(struct pending_ref) );
    new->object = ptr;
    new->dispose = cb;
    new->next = __atomic_load_n( &references[epoch], __ATOMIC_ACQUIRE );

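    /*
     * Lock-free push onto the list head: on failure,
     * __atomic_compare_exchange refreshes new->next with the head it
     * observed in references[epoch], so an empty loop body suffices.
     */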
    while ( !__atomic_compare_exchange( &references[epoch], &new->next, &new, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) )
        /* iterate until we succeed */;
}

int
acquire_ref( uintptr_t *refp )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /*
     * If we just incremented the refcnt and checked for zero after, another
     * thread might falsely believe the object was going to stick around.
     *
     * Checking whether the object is still dead at disposal time might not be
     * able to distinguish it from being freed in a later epoch.
     */
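    /*
     * On CAS failure, __atomic_compare_exchange leaves the freshly observed
     * value in refcnt, so the zero check below is re-evaluated against
     * up-to-date state on every iteration.
     */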
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt + 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt + 1 );

    return refcnt;
}

int
try_release_ref( uintptr_t *refp, void *object, dispose_cb *cb )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /* We promise the caller that we won't decrease refcnt below 0 */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt - 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt - 1 );

    if ( !new_refcnt ) {
        epoch_append( object, cb );
    }

    return refcnt;
}
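
/*
 * A minimal sketch of how the reference counts are meant to pair with the
 * epoch machinery; obj, obj->refcnt and obj_destroy are placeholders, not
 * part of this API:
 *
 *     epoch_t epoch = epoch_join();
 *     if ( acquire_ref( &obj->refcnt ) ) {
 *         ... use obj ...
 *         try_release_ref( &obj->refcnt, obj, obj_destroy );
 *     }
 *     epoch_leave( epoch );
 *
 * acquire_ref() refuses to revive an object whose count has already dropped
 * to zero, while try_release_ref() retires the object through epoch_append()
 * when it releases the last reference.
 */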