// Written in the D programming language.
/**
`AlignedBlockList` represents a wrapper around a chain of allocators, allowing
for fast deallocations and preserving a low degree of fragmentation by means
of aligned allocations.

Source: $(PHOBOSSRC std/experimental/allocator/building_blocks/aligned_block_list.d)
*/
module std.experimental.allocator.building_blocks.aligned_block_list;

import std.experimental.allocator.common;
import std.experimental.allocator.building_blocks.null_allocator;

// Common function implementation for thread local and shared AlignedBlockList
private mixin template AlignedBlockListImpl(bool isShared)
{
    import std.traits : hasMember;
    import std.typecons : Ternary;

    static if (isShared)
        import core.internal.spinlock : SpinLock;

private:
    // Doubly linked list of 'AlignedBlockNode'
    // Each node contains an `Allocator` followed by its payload
    static struct AlignedBlockNode
    {
        AlignedBlockNode* next, prev;
        Allocator bAlloc;

        static if (isShared)
        {
            shared(size_t) bytesUsed;
            // Since the lock is not taken when allocating, this acts like a
            // reference count keeping the node alive
            uint keepAlive;
        }
        else
        {
            size_t bytesUsed;
        }
    }

    // Root of the internal doubly linked list
    AlignedBlockNode* root;

    // Number of active nodes
    uint numNodes;

    // If numNodes exceeds this limit, we start deallocating empty nodes
    enum uint maxNodes = 64;

    // This lock is always taken when changing the list.
    // To improve performance, it is not taken when the allocation logic runs.
    static if (isShared)
        SpinLock lock = SpinLock(SpinLock.Contention.brief);

    // Moves a node to the front of the list, allowing for quick allocations
    void moveToFront(AlignedBlockNode* tmp)
    {
        auto localRoot = cast(AlignedBlockNode*) root;
        if (tmp == localRoot)
            return;

        if (tmp.prev) tmp.prev.next = tmp.next;
        if (tmp.next) tmp.next.prev = tmp.prev;
        if (localRoot) localRoot.prev = tmp;
        tmp.next = localRoot;
        tmp.prev = null;

        root = cast(typeof(root)) tmp;
    }

    // Removes a node from the list, including its payload.
    // The payload is deallocated by calling 'parent.deallocate'.
    void removeNode(AlignedBlockNode* tmp)
    {
        auto next = tmp.next;
        if (tmp.prev) tmp.prev.next = tmp.next;
        if (tmp.next) tmp.next.prev = tmp.prev;
        parent.deallocate((cast(void*) tmp)[0 .. theAlignment]);

        if (tmp == cast(AlignedBlockNode*) root)
            root = cast(typeof(root)) next;

        static if (isShared)
        {
            import core.atomic : atomicOp;
            atomicOp!"-="(numNodes, 1);
        }
        else
        {
            numNodes--;
        }
    }

    // If the nodes do not have available space, a new node is created
    // by drawing memory from the parent allocator with aligned allocations.
    // The new node is inserted at the front of the list.
    bool insertNewNode()
    {
        void[] buf = parent.alignedAllocate(theAlignment, theAlignment);
        if (buf is null)
            return false;

        auto localRoot = cast(AlignedBlockNode*) root;
        auto newNode = cast(AlignedBlockNode*) buf;

        // The first part of the allocation represents the node contents,
        // followed by the actual payload
        ubyte[] payload = cast(ubyte[]) buf[AlignedBlockNode.sizeof .. $];

        newNode.bAlloc = Allocator(payload);

        newNode.next = localRoot;
        newNode.prev = null;
        if (localRoot)
            localRoot.prev = newNode;
        root = cast(typeof(root)) newNode;

        static if (isShared)
        {
            import core.atomic : atomicOp;
            atomicOp!"+="(numNodes, 1);
        }
        else
        {
            numNodes++;
        }

        return true;
    }
public:
    static if (stateSize!ParentAllocator) ParentAllocator parent;
    else alias parent = ParentAllocator.instance;

    enum ulong alignment = Allocator.alignment;

    // Since all memory is drawn from ParentAllocator, we can
    // forward this query to the parent
    static if (hasMember!(ParentAllocator, "owns"))
    Ternary owns(void[] b)
    {
        return parent.owns(b);
    }

    // Use `theAlignment` to find the node which allocated this block
    bool deallocate(void[] b)
    {
        if (b is null)
            return true;

        // Round the buffer down to the nearest `theAlignment` multiple to
        // quickly find the owning `AlignedBlockNode`
        enum ulong mask = ~(theAlignment - 1);
        ulong ptr = ((cast(ulong) b.ptr) & mask);
        AlignedBlockNode* node = cast(AlignedBlockNode*) ptr;
        if (node.bAlloc.deallocate(b))
        {
            static if (isShared)
            {
                import core.atomic : atomicOp;
                atomicOp!"-="(node.bytesUsed, b.length);
            }
            else
            {
                node.bytesUsed -= b.length;
            }
            return true;
        }
        return false;
    }

    // `allocate` works only if memory can be provided via `alignedAllocate`
    // from the parent
    static if (hasMember!(ParentAllocator, "alignedAllocate"))
    void[] allocate(size_t n)
    {
        static if (isShared)
        import core.atomic : atomicOp, atomicLoad;

        if (n == 0 || n > theAlignment)
            return null;

        static if (isShared)
        {
            lock.lock();
            scope(exit) lock.unlock();
        }

        auto tmp = cast(AlignedBlockNode*) root;

        // Iterate through the list and find the first node which has
        // memory available
        while (tmp)
        {
            auto next = tmp.next;
            static if (isShared)
            {
                // Allocations can happen outside the lock.
                // Make sure nobody deletes this node while using it.
                tmp.keepAlive++;
                if (next) next.keepAlive++;
                lock.unlock();
            }

            auto result = tmp.bAlloc.allocate(n);
            if (result.length == n)
            {
                // Success
                static if (isShared)
                {
                    atomicOp!"+="(tmp.bytesUsed, n);
                    lock.lock();
                }
                else
                {
                    tmp.bytesUsed += n;
                }

                // Most likely this node has memory for more allocations;
                // move it to the front
                moveToFront(tmp);

                static if (isShared)
                {
                    tmp.keepAlive--;
                    if (next) next.keepAlive--;
                }

                return result;
            }

            // This node can now be removed if necessary
            static if (isShared)
            {
                lock.lock();
                tmp.keepAlive--;
                if (next) next.keepAlive--;
            }

            if (!next)
                break;

            tmp = next;
            next = tmp.next;

            // If there are too many nodes, free memory by removing empty nodes
            static if (isShared)
            {
                if (atomicLoad(numNodes) > maxNodes &&
                    atomicLoad(tmp.bytesUsed) == 0 &&
                    tmp.keepAlive == 0)
                {
                    removeNode(tmp);
                }
            }
            else
            {
                if (numNodes > maxNodes && tmp.bytesUsed == 0)
                {
                    removeNode(tmp);
                }
            }

            tmp = next;
        }

        // Cannot create a new AlignedBlockNode. Most likely the
        // ParentAllocator ran out of resources.
        if (!insertNewNode())
            return null;

        tmp = cast(typeof(tmp)) root;
        void[] result = tmp.bAlloc.allocate(n);

        static if (isShared)
        {
            atomicOp!"+="(root.bytesUsed, result.length);
        }
        else
        {
            root.bytesUsed += result.length;
        }

        return result;
    }

    // goodAllocSize should not use state
    size_t goodAllocSize(const size_t n)
    {
        Allocator a = null;
        return a.goodAllocSize(n);
    }
}
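// The list manipulation above is easiest to see in isolation. The following
// unittest is a simplified, standalone sketch of the move-to-front policy
// implemented by `moveToFront`; the toy `Node` type is illustrative only and
// not part of the implementation.
@system unittest
{
    static struct Node { Node* next, prev; int id; }

    // Build the list 1 -> 2 -> 3
    Node n1 = Node(null, null, 1);
    Node n2 = Node(null, null, 2);
    Node n3 = Node(null, null, 3);
    n1.next = &n2; n2.prev = &n1;
    n2.next = &n3; n3.prev = &n2;
    Node* root = &n1;

    // Unlink node 3 and relink it at the head, mirroring `moveToFront`
    Node* tmp = &n3;
    if (tmp.prev) tmp.prev.next = tmp.next;
    if (tmp.next) tmp.next.prev = tmp.prev;
    if (root) root.prev = tmp;
    tmp.next = root;
    tmp.prev = null;
    root = tmp;

    // The most recently useful node is now first, so the next allocation
    // scan finds it immediately
    assert(root.id == 3);
    assert(root.next.id == 1);
    assert(root.next.next.id == 2);
}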
/**
`AlignedBlockList` represents a wrapper around a chain of allocators, allowing
for fast deallocations and preserving a low degree of fragmentation.
The allocator holds internally a doubly linked list of `Allocator` objects,
which serve allocations in a most-recently-used fashion: the allocators most
recently used for `allocate` calls are moved to the front of the list.

Although allocations are in theory served in linear search time, `deallocate`
calls take $(BIGOH 1) time by using aligned allocations. `ParentAllocator`
must implement `alignedAllocate` and it must be able to allocate
`theAlignment` bytes at the same alignment. Each aligned allocation done by
`ParentAllocator` will contain metadata for an `Allocator`, followed by its
payload.

Params:
    Allocator = the allocator which is used to manage each node; it must have
        a constructor which receives `ubyte[]` and it must not have any parent
        allocators, except for the `NullAllocator`
    ParentAllocator = each node draws memory from the parent allocator; it
        must support `alignedAllocate`
    theAlignment = alignment of each block and, at the same time, length of
        each node
*/
struct AlignedBlockList(Allocator, ParentAllocator, ulong theAlignment = (1 << 21))
{
    version (StdDdoc)
    {
        import std.typecons : Ternary;
        import std.traits : hasMember;

        /**
        Returns a chunk of memory of size `n`.
        It finds the first node in the `AlignedBlockNode` list which has
        available memory, and moves it to the front of the list.

        All empty nodes which cannot return new memory are removed from the
        list.

        Params:
            n = bytes to allocate

        Returns:
            A chunk of memory of the required length or `null` on failure
        */
        static if (hasMember!(ParentAllocator, "alignedAllocate"))
        void[] allocate(size_t n);

        /**
        Deallocates the buffer `b` given as parameter. Deallocations take
        place in constant time, regardless of the number of nodes in the list.
        `b.ptr` is rounded down to the nearest multiple of the `alignment` to
        quickly find the corresponding `AlignedBlockNode`.

        Params:
            b = buffer candidate for deallocation

        Returns:
            `true` on success and `false` on failure
        */
        bool deallocate(void[] b);

        /**
        Returns `Ternary.yes` if the buffer belongs to the parent allocator
        and `Ternary.no` otherwise.

        Params:
            b = buffer tested if owned by this allocator

        Returns:
            `Ternary.yes` if owned by this allocator and `Ternary.no`
            otherwise
        */
        static if (hasMember!(ParentAllocator, "owns"))
        Ternary owns(void[] b);
    }
    else
    {
        import std.math.traits : isPowerOf2;
        static assert(isPowerOf2(alignment));
        mixin AlignedBlockListImpl!false;
    }
}
///
@system unittest
{
    import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
    import std.experimental.allocator.building_blocks.segregator : Segregator;
    import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;
    import std.typecons : Ternary;

    /*
    In this example we use 'AlignedBlockList' in conjunction with other
    allocators in order to create a more complex allocator.

    The 'SuperAllocator' uses a 'Segregator' to distribute allocations to
    sub-allocators, based on the requested size.

    Each sub-allocator is represented by an 'AlignedBlockList' of
    'BitmappedBlocks'. Each 'AlignedBlockList' draws memory from a root
    allocator which in this case is an 'AscendingPageAllocator'.

    Such an allocator not only provides good performance, but also a low
    degree of memory fragmentation.
    */
    alias SuperAllocator = Segregator!(
        32,
        AlignedBlockList!(BitmappedBlock!32, AscendingPageAllocator*, 1 << 12),
        Segregator!(
            64,
            AlignedBlockList!(BitmappedBlock!64, AscendingPageAllocator*, 1 << 12),
            Segregator!(
                128,
                AlignedBlockList!(BitmappedBlock!128, AscendingPageAllocator*, 1 << 12),
                AscendingPageAllocator*
            )));

    SuperAllocator a;
    auto pageAlloc = AscendingPageAllocator(128 * 4096);

    // Set the parent allocator for all the sub-allocators
    a.allocatorForSize!256 = &pageAlloc;
    a.allocatorForSize!128.parent = &pageAlloc;
    a.allocatorForSize!64.parent = &pageAlloc;
    a.allocatorForSize!32.parent = &pageAlloc;

    enum testNum = 10;
    void[][testNum] buf;

    // Allocations of size 32 will go to the first 'AlignedBlockList'
    foreach (j; 0 .. testNum)
    {
        buf[j] = a.allocate(32);
        assert(buf[j].length == 32);

        // This is owned by the first 'AlignedBlockList'
        assert(a.allocatorForSize!32.owns(buf[j]) == Ternary.yes);
    }

    // Free the memory
    foreach (j; 0 .. testNum)
        assert(a.deallocate(buf[j]));

    // Allocations of size 64 will go to the second 'AlignedBlockList'
    foreach (j; 0 .. testNum)
    {
        buf[j] = a.allocate(64);
        assert(buf[j].length == 64);

        // This is owned by the second 'AlignedBlockList'
        assert(a.allocatorForSize!64.owns(buf[j]) == Ternary.yes);
    }

    // Free the memory
    foreach (j; 0 .. testNum)
        assert(a.deallocate(buf[j]));

    // Allocations of size 128 will go to the third 'AlignedBlockList'
    foreach (j; 0 .. testNum)
    {
        buf[j] = a.allocate(128);
        assert(buf[j].length == 128);

        // This is owned by the third 'AlignedBlockList'
        assert(a.allocatorForSize!128.owns(buf[j]) == Ternary.yes);
    }

    // Free the memory
    foreach (j; 0 .. testNum)
        assert(a.deallocate(buf[j]));

    // Allocations which exceed 128 will go to the 'AscendingPageAllocator*'
    void[] b = a.allocate(256);
    assert(b.length == 256);
    a.deallocate(b);
}
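// A minimal sketch (not one of the original examples), assuming the
// `BitmappedBlock` semantics used throughout this module: `goodAllocSize`
// deliberately uses no per-instance state, so it can be queried before the
// parent allocator is even set.
@system unittest
{
    import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
    import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;

    AlignedBlockList!(BitmappedBlock!64, AscendingPageAllocator*, 1 << 12) a;

    // Sizes are rounded up to the 64-byte block size of the inner allocator
    assert(a.goodAllocSize(1) == 64);
    assert(a.goodAllocSize(64) == 64);
    assert(a.goodAllocSize(65) == 128);
}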
/**
`SharedAlignedBlockList` is the thread-safe version of `AlignedBlockList`.
The `Allocator` template parameter must refer to a shared allocator.
Also, `ParentAllocator` must be a shared allocator, supporting
`alignedAllocate`.

Params:
    Allocator = the shared allocator which is used to manage each node; it
        must have a constructor which receives `ubyte[]` and it must not have
        any parent allocators, except for the `NullAllocator`
    ParentAllocator = each node draws memory from the parent allocator; it
        must be shared and support `alignedAllocate`
    theAlignment = alignment of each block and, at the same time, length of
        each node
*/
shared struct SharedAlignedBlockList(Allocator, ParentAllocator, ulong theAlignment = (1 << 21))
{
    version (StdDdoc)
    {
        import std.typecons : Ternary;
        import std.traits : hasMember;

        /**
        Returns a chunk of memory of size `n`.
        It finds the first node in the `AlignedBlockNode` list which has
        available memory, and moves it to the front of the list.

        All empty nodes which cannot return new memory are removed from the
        list.

        Params:
            n = bytes to allocate

        Returns:
            A chunk of memory of the required length or `null` on failure
        */
        static if (hasMember!(ParentAllocator, "alignedAllocate"))
        void[] allocate(size_t n);

        /**
        Deallocates the buffer `b` given as parameter. Deallocations take
        place in constant time, regardless of the number of nodes in the list.
        `b.ptr` is rounded down to the nearest multiple of the `alignment` to
        quickly find the corresponding `AlignedBlockNode`.

        Params:
            b = buffer candidate for deallocation

        Returns:
            `true` on success and `false` on failure
        */
        bool deallocate(void[] b);

        /**
        Returns `Ternary.yes` if the buffer belongs to the parent allocator
        and `Ternary.no` otherwise.

        Params:
            b = buffer tested if owned by this allocator

        Returns:
            `Ternary.yes` if owned by this allocator and `Ternary.no`
            otherwise
        */
        static if (hasMember!(ParentAllocator, "owns"))
        Ternary owns(void[] b);
    }
    else
    {
        import std.math.traits : isPowerOf2;
        static assert(isPowerOf2(alignment));
        mixin AlignedBlockListImpl!true;
    }
}

///
@system unittest
{
    import std.experimental.allocator.building_blocks.region : SharedRegion;
    import std.experimental.allocator.building_blocks.ascending_page_allocator : SharedAscendingPageAllocator;
    import std.experimental.allocator.building_blocks.null_allocator : NullAllocator;
    import core.thread : ThreadGroup;

    enum numThreads = 8;
    enum size = 2048;
    enum maxIter = 10;

    /*
    In this example we use 'SharedAlignedBlockList' together with
    'SharedRegion', in order to create a fast, thread-safe allocator.
    */
    alias SuperAllocator = SharedAlignedBlockList!(
            SharedRegion!(NullAllocator, 1),
            SharedAscendingPageAllocator,
            4096);

    SuperAllocator a;
    // The 'SuperAllocator' will draw memory from a 'SharedAscendingPageAllocator'
    a.parent = SharedAscendingPageAllocator(4096 * 1024);

    // Launch 'numThreads' threads, each performing allocations
    void fun()
    {
        foreach (i; 0 .. maxIter)
        {
            void[] b = a.allocate(size);
            assert(b.length == size);
        }
    }

    auto tg = new ThreadGroup;
    foreach (i; 0 .. numThreads)
    {
        tg.create(&fun);
    }
    tg.joinAll();
}
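// Illustrative sketch of the pointer arithmetic that makes `deallocate`
// $(BIGOH 1): any address inside an aligned node maps back to the node's
// start with a single mask. The addresses below are made up for the example,
// not produced by the allocator.
@system unittest
{
    enum ulong theAlignment = 1 << 16;
    enum ulong mask = ~(theAlignment - 1);

    // A hypothetical allocation starting 0x1234 bytes into a node that the
    // parent allocator placed at the 64KB-aligned address 0x7000_0000
    ulong insideNode = 0x7000_0000UL + 0x1234;
    assert((insideNode & mask) == 0x7000_0000UL);

    // All addresses within the same node map to the same node start
    assert(((insideNode + 100) & mask) == 0x7000_0000UL);
}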
version (StdUnittest)
{
    static void testrw(void[] b)
    {
        ubyte* buf = cast(ubyte*) b.ptr;
        size_t len = (b.length).roundUpToMultipleOf(4096);
        for (int i = 0; i < len; i += 4096)
        {
            buf[i] = (cast(ubyte) i % 256);
            assert(buf[i] == (cast(ubyte) i % 256));
        }
    }
}

@system unittest
{
    import std.experimental.allocator.building_blocks.region;
    import std.experimental.allocator.building_blocks.ascending_page_allocator;
    import std.random;
    import std.algorithm.sorting : sort;
    import core.thread : ThreadGroup;
    import core.internal.spinlock : SpinLock;

    enum pageSize = 4096;
    enum numThreads = 10;
    enum maxIter = 20;
    enum totalAllocs = maxIter * numThreads;
    size_t count = 0;
    SpinLock lock = SpinLock(SpinLock.Contention.brief);

    alias SuperAllocator = SharedAlignedBlockList!(
            SharedRegion!(NullAllocator, 1),
            SharedAscendingPageAllocator,
            1 << 16);
    void[][totalAllocs] buf;

    SuperAllocator a;
    a.parent = SharedAscendingPageAllocator(4096 * 1024);

    void fun()
    {
        auto rnd = Random(1000);

        foreach (i; 0 .. maxIter)
        {
            auto size = uniform(1, pageSize + 1, rnd);
            void[] b = a.allocate(size);
            assert(b.length == size);
            testrw(b);

            lock.lock();
            buf[count++] = b;
            lock.unlock();
        }
    }

    auto tg = new ThreadGroup;
    foreach (i; 0 .. numThreads)
    {
        tg.create(&fun);
    }
    tg.joinAll();

    sort!((a, b) => a.ptr < b.ptr)(buf[0 .. totalAllocs]);
    foreach (i; 0 .. totalAllocs - 1)
    {
        assert(buf[i].ptr + a.goodAllocSize(buf[i].length) <= buf[i + 1].ptr);
    }

    foreach (i; 0 .. totalAllocs)
    {
        assert(a.deallocate(buf[totalAllocs - 1 - i]));
    }
}

@system unittest
{
    import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
    import std.experimental.allocator.building_blocks.segregator : Segregator;
    import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;
    import std.random;

    alias SuperAllocator = Segregator!(
        256,
        AlignedBlockList!(BitmappedBlock!256, AscendingPageAllocator*, 1 << 16),
        Segregator!(
            512,
            AlignedBlockList!(BitmappedBlock!512, AscendingPageAllocator*, 1 << 16),
            Segregator!(
                1024,
                AlignedBlockList!(BitmappedBlock!1024, AscendingPageAllocator*, 1 << 16),
                Segregator!(
                    2048,
                    AlignedBlockList!(BitmappedBlock!2048, AscendingPageAllocator*, 1 << 16),
                    AscendingPageAllocator*
                ))));

    SuperAllocator a;
    auto pageAlloc = AscendingPageAllocator(4096 * 4096);
    a.allocatorForSize!4096 = &pageAlloc;
    a.allocatorForSize!2048.parent = &pageAlloc;
    a.allocatorForSize!1024.parent = &pageAlloc;
    a.allocatorForSize!512.parent = &pageAlloc;
    a.allocatorForSize!256.parent = &pageAlloc;

    auto rnd = Random(1000);

    size_t maxIter = 10;
    enum testNum = 10;
    void[][testNum] buf;
    int maxSize = 8192;
    foreach (i; 0 .. maxIter)
    {
        foreach (j; 0 .. testNum)
        {
            auto size = uniform(1, maxSize + 1, rnd);
            buf[j] = a.allocate(size);
            assert(buf[j].length == size);
            testrw(buf[j]);
        }

        randomShuffle(buf[]);

        foreach (j; 0 .. testNum)
        {
            assert(a.deallocate(buf[j]));
        }
    }
}
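// A minimal usage sketch (not part of the original tests), exercising a
// single `AlignedBlockList` directly rather than through a `Segregator`.
// The sizes chosen here are illustrative.
@system unittest
{
    import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
    import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;
    import std.typecons : Ternary;

    AlignedBlockList!(BitmappedBlock!64, AscendingPageAllocator*, 1 << 12) a;
    auto pageAlloc = AscendingPageAllocator(1024 * 4096);
    a.parent = &pageAlloc;

    void[] b = a.allocate(100);
    assert(b.length == 100);

    // `owns` is forwarded to the parent allocator
    assert(a.owns(b) == Ternary.yes);

    // Requests larger than a node's alignment (and hence its length)
    // cannot be served
    assert(a.allocate((1 << 12) + 1) is null);

    assert(a.deallocate(b));
}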