// Written in the D programming language.
/**
`AlignedBlockList` represents a wrapper around a chain of allocators, allowing for fast deallocations
and preserving a low degree of fragmentation by means of aligned allocations.

Source: $(PHOBOSSRC std/experimental/allocator/building_blocks/aligned_block_list.d)
*/
8module std.experimental.allocator.building_blocks.aligned_block_list;
9
10import std.experimental.allocator.common;
11import std.experimental.allocator.building_blocks.null_allocator;
12
// Common function implementation for the thread-local and shared AlignedBlockList.
// `isShared` selects between plain fields and atomics/spinlock-guarded state.
private mixin template AlignedBlockListImpl(bool isShared)
{
    import std.traits : hasMember;
    import std.typecons : Ternary;

    static if (isShared)
    import core.internal.spinlock : SpinLock;

private:
    // Node of the internal doubly linked list of 'AlignedBlockNode'.
    // Each node occupies exactly `theAlignment` bytes drawn from `parent`:
    // this header comes first and is followed by the payload managed by the
    // embedded `Allocator` instance.
    static struct AlignedBlockNode
    {
        AlignedBlockNode* next, prev;
        // Sub-allocator serving allocations out of this node's payload
        Allocator bAlloc;

        static if (isShared)
        {
            // Payload bytes currently handed out; updated atomically
            shared(size_t) bytesUsed;
            // Since the lock is not taken when allocating, this acts like a refcount
            // keeping the node alive; the node may only be removed once it drops to 0
            uint keepAlive;
        }
        else
        {
            // Payload bytes currently handed out
            size_t bytesUsed;
        }
    }

    // Root of the internal doubly linked list
    AlignedBlockNode* root;

    // Number of active nodes
    uint numNodes;

    // If the numNodes exceeds this limit, we will start deallocating nodes
    enum uint maxNodes = 64;

    // This lock is always taken when changing the list.
    // To improve performance, the lock is not taken when the allocation logic is called
    static if (isShared)
    SpinLock lock = SpinLock(SpinLock.Contention.brief);

    // Moves a node to the front of the list, allowing for quick allocations.
    // In the shared case the caller must hold `lock`.
    void moveToFront(AlignedBlockNode* tmp)
    {
        auto localRoot = cast(AlignedBlockNode*) root;
        if (tmp == localRoot)
            return;

        // Unlink `tmp` and reinsert it in front of the current root
        if (tmp.prev) tmp.prev.next = tmp.next;
        if (tmp.next) tmp.next.prev = tmp.prev;
        if (localRoot) localRoot.prev = tmp;
        tmp.next = localRoot;
        tmp.prev = null;

        root = cast(typeof(root)) tmp;
    }

    // Removes a node from the list, including its payload.
    // The payload is deallocated by calling 'parent.deallocate'.
    // In the shared case the caller must hold `lock` and the node's
    // `keepAlive` must be zero.
    void removeNode(AlignedBlockNode* tmp)
    {
        auto next = tmp.next;
        if (tmp.prev) tmp.prev.next = tmp.next;
        if (tmp.next) tmp.next.prev = tmp.prev;
        // The node header sits at the start of its aligned block, so the node
        // pointer is also the base address of the whole parent allocation
        parent.deallocate((cast(void*) tmp)[0 .. theAlignment]);

        if (tmp == cast(AlignedBlockNode*) root)
            root = cast(typeof(root)) next;

        static if (isShared)
        {
            import core.atomic : atomicOp;
            atomicOp!"-="(numNodes, 1);
        }
        else
        {
            numNodes--;
        }
    }

    // If the nodes do not have available space, a new node is created
    // by drawing memory from the parent allocator with aligned allocations.
    // The new node is inserted at the front of the list.
    // Returns false if the parent allocator cannot provide the memory.
    bool insertNewNode()
    {
        // Aligned to its own size, so `deallocate` can recover the node
        // address from any interior pointer by masking low address bits
        void[] buf = parent.alignedAllocate(theAlignment, theAlignment);
        if (buf is null)
            return false;

        auto localRoot = cast(AlignedBlockNode*) root;
        auto newNode = cast(AlignedBlockNode*) buf;

        // The first part of the allocation represents the node contents,
        // followed by the actual payload handed to the node's `Allocator`
        ubyte[] payload = cast(ubyte[]) buf[AlignedBlockNode.sizeof .. $];
        newNode.bAlloc = Allocator(payload);

        newNode.next = localRoot;
        newNode.prev = null;
        if (localRoot)
            localRoot.prev = newNode;
        root = cast(typeof(root)) newNode;

        static if (isShared)
        {
            import core.atomic : atomicOp;
            atomicOp!"+="(numNodes, 1);
        }
        else
        {
            numNodes++;
        }

        return true;
    }

public:
    static if (stateSize!ParentAllocator) ParentAllocator parent;
    else alias parent = ParentAllocator.instance;

    enum ulong alignment = Allocator.alignment;

    // Since all memory is drawn from ParentAllocator, we can
    // forward this to the parent
    static if (hasMember!(ParentAllocator, "owns"))
    Ternary owns(void[] b)
    {
        return parent.owns(b);
    }

    // Use `theAlignment` to find the node which allocated this block.
    // Returns true if `b` was deallocated (a null buffer trivially succeeds).
    // Nodes that become empty are not freed here; they are reclaimed lazily
    // during `allocate`.
    bool deallocate(void[] b)
    {
        if (b is null)
            return true;

        // Round the buffer address down to the nearest `theAlignment`
        // multiple to quickly find the owning `AlignedBlockNode`; this relies
        // on `theAlignment` being a power of two (asserted by the wrappers)
        enum ulong mask = ~(theAlignment - 1);
        ulong ptr = ((cast(ulong) b.ptr) & mask);
        AlignedBlockNode *node = cast(AlignedBlockNode*) ptr;
        if (node.bAlloc.deallocate(b))
        {
            static if (isShared)
            {
                import core.atomic : atomicOp;
                atomicOp!"-="(node.bytesUsed, b.length);
            }
            else
            {
                node.bytesUsed -= b.length;
            }
            return true;
        }
        return false;
    }

    // Allocate works only if memory can be provided via `alignedAllocate` from the parent.
    // Walks the node list front to back and serves `n` bytes from the first
    // probed node that has room, creating a fresh node if none does.
    static if (hasMember!(ParentAllocator, "alignedAllocate"))
    void[] allocate(size_t n)
    {
        static if (isShared)
        import core.atomic : atomicOp, atomicLoad;

        // Requests larger than a node's capacity can never be satisfied
        if (n == 0 || n > theAlignment)
            return null;

        static if (isShared)
        {
            lock.lock();
            scope(exit) lock.unlock();
        }

        auto tmp = cast(AlignedBlockNode*) root;

        // Iterate through list and find first node which has memory available
        while (tmp)
        {
            auto next = tmp.next;
            static if (isShared)
            {
                // Allocations happen outside the lock; pin both the current
                // node and its successor so nobody deletes them while in use
                tmp.keepAlive++;
                if (next) next.keepAlive++;
                lock.unlock();
            }

            auto result = tmp.bAlloc.allocate(n);
            if (result.length == n)
            {
                // Success
                static if (isShared)
                {
                    atomicOp!"+="(tmp.bytesUsed, n);
                    lock.lock();
                }
                else
                {
                    tmp.bytesUsed += n;
                }

                // Most likely this node has memory for more allocations
                // Move it to the front
                moveToFront(tmp);

                static if (isShared)
                {
                    tmp.keepAlive--;
                    if (next) next.keepAlive--;
                }

                return result;
            }

            // The allocation failed; retake the lock and unpin both nodes
            static if (isShared)
            {
                lock.lock();
                tmp.keepAlive--;
                if (next) next.keepAlive--;
            }

            if (!next)
                break;

            // Step to the successor and fetch the node after it; the
            // successor is only examined for removal below, and the search
            // resumes at the node following it.
            // NOTE(review): this advances two nodes per failed attempt, so
            // every other node is never probed for allocation — confirm this
            // is the intended trade-off.
            tmp = next;
            next = tmp.next;

            // If there are too many nodes, free memory by removing empty nodes
            static if (isShared)
            {
                if (atomicLoad(numNodes) > maxNodes &&
                    atomicLoad(tmp.bytesUsed) == 0 &&
                    tmp.keepAlive == 0)
                {
                    removeNode(tmp);
                }
            }
            else
            {
                if (numNodes > maxNodes && tmp.bytesUsed == 0)
                {
                    removeNode(tmp);
                }
            }

            tmp = next;
        }

        // No existing node could serve the request; grow the list.
        // Failure here most likely means the ParentAllocator ran out of resources
        if (!insertNewNode())
            return null;

        // The freshly inserted node sits at the front; serve from it
        tmp = cast(typeof(tmp)) root;
        void[] result = tmp.bAlloc.allocate(n);

        static if (isShared)
        {
            atomicOp!"+="(root.bytesUsed, result.length);
        }
        else
        {
            root.bytesUsed += result.length;
        }

        return result;
    }

    // goodAllocSize should not use state: it is invoked on a null-initialized
    // `Allocator`, so the result must depend only on `n`
    size_t goodAllocSize(const size_t n)
    {
        Allocator a = null;
        return a.goodAllocSize(n);
    }
}
292
/**
`AlignedBlockList` represents a wrapper around a chain of allocators, allowing for fast deallocations
and preserving a low degree of fragmentation.
The allocator holds internally a doubly linked list of `Allocator` objects, which will serve allocations
in a most-recently-used fashion. The allocators most recently used for `allocate` calls will be
moved to the front of the list.

Although allocations are in theory served in linear searching time, `deallocate` calls take
$(BIGOH 1) time, by using aligned allocations. `ParentAllocator` must implement `alignedAllocate`
and it must be able to allocate `theAlignment` bytes at the same alignment. Each aligned allocation
done by `ParentAllocator` will contain metadata for an `Allocator`, followed by its payload.

Params:
    Allocator = the allocator which is used to manage each node; it must have a constructor which receives
        `ubyte[]` and it must not have any parent allocators, except for the `NullAllocator`
    ParentAllocator = each node draws memory from the parent allocator; it must support `alignedAllocate`
    theAlignment = alignment of each block and at the same time length of each node; must be a power of two
*/
struct AlignedBlockList(Allocator, ParentAllocator, ulong theAlignment = (1 << 21))
{
    version (StdDdoc)
    {
        import std.typecons : Ternary;
        import std.traits : hasMember;

        /**
        Returns a chunk of memory of size `n`.
        It finds the first node in the `AlignedBlockNode` list which has available memory,
        and moves it to the front of the list.

        All empty nodes which cannot return new memory, are removed from the list.

        Params:
            n = bytes to allocate
        Returns:
            A chunk of memory of the required length or `null` on failure
        */
        static if (hasMember!(ParentAllocator, "alignedAllocate"))
        void[] allocate(size_t n);

        /**
        Deallocates the buffer `b` given as parameter. Deallocations take place in constant
        time, regardless of the number of nodes in the list. `b.ptr` is rounded down
        to the nearest multiple of the `alignment` to quickly find the corresponding
        `AlignedBlockNode`.

        Params:
            b = buffer candidate for deallocation
        Returns:
            `true` on success and `false` on failure
        */
        bool deallocate(void[] b);

        /**
        Returns `Ternary.yes` if the buffer belongs to the parent allocator and
        `Ternary.no` otherwise.

        Params:
            b = buffer tested if owned by this allocator
        Returns:
            `Ternary.yes` if owned by this allocator and `Ternary.no` otherwise
        */
        static if (hasMember!(ParentAllocator, "owns"))
        Ternary owns(void[] b);
    }
    else
    {
        import std.math.traits : isPowerOf2;
        // `deallocate` recovers the owning node by masking low address bits,
        // which only works for power-of-two alignments
        static assert(isPowerOf2(alignment));
        mixin AlignedBlockListImpl!false;
    }
}
365
///
@system unittest
{
    import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
    import std.experimental.allocator.building_blocks.segregator : Segregator;
    import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;
    import std.typecons : Ternary;

    /*
    This example combines 'AlignedBlockList' with other building blocks
    to assemble a more complex allocator.

    The 'SuperAllocator' relies on a 'Segregator' to route each allocation,
    based on its requested size, to the appropriate sub-allocator.

    Every sub-allocator is an 'AlignedBlockList' of 'BitmappedBlock's, and all
    of them draw memory from one root allocator, here an 'AscendingPageAllocator'.

    An allocator composed this way provides not only good performance, but
    also a low degree of memory fragmentation.
    */
    alias SuperAllocator = Segregator!(
        32,
        AlignedBlockList!(BitmappedBlock!32, AscendingPageAllocator*, 1 << 12),
        Segregator!(

        64,
        AlignedBlockList!(BitmappedBlock!64, AscendingPageAllocator*, 1 << 12),
        Segregator!(

        128,
        AlignedBlockList!(BitmappedBlock!128, AscendingPageAllocator*, 1 << 12),
        AscendingPageAllocator*
    )));

    SuperAllocator a;
    auto pageAlloc = AscendingPageAllocator(128 * 4096);

    // Wire every sub-allocator to the shared root allocator
    a.allocatorForSize!256 = &pageAlloc;
    a.allocatorForSize!128.parent = &pageAlloc;
    a.allocatorForSize!64.parent = &pageAlloc;
    a.allocatorForSize!32.parent = &pageAlloc;

    enum testNum = 10;
    void[][testNum] buf;

    // Size-32 requests are routed to the first 'AlignedBlockList'
    foreach (ref slice; buf)
    {
        slice = a.allocate(32);
        assert(slice.length == 32);

        // The first 'AlignedBlockList' owns this block
        assert(a.allocatorForSize!32.owns(slice) == Ternary.yes);
    }

    // Hand all the memory back
    foreach (slice; buf)
        assert(a.deallocate(slice));

    // Size-64 requests are routed to the second 'AlignedBlockList'
    foreach (ref slice; buf)
    {
        slice = a.allocate(64);
        assert(slice.length == 64);

        // The second 'AlignedBlockList' owns this block
        assert(a.allocatorForSize!64.owns(slice) == Ternary.yes);
    }

    // Hand all the memory back
    foreach (slice; buf)
        assert(a.deallocate(slice));

    // Size-128 requests are routed to the third 'AlignedBlockList'
    foreach (ref slice; buf)
    {
        slice = a.allocate(128);
        assert(slice.length == 128);

        // The third 'AlignedBlockList' owns this block
        assert(a.allocatorForSize!128.owns(slice) == Ternary.yes);
    }

    // Hand all the memory back
    foreach (slice; buf)
        assert(a.deallocate(slice));

    // Requests above 128 bytes go straight to the 'AscendingPageAllocator*'
    void[] b = a.allocate(256);
    assert(b.length == 256);
    a.deallocate(b);
}
459
/**
`SharedAlignedBlockList` is the threadsafe version of `AlignedBlockList`.
The `Allocator` template parameter must refer to a shared allocator.
Also, `ParentAllocator` must be a shared allocator, supporting `alignedAllocate`.

Params:
    Allocator = the shared allocator which is used to manage each node; it must have a constructor which receives
        `ubyte[]` and it must not have any parent allocators, except for the `NullAllocator`
    ParentAllocator = each node draws memory from the parent allocator; it must be shared and support `alignedAllocate`
    theAlignment = alignment of each block and at the same time length of each node; must be a power of two
*/
shared struct SharedAlignedBlockList(Allocator, ParentAllocator, ulong theAlignment = (1 << 21))
{
    version (StdDdoc)
    {
        import std.typecons : Ternary;
        import std.traits : hasMember;

        /**
        Returns a chunk of memory of size `n`.
        It finds the first node in the `AlignedBlockNode` list which has available memory,
        and moves it to the front of the list.

        All empty nodes which cannot return new memory, are removed from the list.

        Params:
            n = bytes to allocate
        Returns:
            A chunk of memory of the required length or `null` on failure
        */
        static if (hasMember!(ParentAllocator, "alignedAllocate"))
        void[] allocate(size_t n);

        /**
        Deallocates the buffer `b` given as parameter. Deallocations take place in constant
        time, regardless of the number of nodes in the list. `b.ptr` is rounded down
        to the nearest multiple of the `alignment` to quickly find the corresponding
        `AlignedBlockNode`.

        Params:
            b = buffer candidate for deallocation
        Returns:
            `true` on success and `false` on failure
        */
        bool deallocate(void[] b);

        /**
        Returns `Ternary.yes` if the buffer belongs to the parent allocator and
        `Ternary.no` otherwise.

        Params:
            b = buffer tested if owned by this allocator
        Returns:
            `Ternary.yes` if owned by this allocator and `Ternary.no` otherwise
        */
        static if (hasMember!(ParentAllocator, "owns"))
        Ternary owns(void[] b);
    }
    else
    {
        import std.math.traits : isPowerOf2;
        // `deallocate` recovers the owning node by masking low address bits,
        // which only works for power-of-two alignments
        static assert(isPowerOf2(alignment));
        mixin AlignedBlockListImpl!true;
    }
}
525
///
@system unittest
{
    import std.experimental.allocator.building_blocks.region : SharedRegion;
    import std.experimental.allocator.building_blocks.ascending_page_allocator : SharedAscendingPageAllocator;
    import std.experimental.allocator.building_blocks.null_allocator : NullAllocator;
    import core.thread : ThreadGroup;

    enum numThreads = 8;
    enum size = 2048;
    enum maxIter = 10;

    /*
    Here 'SharedAlignedBlockList' is combined with 'SharedRegion' in order
    to build a fast, thread-safe allocator.
    */
    alias SuperAllocator = SharedAlignedBlockList!(
            SharedRegion!(NullAllocator, 1),
            SharedAscendingPageAllocator,
            4096);

    SuperAllocator a;
    // All of the 'SuperAllocator' memory comes from a 'SharedAscendingPageAllocator'
    a.parent = SharedAscendingPageAllocator(4096 * 1024);

    // Work performed by each of the 'numThreads' allocating threads
    void allocLoop()
    {
        foreach (_; 0 .. maxIter)
        {
            auto chunk = a.allocate(size);
            assert(chunk.length == size);
        }
    }

    // Spawn the threads and wait for all of them to finish
    auto group = new ThreadGroup;
    foreach (_; 0 .. numThreads)
    {
        group.create(&allocLoop);
    }
    group.joinAll();
}
568
version (StdUnittest)
{
    // Smoke-tests that `b` is readable and writable by touching one byte in
    // every 4096-byte stride of the buffer (rounded up to a whole stride).
    static void testrw(void[] b)
    {
        auto bytes = cast(ubyte*) b.ptr;
        immutable size_t limit = (b.length).roundUpToMultipleOf(4096);
        for (size_t offset = 0; offset < limit; offset += 4096)
        {
            immutable ubyte expected = cast(ubyte) (offset % 256);
            bytes[offset] = expected;
            assert(bytes[offset] == expected);
        }
    }
}
582
// Multi-threaded stress test: concurrent random-size allocations must neither
// fail nor overlap, and everything must deallocate cleanly afterwards.
@system unittest
{
    import std.experimental.allocator.building_blocks.region;
    import std.experimental.allocator.building_blocks.ascending_page_allocator;
    import std.random;
    import std.algorithm.sorting : sort;
    import core.thread : ThreadGroup;
    import core.internal.spinlock : SpinLock;

    enum pageSize = 4096;
    enum numThreads = 10;
    enum maxIter = 20;
    enum totalAllocs = maxIter * numThreads;
    // Shared write cursor into `buf`; guarded by `lock`
    size_t count = 0;
    SpinLock lock = SpinLock(SpinLock.Contention.brief);

    alias SuperAllocator = SharedAlignedBlockList!(
            SharedRegion!(NullAllocator, 1),
            SharedAscendingPageAllocator,
            1 << 16);
    // Buffers produced by all threads are collected here for later checks
    void[][totalAllocs] buf;

    SuperAllocator a;
    a.parent = SharedAscendingPageAllocator(4096 * 1024);

    // Each thread performs `maxIter` randomly sized allocations, verifies
    // each buffer is usable, and records it in the shared `buf` array
    void fun()
    {
        auto rnd = Random(1000);

        foreach (i; 0 .. maxIter)
        {
            auto size = uniform(1, pageSize + 1, rnd);
            void[] b = a.allocate(size);
            assert(b.length == size);
            testrw(b);

            lock.lock();
            buf[count++] = b;
            lock.unlock();
        }
    }
    auto tg = new ThreadGroup;
    foreach (i; 0 .. numThreads)
    {
        tg.create(&fun);
    }
    tg.joinAll();

    // Sort buffers by address and verify no two allocations overlap: each
    // buffer's footprint (per `goodAllocSize`) must end at or before the
    // start of the next one
    sort!((a, b) => a.ptr < b.ptr)(buf[0 .. totalAllocs]);
    foreach (i; 0 .. totalAllocs - 1)
    {
        assert(buf[i].ptr + a.goodAllocSize(buf[i].length) <= buf[i + 1].ptr);
    }

    // Free everything, in reverse address order
    foreach (i; 0 .. totalAllocs)
    {
        assert(a.deallocate(buf[totalAllocs - 1 - i]));
    }
}
642
// Single-threaded randomized test across several 'AlignedBlockList' size
// classes, with deallocation performed in shuffled order.
@system unittest
{
    import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
    import std.experimental.allocator.building_blocks.segregator : Segregator;
    import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;
    import std.random;

    alias SuperAllocator = Segregator!(
        256,
        AlignedBlockList!(BitmappedBlock!256, AscendingPageAllocator*, 1 << 16),
        Segregator!(

        512,
        AlignedBlockList!(BitmappedBlock!512, AscendingPageAllocator*, 1 << 16),
        Segregator!(

        1024,
        AlignedBlockList!(BitmappedBlock!1024, AscendingPageAllocator*, 1 << 16),
        Segregator!(

        2048,
        AlignedBlockList!(BitmappedBlock!2048, AscendingPageAllocator*, 1 << 16),
        AscendingPageAllocator*
    ))));

    SuperAllocator a;
    auto pageAlloc = AscendingPageAllocator(4096 * 4096);

    // Every size class draws memory from the same root allocator
    a.allocatorForSize!4096 = &pageAlloc;
    a.allocatorForSize!2048.parent = &pageAlloc;
    a.allocatorForSize!1024.parent = &pageAlloc;
    a.allocatorForSize!512.parent = &pageAlloc;
    a.allocatorForSize!256.parent = &pageAlloc;

    auto gen = Random(1000);

    size_t rounds = 10;
    enum testNum = 10;
    void[][testNum] blocks;
    int sizeCap = 8192;
    foreach (round; 0 .. rounds)
    {
        // Grab 'testNum' randomly sized buffers and verify each is usable
        foreach (ref block; blocks)
        {
            immutable len = uniform(1, sizeCap + 1, gen);
            block = a.allocate(len);
            assert(block.length == len);
            testrw(block);
        }

        // Free the buffers in random order to stress the deallocation path
        randomShuffle(blocks[]);

        foreach (block; blocks)
        {
            assert(a.deallocate(block));
        }
    }
}
700