1/*
2    Title:      Multi-Threaded Garbage Collector
3
4    Copyright (c) 2010-12, 2019, 2020 David C. J. Matthews
5
6    Based on the original garbage collector code
7        Copyright 2000-2008
8        Cambridge University Technical Services Limited
9
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 2.1 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
23
24*/
25#ifdef HAVE_CONFIG_H
26#include "config.h"
27#elif defined(_WIN32)
28#include "winconfig.h"
29#else
30#error "No configuration file"
31#endif
32
33#ifdef HAVE_ASSERT_H
34#include <assert.h>
35#define ASSERT(x)   assert(x)
36#else
37#define ASSERT(x)
38#endif
39
40#include "globals.h"
41#include "run_time.h"
42#include "machine_dep.h"
43#include "diagnostics.h"
44#include "processes.h"
45#include "timing.h"
46#include "gc.h"
47#include "scanaddrs.h"
48#include "check_objects.h"
49#include "osmem.h"
50#include "bitmap.h"
51#include "rts_module.h"
52#include "memmgr.h"
53#include "gctaskfarm.h"
54#include "mpoly.h"
55#include "statistics.h"
56#include "profiling.h"
57#include "heapsizing.h"
58#include "gc_progress.h"
59
60static GCTaskFarm gTaskFarm; // Global task farm.
61GCTaskFarm *gpTaskFarm = &gTaskFarm;
62
63// If the GC converts a weak ref from SOME to NONE it sets this ref.  It can be
64// cleared by the signal handler thread.  There's no need for a lock since it
65// is only set during GC and only cleared when not GCing.
66bool convertedWeak = false;
67
68/*
69    How the garbage collector works.
70    The GC has two phases.  The minor (quick) GC is a copying collector that
71    copies data from the allocation area into the mutable and immutable area.
72    The major collector is started when either the mutable or the immutable
73    area is full.  The major collector uses a mark/sweep scheme.
74    The GC has three phases:
75
76    1.  Mark phase.
77    Working from the roots; which are the the permanent mutable segments and
78    the RTS roots (e.g. thread stacks), mark all reachable cells.
79    Marking involves setting bits in the bitmap for reachable words.
80
81    2. Compact phase.
82    Marked objects are copied to try to compact, upwards, the heap segments.  When
83    an object is moved the length word of the object in the old location is set as
84    a tombstone that points to its new location.  In particular this means that we
85    cannot reuse the space where an object previously was during the compaction phase.
86    Immutable objects are moved into immutable segments.  When an object is moved
87    to a new location the bits are set in the bitmap as though the object had been
88    marked at that location.
89
90    3. Update phase.
91    The roots and objects marked during the first two phases are scanned and any
92    addresses for moved objects are updated.  The lowest address used in the area
93    then becomes the base of the area for future allocations.
94
95    There is a sharing phase which may be performed before the mark phase.  This
96    merges immutable cells with the same contents with the aim of reducing the
97    size of the live data.  It is expensive so is not performed by default.
98
99    Updated DCJM 12/06/12
100
101*/
102static bool doGC(const POLYUNSIGNED wordsRequiredToAllocate)
103{
104    gHeapSizeParameters.RecordAtStartOfMajorGC();
105    gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeStart);
106    globalStats.incCount(PSC_GC_FULLGC);
107
108    // Remove any empty spaces.  There will not normally be any except
109    // if we have triggered a full GC as a result of detecting paging in the
110    // minor GC but in that case we want to try to stop the system writing
111    // out areas that are now empty.
112    gMem.RemoveEmptyLocals();
113
114    if (debugOptions & DEBUG_GC)
115        Log("GC: Full GC, %lu words required %" PRI_SIZET " spaces\n", wordsRequiredToAllocate, gMem.lSpaces.size());
116
117    if (debugOptions & DEBUG_HEAPSIZE)
118        gMem.ReportHeapSizes("Full GC (before)");
119
120    // Data sharing pass.
121    if (gHeapSizeParameters.PerformSharingPass())
122    {
123        globalStats.incCount(PSC_GC_SHARING);
124        GCSharingPhase();
125    }
126
127    gcProgressBeginMajorGC(); // The GC sharing phase is treated separately
128
129/*
130 * There is a really weird bug somewhere.  An extra bit may be set in the bitmap during
131 * the mark phase.  It seems to be related to heavy swapping activity.  Duplicating the
132 * bitmap causes it to occur only in one copy and write-protecting the bitmap apart from
133 * when it is actually being updated does not result in a seg-fault.  So far I've only
134 * seen it on 64-bit Linux but it may be responsible for other crashes.  The work-around
135 * is to check the number of bits set in the bitmap and repeat the mark phase if it does
136 * not match.
137 */
138
139    for (unsigned p = 3; p > 0; p--)
140    {
141        for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
142        {
143            LocalMemSpace *lSpace = *i;
144            ASSERT (lSpace->top >= lSpace->upperAllocPtr);
145            ASSERT (lSpace->upperAllocPtr >= lSpace->lowerAllocPtr);
146            ASSERT (lSpace->lowerAllocPtr >= lSpace->bottom);
147            // Set upper and lower limits of weak refs.
148            lSpace->highestWeak = lSpace->bottom;
149            lSpace->lowestWeak = lSpace->top;
150            lSpace->fullGCLowerLimit = lSpace->top;
151            // Put dummy objects in the unused space.  This allows
152            // us to scan over the whole of the space.
153            gMem.FillUnusedSpace(lSpace->lowerAllocPtr,
154                lSpace->upperAllocPtr-lSpace->lowerAllocPtr);
155        }
156
157        // Set limits of weak refs.
158        for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
159        {
160            PermanentMemSpace *pSpace = *i;
161            pSpace->highestWeak = pSpace->bottom;
162            pSpace->lowestWeak = pSpace->top;
163        }
164
165        /* Mark phase */
166        GCMarkPhase();
167
168        uintptr_t bitCount = 0, markCount = 0;
169
170        for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
171        {
172            LocalMemSpace *lSpace = *i;
173            markCount += lSpace->i_marked + lSpace->m_marked;
174            bitCount += lSpace->bitmap.CountSetBits(lSpace->spaceSize());
175        }
176
177        if (markCount == bitCount)
178            break;
179        else
180        {
181            // Report an error.  If this happens again we crash.
182            Log("GC: Count error mark count %lu, bitCount %lu\n", markCount, bitCount);
183            if (p == 1)
184            {
185                ASSERT(markCount == bitCount);
186            }
187        }
188    }
189    for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
190    {
191        LocalMemSpace *lSpace = *i;
192        // Reset the allocation pointers.  They will be set to the
193        // limits of the retained data.
194#ifdef POLYML32IN64
195        lSpace->lowerAllocPtr = lSpace->bottom+1; // Must be odd-word aligned
196        lSpace->lowerAllocPtr[-1] = PolyWord::FromUnsigned(0);
197#else
198        lSpace->lowerAllocPtr = lSpace->bottom;
199#endif
200        lSpace->upperAllocPtr = lSpace->top;
201    }
202
203	gcProgressSetPercent(25);
204
205    if (debugOptions & DEBUG_GC) Log("GC: Check weak refs\n");
206    /* Detect unreferenced streams, windows etc. */
207    GCheckWeakRefs();
208	gcProgressSetPercent(50);
209
210    // Check that the heap is not overfull.  We make sure the marked
211    // mutable and immutable data is no more than 90% of the
212    // corresponding areas.  This is a very coarse adjustment.
213    {
214        uintptr_t iMarked = 0, mMarked = 0;
215        uintptr_t iSpace = 0, mSpace = 0;
216        for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
217        {
218            LocalMemSpace *lSpace = *i;
219            iMarked += lSpace->i_marked;
220            mMarked += lSpace->m_marked;
221            if (! lSpace->allocationSpace)
222            {
223                if (lSpace->isMutable)
224                    mSpace += lSpace->spaceSize();
225                else
226                    iSpace += lSpace->spaceSize();
227            }
228        }
229        // Add space if necessary and possible.
230        while (iMarked > iSpace - iSpace/10 && gHeapSizeParameters.AddSpaceBeforeCopyPhase(false) != 0)
231            iSpace += gMem.DefaultSpaceSize();
232        while (mMarked > mSpace - mSpace/10 && gHeapSizeParameters.AddSpaceBeforeCopyPhase(true) != 0)
233            mSpace += gMem.DefaultSpaceSize();
234    }
235
236    /* Compact phase */
237    GCCopyPhase();
238
239    gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeIntermediate, "Copy");
240	gcProgressSetPercent(75);
241
242    // Update Phase.
243    if (debugOptions & DEBUG_GC) Log("GC: Update\n");
244    GCUpdatePhase();
245
246    gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeIntermediate, "Update");
247
248    {
249        uintptr_t iUpdated = 0, mUpdated = 0, iMarked = 0, mMarked = 0;
250        for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
251        {
252            LocalMemSpace *lSpace = *i;
253            iMarked += lSpace->i_marked;
254            mMarked += lSpace->m_marked;
255            if (lSpace->isMutable)
256                mUpdated += lSpace->updated;
257            else
258                iUpdated += lSpace->updated;
259        }
260        ASSERT(iUpdated+mUpdated == iMarked+mMarked);
261    }
262
263    // Delete empty spaces.
264    gMem.RemoveEmptyLocals();
265
266    if (debugOptions & DEBUG_GC_ENHANCED)
267    {
268        for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
269        {
270            LocalMemSpace *lSpace = *i;
271            Log("GC: %s space %p %" PRI_SIZET " free in %" PRI_SIZET " words %2.1f%% full\n", lSpace->spaceTypeString(),
272                lSpace, lSpace->freeSpace(), lSpace->spaceSize(),
273                ((float)lSpace->allocatedSpace()) * 100 / (float)lSpace->spaceSize());
274        }
275    }
276
277    // Compute values for statistics
278    globalStats.setSize(PSS_AFTER_LAST_GC, 0);
279    globalStats.setSize(PSS_AFTER_LAST_FULLGC, 0);
280    globalStats.setSize(PSS_ALLOCATION, 0);
281    globalStats.setSize(PSS_ALLOCATION_FREE, 0);
282
283    for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
284    {
285        LocalMemSpace *space = *i;
286        uintptr_t free = space->freeSpace();
287        globalStats.incSize(PSS_AFTER_LAST_GC, free*sizeof(PolyWord));
288        globalStats.incSize(PSS_AFTER_LAST_FULLGC, free*sizeof(PolyWord));
289        if (space->allocationSpace)
290        {
291            if (space->allocatedSpace() > space->freeSpace()) // It's more than half full
292                gMem.ConvertAllocationSpaceToLocal(space);
293            else
294            {
295                globalStats.incSize(PSS_ALLOCATION, free*sizeof(PolyWord));
296                globalStats.incSize(PSS_ALLOCATION_FREE, free*sizeof(PolyWord));
297            }
298        }
299#ifdef FILL_UNUSED_MEMORY
300        memset(space->bottom, 0xaa, (char*)space->upperAllocPtr - (char*)space->bottom);
301#endif
302        if (debugOptions & DEBUG_GC_ENHANCED)
303            Log("GC: %s space %p %" PRI_SIZET " free in %" PRI_SIZET " words %2.1f%% full\n", space->spaceTypeString(),
304                space, space->freeSpace(), space->spaceSize(),
305                ((float)space->allocatedSpace()) * 100 / (float)space->spaceSize());
306    }
307
308    // End of garbage collection
309    gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeEnd);
310
311    // Now we've finished we can adjust the heap sizes.
312    gHeapSizeParameters.AdjustSizeAfterMajorGC(wordsRequiredToAllocate);
313    gHeapSizeParameters.resetMajorTimingData();
314
315    bool haveSpace = gMem.CheckForAllocation(wordsRequiredToAllocate);
316
317    // Invariant: the bitmaps are completely clean.
318    if (debugOptions & DEBUG_GC)
319    {
320        if (haveSpace)
321            Log("GC: Completed successfully\n");
322        else Log("GC: Completed with insufficient space\n");
323    }
324
325    if (debugOptions & DEBUG_HEAPSIZE)
326        gMem.ReportHeapSizes("Full GC (after)");
327
328//    if (profileMode == kProfileLiveData || profileMode == kProfileLiveMutables)
329//        printprofile();
330
331    CheckMemory();
332
333    return haveSpace; // Completed
334}
335
336// Create the initial heap.  hsize, isize and msize are the requested heap sizes
337// from the user arguments in units of kbytes.
338// Fills in the defaults and attempts to allocate the heap.  If the heap size
339// is too large it allocates as much as it can.  The default heap size is half the
340// physical memory.
341void CreateHeap()
342{
343    // Create an initial allocation space.
344    if (gMem.CreateAllocationSpace(gMem.DefaultSpaceSize()) == 0)
345        Exit("Insufficient memory to allocate the heap");
346
347    // Create the task farm if required
348    if (userOptions.gcthreads != 1)
349    {
350        if (! gTaskFarm.Initialise(userOptions.gcthreads, 100))
351            Crash("Unable to initialise the GC task farm");
352    }
353    // Set up the stacks for the mark phase.
354    initialiseMarkerTables();
355}
356
357class FullGCRequest: public MainThreadRequest
358{
359public:
360    FullGCRequest(): MainThreadRequest(MTP_GCPHASEMARK) {}
361    virtual void Perform()
362    {
363        doGC (0);
364    }
365};
366
367class QuickGCRequest: public MainThreadRequest
368{
369public:
370    QuickGCRequest(POLYUNSIGNED words): MainThreadRequest(MTP_GCPHASEMARK), wordsRequired(words) {}
371
372    virtual void Perform()
373    {
374        result =
375#ifndef DEBUG_ONLY_FULL_GC
376// If DEBUG_ONLY_FULL_GC is defined then we skip the partial GC.
377            RunQuickGC(wordsRequired) ||
378#endif
379            doGC (wordsRequired);
380    }
381
382    bool result;
383    POLYUNSIGNED wordsRequired;
384};
385
386// Perform a full garbage collection.  This is called either from ML via the full_gc RTS call
387// or from various RTS functions such as open_file to try to recover dropped file handles.
388void FullGC(TaskData *taskData)
389{
390    FullGCRequest request;
391    processes->MakeRootRequest(taskData, &request);
392
393    if (convertedWeak)
394        // Notify the signal thread to broadcast on the condition var when
395        // the GC is complete.  We mustn't call SignalArrived within the GC
396        // because it locks schedLock and the main GC thread already holds schedLock.
397        processes->SignalArrived();
398}
399
400// This is the normal call when memory is exhausted and we need to garbage collect.
401bool QuickGC(TaskData *taskData, POLYUNSIGNED wordsRequiredToAllocate)
402{
403    QuickGCRequest request(wordsRequiredToAllocate);
404    processes->MakeRootRequest(taskData, &request);
405
406    if (convertedWeak)
407        processes->SignalArrived();
408
409    return request.result;
410}
411
412// Called in RunShareData.  This is called as a root function
413void FullGCForShareCommonData(void)
414{
415    doGC(0);
416}
417
418// RTS module for the GC.  Only used for ForkChild.
419class GarbageCollectModule : public RtsModule
420{
421public:
422    virtual void ForkChild(void);
423};
424
425// Set single threaded mode. This is only used in a child process after
426// Posix fork in case there is a GC before the exec.
427void GarbageCollectModule::ForkChild(void)
428{
429    gpTaskFarm->SetSingleThreaded();
430    initialiseMarkerTables();
431}
432
433// Declare this.  It will be automatically added to the table.
434static GarbageCollectModule gcModule;
435