1/*
2    Title:      Address scanner
3
4    Copyright (c) 2006-8, 2012 David C.J. Matthews
5
6    This library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    This library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with this library; if not, write to the Free Software
18    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19
20*/
21#ifdef HAVE_CONFIG_H
22#include "config.h"
23#elif defined(_WIN32)
24#include "winconfig.h"
25#else
26#error "No configuration file"
27#endif
28
29#ifdef HAVE_ASSERT_H
30#include <assert.h>
31#define ASSERT(x) assert(x)
32#else
33#define ASSERT(x) 0
34#endif
35
36#include <new>
37
38#include "globals.h"
39#include "scanaddrs.h"
40#include "machine_dep.h"
41#include "diagnostics.h"
42#include "memmgr.h"
43
44// Process the value at a given location and update it as necessary.
45POLYUNSIGNED ScanAddress::ScanAddressAt(PolyWord *pt)
46{
47    PolyWord val = *pt;
48    PolyWord newVal = val;
49    if (IS_INT(val) || val == PolyWord::FromUnsigned(0))
50    {
51        // We can get zeros in the constant area if we garbage collect
52        //  while compiling some code. */
53    }
54    else
55    {
56        ASSERT(OBJ_IS_DATAPTR(val));
57        // Any sort of address
58        newVal = ScanObjectAddress(val.AsObjPtr());
59    }
60    if (newVal != val) // Only update if we need to.
61        *pt = newVal;
62    return 0;
63}
64
65// General purpose object processor,  Processes all the addresses in an object.
66// Handles the various kinds of object that may contain addresses.
67void ScanAddress::ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord)
68{
69    do
70    {
71        ASSERT (OBJ_IS_LENGTH(lengthWord));
72
73        if (OBJ_IS_BYTE_OBJECT(lengthWord))
74            return; /* Nothing more to do */
75
76        POLYUNSIGNED length = OBJ_OBJECT_LENGTH(lengthWord);
77        PolyWord *baseAddr = (PolyWord*)obj;
78
79        if (OBJ_IS_CODE_OBJECT(lengthWord))
80        {
81            // Scan constants within the code.
82            machineDependent->ScanConstantsWithinCode(obj, obj, length, this);
83
84            // Skip to the constants and get ready to scan them.
85            obj->GetConstSegmentForCode(length, baseAddr, length);
86
87        } // else it's a normal object,
88
89        PolyWord *endWord = baseAddr + length;
90
91        // We want to minimise the actual recursion we perform so we try to
92        // use tail recursion if we can.  We first scan from the end and
93        // remove any words that don't need recursion.
94        POLYUNSIGNED lastLengthWord = 0;
95        while (endWord != baseAddr)
96        {
97            PolyWord wordAt = endWord[-1];
98            if (IS_INT(wordAt) || wordAt == PolyWord::FromUnsigned(0))
99                endWord--; // Don't need to look at this.
100            else if ((lastLengthWord = ScanAddressAt(endWord-1)) != 0)
101                // We need to process this one
102                break;
103            else endWord--; // We're not interested in this.
104        }
105
106        if (endWord == baseAddr)
107            return; // We've done everything.
108
109        // There is at least one word that needs to be processed, the
110        // one at endWord-1.
111        // Now process from the beginning forward to see if there are
112        // any words before this that need to be handled.  This way we are more
113        // likely to handle the head of a list by recursion and the
114        // tail by looping (tail recursion).
115        while (baseAddr < endWord-1)
116        {
117            PolyWord wordAt = *baseAddr;
118            if (IS_INT(wordAt) || wordAt == PolyWord::FromUnsigned(0))
119                baseAddr++; // Don't need to look at this.
120            else
121            {
122                POLYUNSIGNED lengthWord = ScanAddressAt(baseAddr);
123                if (lengthWord != 0)
124                {
125                    wordAt = *baseAddr; // Reload because it may have been side-effected
126                     // We really have to process this recursively.
127                    ASSERT(wordAt.IsDataPtr());
128                    ScanAddressesInObject(wordAt.AsObjPtr(), lengthWord);
129                    baseAddr++;
130                }
131                else baseAddr++;
132            }
133        }
134
135        // Finally process the last word we found that has to be processed.
136        // Do this by looping rather than recursion.
137        PolyWord wordAt = *baseAddr; // Last word to do.
138        // This must be an address
139        ASSERT(wordAt.IsDataPtr());
140        obj = wordAt.AsObjPtr();
141
142        lengthWord = lastLengthWord;
143
144    } while(1);
145}
146
147void ScanAddress::ScanAddressesInRegion(PolyWord *region, PolyWord *end)
148{
149    PolyWord *pt = region;
150    while (pt < end)
151    {
152        pt++; // Skip length word.
153        // pt actually points AT the object here.
154        PolyObject *obj = (PolyObject*)pt;
155        if (obj->ContainsForwardingPtr())    /* skip over moved object */
156        {
157            // We can now get multiple forwarding pointers as a result
158            // of applying ShareData repeatedly.  Perhaps we should
159            // turn the forwarding pointers back into normal words in
160            // an extra pass.
161            obj = obj->FollowForwardingChain();
162            ASSERT(obj->ContainsNormalLengthWord());
163            pt += obj->Length();
164        }
165        else
166        {
167            ASSERT(obj->ContainsNormalLengthWord());
168            POLYUNSIGNED length = obj->Length();
169            if (pt+length > end)
170                Crash("Malformed object at %p - length %lu\n", pt, length);
171            if (length != 0)
172                ScanAddressesInObject(obj);
173            pt += length;
174        }
175    }
176}
177
178// Extract a constant from the code.
179PolyWord ScanAddress::GetConstantValue(byte *addressOfConstant, ScanRelocationKind code)
180{
181    switch (code)
182    {
183    case PROCESS_RELOC_DIRECT: // 32 or 64 bit address of target
184        {
185            POLYUNSIGNED valu;
186            unsigned i;
187            byte *pt = addressOfConstant;
188            if (pt[3] & 0x80) valu = 0-1; else valu = 0;
189            for (i = sizeof(PolyWord); i > 0; i--)
190                valu = (valu << 8) | pt[i-1];
191
192            return PolyWord::FromUnsigned(valu);
193        }
194    case PROCESS_RELOC_I386RELATIVE:         // 32 bit relative address
195        {
196            POLYSIGNED disp;
197            byte *pt = addressOfConstant;
198            // Get the displacement. This is signed.
199            if (pt[3] & 0x80) disp = -1; else disp = 0; // Set the sign just in case.
200            for(unsigned i = 4; i > 0; i--) disp = (disp << 8) | pt[i-1];
201
202            byte *absAddr = pt + disp + 4; // The address is relative to AFTER the constant
203
204            return PolyWord::FromCodePtr(absAddr);
205        }
206    default:
207        ASSERT(false);
208        return TAGGED(0);
209    }
210}
211
212// Store a constant value.  Also used with a patch table when importing a saved heap which has
213// been exported using the C exporter.
214void ScanAddress::SetConstantValue(byte *addressOfConstant, PolyWord p, ScanRelocationKind code)
215{
216
217    switch (code)
218    {
219    case PROCESS_RELOC_DIRECT: // 32 or 64 bit address of target
220        {
221            POLYUNSIGNED valu = p.AsUnsigned();
222            for (unsigned i = 0; i < sizeof(PolyWord); i++)
223            {
224                addressOfConstant[i] = (byte)(valu & 255);
225                valu >>= 8;
226            }
227        }
228        break;
229    case PROCESS_RELOC_I386RELATIVE:         // 32 bit relative address
230        {
231            POLYSIGNED newDisp = p.AsCodePtr() - addressOfConstant - 4;
232#if (SIZEOF_VOIDP != 4)
233            ASSERT(newDisp < 0x80000000 && newDisp >= -(POLYSIGNED)0x80000000);
234#endif
235            for (unsigned i = 0; i < 4; i++) {
236                addressOfConstant[i] = (byte)(newDisp & 0xff);
237                newDisp >>= 8;
238            }
239        }
240        break;
241    }
242}
243
244// The default action is to call the DEFAULT ScanAddressAt NOT the virtual which means that it calls
245// ScanObjectAddress for the base address of the object referred to.
246void ScanAddress::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code)
247{
248    PolyWord p = GetConstantValue(addressOfConstant, code);
249
250    if (! IS_INT(p))
251    {
252        PolyWord oldValue = p;
253        ScanAddress::ScanAddressAt(&p);
254        if (p != oldValue) // Update it if it has changed.
255            SetConstantValue(addressOfConstant, p, code);
256    }
257}
258
259void ScanAddress::ScanRuntimeWord(PolyWord *w)
260{
261    if (w->IsTagged()) {} // Don't need to do anything
262    else {
263        ASSERT(w->IsDataPtr());
264        *w = ScanObjectAddress(w->AsObjPtr());
265    }
266}
267
268// This gets called in two circumstances.  It may be called for the roots
269// in which case the stack will be empty and we want to process it completely
270// or it is called for a constant address in which case it will have been
271// called from RecursiveScan::ScanAddressesInObject and that can process
272// any addresses.
273PolyObject *RecursiveScan::ScanObjectAddress(PolyObject *obj)
274{
275    PolyWord pWord = obj;
276    // Test to see if this needs to be scanned.
277    // It may update the word.
278    bool test = TestForScan(&pWord);
279    obj = pWord.AsObjPtr();
280
281    if (test)
282    {
283        MarkAsScanning(obj);
284        if (obj->IsByteObject())
285            Completed(obj); // Don't need to put it on the stack
286        // If we already have something on the stack we must being called
287        // recursively to process a constant in a code segment.  Just push
288        // it on the stack and let the caller deal with it.
289        else if (StackIsEmpty())
290            RecursiveScan::ScanAddressesInObject(obj, obj->LengthWord());
291        else
292            PushToStack(obj, (PolyWord*)obj);
293    }
294
295    return obj;
296}
297
298// This is called via ScanAddressesInRegion to process the permanent mutables.  It is
299// also called from ScanObjectAddress to process root addresses.
300// It processes all the addresses reachable from the object.
301void RecursiveScan::ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord)
302{
303    if (OBJ_IS_BYTE_OBJECT(lengthWord))
304        return; // Ignore byte cells and don't call Completed on them
305
306    PolyWord *baseAddr = (PolyWord*)obj;
307
308    while (true)
309    {
310        ASSERT (OBJ_IS_LENGTH(lengthWord));
311
312        // Get the length and base address.  N.B.  If this is a code segment
313        // these will be side-effected by GetConstSegmentForCode.
314        POLYUNSIGNED length = OBJ_OBJECT_LENGTH(lengthWord);
315
316        if (OBJ_IS_CODE_OBJECT(lengthWord))
317        {
318            // It's better to process the whole code object in one go.
319            ScanAddress::ScanAddressesInObject(obj, lengthWord);
320            length = 0; // Finished
321        }
322
323        // else it's a normal object,
324
325        // If there are only two addresses in this cell that need to be
326        // followed we follow them immediately and treat this cell as done.
327        // If there are more than two we push the address of this cell on
328        // the stack, follow the first address and then rescan it.  That way
329        // list cells are processed once only but we don't overflow the
330        // stack by pushing all the addresses in a very large vector.
331        PolyWord *endWord = (PolyWord*)obj + length;
332        PolyObject *firstWord = 0;
333        PolyObject *secondWord = 0;
334        PolyWord *restartFrom = baseAddr;
335
336        while (baseAddr != endWord)
337        {
338            PolyWord wordAt = *baseAddr;
339
340            if (wordAt.IsDataPtr() && wordAt != PolyWord::FromUnsigned(0))
341            {
342                // Normal address.  We can have words of all zeros at least in the
343                // situation where we have a partially constructed code segment where
344                // the constants at the end of the code have not yet been filled in.
345                if (TestForScan(baseAddr)) // Test value at baseAddr (may side-effect it)
346                {
347                    PolyObject *wObj = (*baseAddr).AsObjPtr();
348                    if (wObj->IsByteObject())
349                    {
350                        // Can do this now - don't need to push it
351                        MarkAsScanning(wObj);
352                        Completed(wObj);
353                    }
354                    else if (firstWord == 0)
355                    {
356                        firstWord = wObj;
357                        // We mark the word immediately.  We can have
358                        // two words in an object that are the same
359                        // and we don't want to process it again.
360                        MarkAsScanning(firstWord);
361                    }
362                    else if (secondWord == 0)
363                    {
364                        secondWord = wObj;
365                        restartFrom = baseAddr;
366                    }
367                    else break;  // More than two words.
368                }
369            }
370            baseAddr++;
371        }
372
373        if (baseAddr == endWord)
374        {
375            // We have done everything except possibly firstWord and secondWord.
376            // Note: Unfortunately the way that ScanAddressesInRegion works means that
377            // we call Completed on the addresses of cells in the permanent areas without
378            // having called TestForScan.
379            Completed(obj);
380            if (secondWord != 0)
381            {
382                MarkAsScanning(secondWord);
383                // Put this on the stack.  If this is a list node we will be
384                // pushing the tail.
385                PushToStack(secondWord, (PolyWord*)secondWord);
386            }
387        }
388        else // Put this back on the stack while we process the first word
389            PushToStack(obj, restartFrom);
390
391        if (firstWord != 0)
392        {
393            // Process it immediately.
394            obj = firstWord;
395            baseAddr = (PolyWord*)obj;
396        }
397        else if (StackIsEmpty())
398            return;
399        else
400            PopFromStack(obj, baseAddr);
401
402        lengthWord = obj->LengthWord();
403    }
404}
405
406// The stack is allocated as a series of blocks chained together.
407#define RSTACK_SEGMENT_SIZE 1000
408
409class RScanStack {
410public:
411    RScanStack(): nextStack(0), lastStack(0), sp(0) {}
412    ~RScanStack() { delete(nextStack); }
413
414    RScanStack *nextStack;
415    RScanStack *lastStack;
416    unsigned sp;
417    struct { PolyObject *obj; PolyWord *base; } stack[RSTACK_SEGMENT_SIZE];
418};
419
420RecursiveScanWithStack::~RecursiveScanWithStack()
421{
422    delete(stack);
423}
424
425bool RecursiveScanWithStack::StackIsEmpty(void)
426{
427    return stack == 0 || (stack->sp == 0 && stack->lastStack == 0);
428}
429
430void RecursiveScanWithStack::PushToStack(PolyObject *obj, PolyWord *base)
431{
432    if (stack == 0 || stack->sp == RSTACK_SEGMENT_SIZE)
433    {
434        if (stack != 0 && stack->nextStack != 0)
435            stack = stack->nextStack;
436        else
437        {
438            // Need a new segment
439            try {
440                RScanStack *s = new RScanStack;
441                s->lastStack = stack;
442                if (stack != 0)
443                    stack->nextStack = s;
444                stack = s;
445            }
446            catch (std::bad_alloc &) {
447                StackOverflow();
448                return;
449            }
450        }
451    }
452    stack->stack[stack->sp].obj = obj;
453    stack->stack[stack->sp].base = base;
454    stack->sp++;
455}
456
457void RecursiveScanWithStack::PopFromStack(PolyObject *&obj, PolyWord *&base)
458{
459    if (stack->sp == 0)
460    {
461        // Chain to the previous stack if any
462        ASSERT(stack->lastStack != 0);
463        // Before we do, delete any further one to free some memory
464        delete(stack->nextStack);
465        stack->nextStack = 0;
466        stack = stack->lastStack;
467        ASSERT(stack->sp == RSTACK_SEGMENT_SIZE);
468    }
469    --stack->sp;
470    obj = stack->stack[stack->sp].obj;
471    base = stack->stack[stack->sp].base;
472}
473