1/*
2    Title:      Profiling
3    Author:     Dave Matthews, Cambridge University Computer Laboratory
4
5    Copyright (c) 2000-7
6        Cambridge University Technical Services Limited
7    Further development copyright (c) David C.J. Matthews 2011, 2015
8
9    This library is free software; you can redistribute it and/or
10    modify it under the terms of the GNU Lesser General Public
11    License version 2.1 as published by the Free Software Foundation.
12
13    This library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
17
18    You should have received a copy of the GNU Lesser General Public
19    License along with this library; if not, write to the Free Software
20    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21
22*/
23
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#elif defined(_WIN32)
27#include "winconfig.h"
28#else
29#error "No configuration file"
30#endif
31
32#ifdef HAVE_STDLIB_H
33#include <stdlib.h>
34#endif
35
36#ifdef HAVE_MALLOC_H
37#include <malloc.h>
38#endif
39
40#ifdef HAVE_ASSERT_H
41#include <assert.h>
42#define ASSERT(x) assert(x)
43#else
44#define ASSERT(x) 0
45#endif
46
47#include "globals.h"
48#include "arb.h"
49#include "processes.h"
50#include "polystring.h"
51#include "profiling.h"
52#include "save_vec.h"
53#include "rts_module.h"
54#include "memmgr.h"
55#include "scanaddrs.h"
56#include "locking.h"
57#include "run_time.h"
58#include "sys.h"
59#include "rtsentry.h"
60
61extern "C" {
62    POLYEXTERNALSYMBOL POLYUNSIGNED PolyProfiling(PolyObject *threadId, PolyWord mode);
63}
64
65static POLYUNSIGNED mainThreadCounts[MTP_MAXENTRY];
66static const char* const mainThreadText[MTP_MAXENTRY] =
67{
68    "UNKNOWN",
69    "GARBAGE COLLECTION (sharing phase)",
70    "GARBAGE COLLECTION (mark phase)",
71    "GARBAGE COLLECTION (copy phase)",
72    "GARBAGE COLLECTION (update phase)",
73    "GARBAGE COLLECTION (minor collection)",
74    "Common data sharing",
75    "Exporting",
76    "Saving state",
77    "Loading saved state",
78    "Profiling",
79    "Setting signal handler",
80    "Cygwin spawn",
81    "Storing module",
82    "Loading module"
83};
84
85// Entries for store profiling
86enum _extraStore {
87    EST_CODE = 0,
88    EST_STRING,
89    EST_BYTE,
90    EST_WORD,
91    EST_MUTABLE,
92    EST_MUTABLEBYTE,
93    EST_MAX_ENTRY
94};
95
96static POLYUNSIGNED extraStoreCounts[EST_MAX_ENTRY];
97static const char * const extraStoreText[EST_MAX_ENTRY] =
98{
99    "Function code",
100    "Strings",
101    "Byte data (long precision ints etc)",
102    "Unidentified word data",
103    "Unidentified mutable data",
104    "Mutable byte data (profiling counts)"
105};
106
107// Poly strings for "standard" counts.  These are generated from the C strings
108// above the first time profiling is activated.
109static PolyWord psRTSString[MTP_MAXENTRY], psExtraStrings[EST_MAX_ENTRY], psGCTotal;
110
111ProfileMode profileMode;
112// If we are just profiling a single thread, this is the thread data.
113static TaskData *singleThreadProfile = 0;
114
115typedef struct _PROFENTRY
116{
117    POLYUNSIGNED count;
118    PolyWord functionName;
119    struct _PROFENTRY *nextEntry;
120} PROFENTRY, *PPROFENTRY;
121
122class ProfileRequest: public MainThreadRequest
123{
124public:
125    ProfileRequest(unsigned prof, TaskData *pTask):
126        MainThreadRequest(MTP_PROFILING), mode(prof), pCallingThread(pTask), pTab(0), errorMessage(0) {}
127    ~ProfileRequest();
128    virtual void Perform();
129    Handle extractAsList(TaskData *taskData);
130
131private:
132    void getResults(void);
133    void getProfileResults(PolyWord *bottom, PolyWord *top);
134    PPROFENTRY newProfileEntry(void);
135
136private:
137    unsigned mode;
138    TaskData *pCallingThread;
139    PPROFENTRY pTab;
140
141public:
142    const char *errorMessage;
143};
144
145ProfileRequest::~ProfileRequest()
146{
147    PPROFENTRY p = pTab;
148    while (p != 0)
149    {
150        PPROFENTRY toFree = p;
151        p = p->nextEntry;
152        free(toFree);
153    }
154}
155
156// Lock to serialise updates of counts. Only used during update.
157// Not required when we print the counts since there's only one thread
158// running then.
159static PLock countLock;
160
161// Get the profile object associated with a piece of code.  Returns null if
162// there isn't one, in particular if this is in the old format.
163static PolyObject *getProfileObjectForCode(PolyObject *code)
164{
165    ASSERT(code->IsCodeObject());
166    PolyWord *consts;
167    POLYUNSIGNED constCount;
168    code->GetConstSegmentForCode(consts, constCount);
169    if (constCount < 3 || ! consts[2].IsDataPtr()) return 0;
170    PolyObject *profObject = consts[2].AsObjPtr();
171    if (profObject->IsMutable() && profObject->IsByteObject() && profObject->Length() == 1)
172        return profObject;
173    else return 0;
174}
175
176// Adds incr to the profile count for the function pointed at by
177// pc or by one of its callers.
178// This is called from a signal handler in the case of time profiling.
179void add_count(TaskData *taskData, POLYCODEPTR fpc, POLYUNSIGNED incr)
180{
181    // Check that the pc value is within the heap.  It could be
182    // in the assembly code.
183    PolyObject *codeObj = gMem.FindCodeObject(fpc);
184    if (codeObj)
185    {
186        PolyObject *profObject = getProfileObjectForCode(codeObj);
187        PLocker locker(&countLock);
188        if (profObject)
189            profObject->Set(0, PolyWord::FromUnsigned(profObject->Get(0).AsUnsigned() + incr));
190        return;
191    }
192    // Didn't find it.
193    {
194        PLocker locker(&countLock);
195        mainThreadCounts[MTP_USER_CODE] += incr;
196    }
197}
198
199
200// newProfileEntry - Make a new entry in the list
201PPROFENTRY ProfileRequest::newProfileEntry(void)
202{
203    PPROFENTRY newEntry = (PPROFENTRY)malloc(sizeof(PROFENTRY));
204    if (newEntry == 0) { errorMessage = "Insufficient memory"; return 0; }
205    newEntry->nextEntry = pTab;
206    pTab = newEntry;
207    return newEntry;
208}
209
210// We don't use ScanAddress here because we're only interested in the
211// objects themselves not the addresses in them.
212// We have to build the list of results in C memory rather than directly in
213// ML memory because we can't allocate in ML memory in the root thread.
214void ProfileRequest::getProfileResults(PolyWord *bottom, PolyWord *top)
215{
216    PolyWord *ptr = bottom;
217
218    while (ptr < top)
219    {
220        ptr++; // Skip the length word
221        PolyObject *obj = (PolyObject*)ptr;
222        if (obj->ContainsForwardingPtr())
223        {
224            // This used to be necessary when code objects were held in the
225            // general heap.  Now that we only ever scan code and permanent
226            // areas it's probably not needed.
227            while (obj->ContainsForwardingPtr())
228                obj = obj->GetForwardingPtr();
229            ASSERT(obj->ContainsNormalLengthWord());
230            ptr += obj->Length();
231        }
232        else
233        {
234            ASSERT(obj->ContainsNormalLengthWord());
235
236            if (obj->IsCodeObject())
237            {
238                PolyWord *firstConstant = obj->ConstPtrForCode();
239                PolyWord name = firstConstant[0];
240                PolyObject *profCount = getProfileObjectForCode(obj);
241                if (profCount)
242                {
243                    POLYUNSIGNED count = profCount->Get(0).AsUnsigned();
244
245                    if (count != 0)
246                    {
247                        if (name != TAGGED(0))
248                        {
249                            PPROFENTRY pEnt = newProfileEntry();
250                            if (pEnt == 0) return;
251                            pEnt->count = count;
252                            pEnt->functionName = name;
253                        }
254
255                        profCount->Set(0, PolyWord::FromUnsigned(0));
256                    }
257                }
258            } /* code object */
259            ptr += obj->Length();
260        } /* else */
261    } /* while */
262}
263
264void ProfileRequest::getResults(void)
265// Print profiling information and reset profile counts.
266{
267    for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
268    {
269        MemSpace *space = *i;
270        // Permanent areas are filled with objects from the bottom.
271        getProfileResults(space->bottom, space->top); // Bottom to top
272    }
273    for (std::vector<CodeSpace *>::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
274    {
275        CodeSpace *space = *i;
276        getProfileResults(space->bottom, space->top);
277    }
278
279    {
280        POLYUNSIGNED gc_count =
281            mainThreadCounts[MTP_GCPHASESHARING]+
282            mainThreadCounts[MTP_GCPHASEMARK]+
283            mainThreadCounts[MTP_GCPHASECOMPACT] +
284            mainThreadCounts[MTP_GCPHASEUPDATE] +
285            mainThreadCounts[MTP_GCQUICK];
286        if (gc_count)
287        {
288            PPROFENTRY pEnt = newProfileEntry();
289            if (pEnt == 0) return; // Report insufficient memory?
290            pEnt->count = gc_count;
291            pEnt->functionName = psGCTotal;
292        }
293    }
294
295    for (unsigned k = 0; k < MTP_MAXENTRY; k++)
296    {
297        if (mainThreadCounts[k])
298        {
299            PPROFENTRY pEnt = newProfileEntry();
300            if (pEnt == 0) return; // Report insufficient memory?
301            pEnt->count = mainThreadCounts[k];
302            pEnt->functionName = psRTSString[k];
303            mainThreadCounts[k] = 0;
304        }
305    }
306
307    for (unsigned l = 0; l < EST_MAX_ENTRY; l++)
308    {
309        if (extraStoreCounts[l])
310        {
311            PPROFENTRY pEnt = newProfileEntry();
312            if (pEnt == 0) return; // Report insufficient memory?
313            pEnt->count = extraStoreCounts[l];
314            pEnt->functionName = psExtraStrings[l];
315            extraStoreCounts[l] = 0;
316        }
317    }
318}
319
320// Extract the accumulated results as an ML list of pairs of the count and the string.
321Handle ProfileRequest::extractAsList(TaskData *taskData)
322{
323    Handle saved = taskData->saveVec.mark();
324    Handle list = taskData->saveVec.push(ListNull);
325
326    for (PPROFENTRY p = pTab; p != 0; p = p->nextEntry)
327    {
328        Handle pair = alloc_and_save(taskData, 2);
329        Handle countValue = Make_arbitrary_precision(taskData, p->count);
330        pair->WordP()->Set(0, countValue->Word());
331        pair->WordP()->Set(1, p->functionName);
332        Handle next  = alloc_and_save(taskData, sizeof(ML_Cons_Cell) / sizeof(PolyWord));
333        DEREFLISTHANDLE(next)->h = pair->Word();
334        DEREFLISTHANDLE(next)->t =list->Word();
335
336        taskData->saveVec.reset(saved);
337        list = taskData->saveVec.push(next->Word());
338    }
339
340    return list;
341}
342
343void handleProfileTrap(TaskData *taskData, SIGNALCONTEXT *context)
344{
345    if (singleThreadProfile != 0 && singleThreadProfile != taskData)
346        return;
347
348    /* If we are in the garbage-collector add the count to "gc_count"
349        otherwise try to find out where we are. */
350    if (mainThreadPhase == MTP_USER_CODE)
351    {
352        if (taskData == 0 || ! taskData->AddTimeProfileCount(context))
353            mainThreadCounts[MTP_USER_CODE]++;
354        // On Mac OS X all virtual timer interrupts seem to be directed to the root thread
355        // so all the counts will be "unknown".
356    }
357    else mainThreadCounts[mainThreadPhase]++;
358}
359
360// Called from the GC when allocation profiling is on.
361void AddObjectProfile(PolyObject *obj)
362{
363    ASSERT(obj->ContainsNormalLengthWord());
364    POLYUNSIGNED length = obj->Length();
365
366    if (obj->IsWordObject() && OBJ_HAS_PROFILE(obj->LengthWord()))
367    {
368        // It has a profile pointer.  The last word should point to the
369        // closure or code of the allocating function.  Add the size of this to the count.
370        ASSERT(length != 0);
371        PolyWord profWord = obj->Get(length-1);
372        ASSERT(profWord.IsDataPtr());
373        PolyObject *profObject = profWord.AsObjPtr();
374        ASSERT(profObject->IsMutable() && profObject->IsByteObject() && profObject->Length() == 1);
375        profObject->Set(0, PolyWord::FromUnsigned(profObject->Get(0).AsUnsigned() + length + 1));
376    }
377    // If it doesn't have a profile pointer add it to the appropriate count.
378    else if (obj->IsMutable())
379    {
380        if (obj->IsByteObject())
381            extraStoreCounts[EST_MUTABLEBYTE] += length+1;
382        else extraStoreCounts[EST_MUTABLE] += length+1;
383    }
384    else if (obj->IsCodeObject())
385        extraStoreCounts[EST_CODE] += length+1;
386    else if (obj->IsByteObject())
387    {
388        // Try to separate strings from other byte data.  This is only
389        // approximate.
390        if (OBJ_IS_NEGATIVE(obj->LengthWord()))
391            extraStoreCounts[EST_BYTE] += length+1;
392        else
393        {
394            PolyStringObject *possString = (PolyStringObject*)obj;
395            POLYUNSIGNED bytes = length * sizeof(PolyWord);
396            // If the length of the string as given in the first word is sufficient
397            // to fit in the exact number of words then it's probably a string.
398            if (length >= 2 &&
399                possString->length <= bytes - sizeof(POLYUNSIGNED) &&
400                possString->length > bytes - 2 * sizeof(POLYUNSIGNED))
401                    extraStoreCounts[EST_STRING] += length+1;
402            else
403            {
404                extraStoreCounts[EST_BYTE] += length+1;
405            }
406        }
407    }
408    else extraStoreCounts[EST_WORD] += length+1;
409}
410
411// Called from ML to control profiling.
412static Handle profilerc(TaskData *taskData, Handle mode_handle)
413/* Profiler - generates statistical profiles of the code.
414   The parameter is an integer which determines the value to be profiled.
415   When profiler is called it always resets the profiling and prints out any
416   values which have been accumulated.
417   If the parameter is 0 this is all it does,
418   if the parameter is 1 then it produces time profiling,
419   if the parameter is 2 it produces store profiling.
420   3 - arbitrary precision emulation traps. */
421{
422    unsigned mode = get_C_unsigned(taskData, mode_handle->Word());
423    {
424        // Create any strings we need.  We only need to do this once but
425        // it must be done by a non-root thread since it needs a taskData object.
426        // Don't bother locking.  At worst we'll create some garbage.
427        for (unsigned k = 0; k < MTP_MAXENTRY; k++)
428        {
429            if (psRTSString[k] == TAGGED(0))
430                psRTSString[k] = C_string_to_Poly(taskData, mainThreadText[k]);
431        }
432        for (unsigned k = 0; k < EST_MAX_ENTRY; k++)
433        {
434            if (psExtraStrings[k] == TAGGED(0))
435                psExtraStrings[k] = C_string_to_Poly(taskData, extraStoreText[k]);
436        }
437        if (psGCTotal == TAGGED(0))
438            psGCTotal = C_string_to_Poly(taskData, "GARBAGE COLLECTION (total)");
439    }
440    // All these actions are performed by the root thread.  Only profile
441    // printing needs to be performed with all the threads stopped but it's
442    // simpler to serialise all requests.
443    ProfileRequest request(mode, taskData);
444    processes->MakeRootRequest(taskData, &request);
445    if (request.errorMessage != 0) raise_exception_string(taskData, EXC_Fail, request.errorMessage);
446    return request.extractAsList(taskData);
447}
448
449POLYUNSIGNED PolyProfiling(PolyObject *threadId, PolyWord mode)
450{
451    TaskData *taskData = TaskData::FindTaskForId(threadId);
452    ASSERT(taskData != 0);
453    taskData->PreRTSCall();
454    Handle reset = taskData->saveVec.mark();
455    Handle pushedMode = taskData->saveVec.push(mode);
456    Handle result = 0;
457
458    try {
459        result = profilerc(taskData, pushedMode);
460    } catch (...) { } // If an ML exception is raised
461
462    taskData->saveVec.reset(reset);
463    taskData->PostRTSCall();
464    if (result == 0) return TAGGED(0).AsUnsigned();
465    else return result->Word().AsUnsigned();
466}
467
468// This is called from the root thread when all the ML threads have been paused.
469void ProfileRequest::Perform()
470{
471    if (mode != kProfileOff && profileMode != kProfileOff)
472    {
473        // Profiling must be stopped first.
474        errorMessage = "Profiling is currently active";
475        return;
476    }
477
478    singleThreadProfile = 0; // Unless kProfileTimeThread is given this should be 0
479
480    switch (mode)
481    {
482    case kProfileOff:
483        // Turn off old profiling mechanism and print out accumulated results
484        profileMode = kProfileOff;
485        processes->StopProfiling();
486        getResults();
487        // Remove all the bitmaps to free up memory
488        gMem.RemoveProfilingBitmaps();
489        break;
490
491    case kProfileTimeThread:
492        singleThreadProfile = pCallingThread;
493        // And drop through to kProfileTime
494
495    case kProfileTime:
496        profileMode = kProfileTime;
497        processes->StartProfiling();
498        break;
499
500    case kProfileStoreAllocation:
501        profileMode = kProfileStoreAllocation;
502        break;
503
504    case kProfileEmulation:
505        profileMode = kProfileEmulation;
506        break;
507
508    case kProfileLiveData:
509        profileMode = kProfileLiveData;
510        break;
511
512    case kProfileLiveMutables:
513        profileMode = kProfileLiveMutables;
514        break;
515
516    case kProfileMutexContention:
517        profileMode = kProfileMutexContention;
518        break;
519
520    default: /* do nothing */
521        break;
522    }
523
524}
525
526struct _entrypts profilingEPT[] =
527{
528    // Profiling
529    { "PolyProfiling",                  (polyRTSFunction)&PolyProfiling},
530
531    { NULL, NULL} // End of list.
532};
533
534
535class Profiling: public RtsModule
536{
537public:
538    virtual void Init(void);
539    virtual void GarbageCollect(ScanAddress *process);
540};
541
542// Declare this.  It will be automatically added to the table.
543static Profiling profileModule;
544
545void Profiling::Init(void)
546{
547    // Reset profiling counts.
548    profileMode = kProfileOff;
549    for (unsigned k = 0; k < MTP_MAXENTRY; k++) mainThreadCounts[k] = 0;
550}
551
552void Profiling::GarbageCollect(ScanAddress *process)
553{
554    // Process any strings in the table.
555    for (unsigned k = 0; k < MTP_MAXENTRY; k++)
556        process->ScanRuntimeWord(&psRTSString[k]);
557    for (unsigned k = 0; k < EST_MAX_ENTRY; k++)
558        process->ScanRuntimeWord(&psExtraStrings[k]);
559    process->ScanRuntimeWord(&psGCTotal);
560}
561