1/* 2 Title: Profiling 3 Author: Dave Matthews, Cambridge University Computer Laboratory 4 5 Copyright (c) 2000-7 6 Cambridge University Technical Services Limited 7 Further development copyright (c) David C.J. Matthews 2011, 2015 8 9 This library is free software; you can redistribute it and/or 10 modify it under the terms of the GNU Lesser General Public 11 License version 2.1 as published by the Free Software Foundation. 12 13 This library is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 Lesser General Public License for more details. 17 18 You should have received a copy of the GNU Lesser General Public 19 License along with this library; if not, write to the Free Software 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 21 22*/ 23 24#ifdef HAVE_CONFIG_H 25#include "config.h" 26#elif defined(_WIN32) 27#include "winconfig.h" 28#else 29#error "No configuration file" 30#endif 31 32#ifdef HAVE_STDLIB_H 33#include <stdlib.h> 34#endif 35 36#ifdef HAVE_MALLOC_H 37#include <malloc.h> 38#endif 39 40#ifdef HAVE_ASSERT_H 41#include <assert.h> 42#define ASSERT(x) assert(x) 43#else 44#define ASSERT(x) 0 45#endif 46 47#include "globals.h" 48#include "arb.h" 49#include "processes.h" 50#include "polystring.h" 51#include "profiling.h" 52#include "save_vec.h" 53#include "rts_module.h" 54#include "memmgr.h" 55#include "scanaddrs.h" 56#include "locking.h" 57#include "run_time.h" 58#include "sys.h" 59#include "rtsentry.h" 60 61extern "C" { 62 POLYEXTERNALSYMBOL POLYUNSIGNED PolyProfiling(PolyObject *threadId, PolyWord mode); 63} 64 65static POLYUNSIGNED mainThreadCounts[MTP_MAXENTRY]; 66static const char* const mainThreadText[MTP_MAXENTRY] = 67{ 68 "UNKNOWN", 69 "GARBAGE COLLECTION (sharing phase)", 70 "GARBAGE COLLECTION (mark phase)", 71 "GARBAGE COLLECTION (copy phase)", 72 "GARBAGE COLLECTION (update phase)", 73 "GARBAGE COLLECTION (minor collection)", 74 "Common data sharing", 75 "Exporting", 76 "Saving state", 77 "Loading saved state", 78 "Profiling", 79 "Setting signal handler", 80 "Cygwin spawn", 81 "Storing module", 82 "Loading module" 83}; 84 85// Entries for store profiling 86enum _extraStore { 87 EST_CODE = 0, 88 EST_STRING, 89 EST_BYTE, 90 EST_WORD, 91 EST_MUTABLE, 92 EST_MUTABLEBYTE, 93 EST_MAX_ENTRY 94}; 95 96static POLYUNSIGNED extraStoreCounts[EST_MAX_ENTRY]; 97static const char * const extraStoreText[EST_MAX_ENTRY] = 98{ 99 "Function code", 100 "Strings", 101 "Byte data (long precision ints etc)", 102 "Unidentified word data", 103 "Unidentified mutable data", 104 "Mutable byte data (profiling counts)" 105}; 106 107// Poly strings for "standard" counts. These are generated from the C strings 108// above the first time profiling is activated. 109static PolyWord psRTSString[MTP_MAXENTRY], psExtraStrings[EST_MAX_ENTRY], psGCTotal; 110 111ProfileMode profileMode; 112// If we are just profiling a single thread, this is the thread data. 113static TaskData *singleThreadProfile = 0; 114 115typedef struct _PROFENTRY 116{ 117 POLYUNSIGNED count; 118 PolyWord functionName; 119 struct _PROFENTRY *nextEntry; 120} PROFENTRY, *PPROFENTRY; 121 122class ProfileRequest: public MainThreadRequest 123{ 124public: 125 ProfileRequest(unsigned prof, TaskData *pTask): 126 MainThreadRequest(MTP_PROFILING), mode(prof), pCallingThread(pTask), pTab(0), errorMessage(0) {} 127 ~ProfileRequest(); 128 virtual void Perform(); 129 Handle extractAsList(TaskData *taskData); 130 131private: 132 void getResults(void); 133 void getProfileResults(PolyWord *bottom, PolyWord *top); 134 PPROFENTRY newProfileEntry(void); 135 136private: 137 unsigned mode; 138 TaskData *pCallingThread; 139 PPROFENTRY pTab; 140 141public: 142 const char *errorMessage; 143}; 144 145ProfileRequest::~ProfileRequest() 146{ 147 PPROFENTRY p = pTab; 148 while (p != 0) 149 { 150 PPROFENTRY toFree = p; 151 p = p->nextEntry; 152 free(toFree); 153 } 154} 155 156// Lock to serialise updates of counts. Only used during update. 157// Not required when we print the counts since there's only one thread 158// running then. 159static PLock countLock; 160 161// Get the profile object associated with a piece of code. Returns null if 162// there isn't one, in particular if this is in the old format. 163static PolyObject *getProfileObjectForCode(PolyObject *code) 164{ 165 ASSERT(code->IsCodeObject()); 166 PolyWord *consts; 167 POLYUNSIGNED constCount; 168 code->GetConstSegmentForCode(consts, constCount); 169 if (constCount < 3 || ! consts[2].IsDataPtr()) return 0; 170 PolyObject *profObject = consts[2].AsObjPtr(); 171 if (profObject->IsMutable() && profObject->IsByteObject() && profObject->Length() == 1) 172 return profObject; 173 else return 0; 174} 175 176// Adds incr to the profile count for the function pointed at by 177// pc or by one of its callers. 178// This is called from a signal handler in the case of time profiling. 179void add_count(TaskData *taskData, POLYCODEPTR fpc, POLYUNSIGNED incr) 180{ 181 // Check that the pc value is within the heap. It could be 182 // in the assembly code. 183 PolyObject *codeObj = gMem.FindCodeObject(fpc); 184 if (codeObj) 185 { 186 PolyObject *profObject = getProfileObjectForCode(codeObj); 187 PLocker locker(&countLock); 188 if (profObject) 189 profObject->Set(0, PolyWord::FromUnsigned(profObject->Get(0).AsUnsigned() + incr)); 190 return; 191 } 192 // Didn't find it. 193 { 194 PLocker locker(&countLock); 195 mainThreadCounts[MTP_USER_CODE] += incr; 196 } 197} 198 199 200// newProfileEntry - Make a new entry in the list 201PPROFENTRY ProfileRequest::newProfileEntry(void) 202{ 203 PPROFENTRY newEntry = (PPROFENTRY)malloc(sizeof(PROFENTRY)); 204 if (newEntry == 0) { errorMessage = "Insufficient memory"; return 0; } 205 newEntry->nextEntry = pTab; 206 pTab = newEntry; 207 return newEntry; 208} 209 210// We don't use ScanAddress here because we're only interested in the 211// objects themselves not the addresses in them. 212// We have to build the list of results in C memory rather than directly in 213// ML memory because we can't allocate in ML memory in the root thread. 214void ProfileRequest::getProfileResults(PolyWord *bottom, PolyWord *top) 215{ 216 PolyWord *ptr = bottom; 217 218 while (ptr < top) 219 { 220 ptr++; // Skip the length word 221 PolyObject *obj = (PolyObject*)ptr; 222 if (obj->ContainsForwardingPtr()) 223 { 224 // This used to be necessary when code objects were held in the 225 // general heap. Now that we only ever scan code and permanent 226 // areas it's probably not needed. 227 while (obj->ContainsForwardingPtr()) 228 obj = obj->GetForwardingPtr(); 229 ASSERT(obj->ContainsNormalLengthWord()); 230 ptr += obj->Length(); 231 } 232 else 233 { 234 ASSERT(obj->ContainsNormalLengthWord()); 235 236 if (obj->IsCodeObject()) 237 { 238 PolyWord *firstConstant = obj->ConstPtrForCode(); 239 PolyWord name = firstConstant[0]; 240 PolyObject *profCount = getProfileObjectForCode(obj); 241 if (profCount) 242 { 243 POLYUNSIGNED count = profCount->Get(0).AsUnsigned(); 244 245 if (count != 0) 246 { 247 if (name != TAGGED(0)) 248 { 249 PPROFENTRY pEnt = newProfileEntry(); 250 if (pEnt == 0) return; 251 pEnt->count = count; 252 pEnt->functionName = name; 253 } 254 255 profCount->Set(0, PolyWord::FromUnsigned(0)); 256 } 257 } 258 } /* code object */ 259 ptr += obj->Length(); 260 } /* else */ 261 } /* while */ 262} 263 264void ProfileRequest::getResults(void) 265// Print profiling information and reset profile counts. 266{ 267 for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) 268 { 269 MemSpace *space = *i; 270 // Permanent areas are filled with objects from the bottom. 271 getProfileResults(space->bottom, space->top); // Bottom to top 272 } 273 for (std::vector<CodeSpace *>::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++) 274 { 275 CodeSpace *space = *i; 276 getProfileResults(space->bottom, space->top); 277 } 278 279 { 280 POLYUNSIGNED gc_count = 281 mainThreadCounts[MTP_GCPHASESHARING]+ 282 mainThreadCounts[MTP_GCPHASEMARK]+ 283 mainThreadCounts[MTP_GCPHASECOMPACT] + 284 mainThreadCounts[MTP_GCPHASEUPDATE] + 285 mainThreadCounts[MTP_GCQUICK]; 286 if (gc_count) 287 { 288 PPROFENTRY pEnt = newProfileEntry(); 289 if (pEnt == 0) return; // Report insufficient memory? 290 pEnt->count = gc_count; 291 pEnt->functionName = psGCTotal; 292 } 293 } 294 295 for (unsigned k = 0; k < MTP_MAXENTRY; k++) 296 { 297 if (mainThreadCounts[k]) 298 { 299 PPROFENTRY pEnt = newProfileEntry(); 300 if (pEnt == 0) return; // Report insufficient memory? 301 pEnt->count = mainThreadCounts[k]; 302 pEnt->functionName = psRTSString[k]; 303 mainThreadCounts[k] = 0; 304 } 305 } 306 307 for (unsigned l = 0; l < EST_MAX_ENTRY; l++) 308 { 309 if (extraStoreCounts[l]) 310 { 311 PPROFENTRY pEnt = newProfileEntry(); 312 if (pEnt == 0) return; // Report insufficient memory? 313 pEnt->count = extraStoreCounts[l]; 314 pEnt->functionName = psExtraStrings[l]; 315 extraStoreCounts[l] = 0; 316 } 317 } 318} 319 320// Extract the accumulated results as an ML list of pairs of the count and the string. 321Handle ProfileRequest::extractAsList(TaskData *taskData) 322{ 323 Handle saved = taskData->saveVec.mark(); 324 Handle list = taskData->saveVec.push(ListNull); 325 326 for (PPROFENTRY p = pTab; p != 0; p = p->nextEntry) 327 { 328 Handle pair = alloc_and_save(taskData, 2); 329 Handle countValue = Make_arbitrary_precision(taskData, p->count); 330 pair->WordP()->Set(0, countValue->Word()); 331 pair->WordP()->Set(1, p->functionName); 332 Handle next = alloc_and_save(taskData, sizeof(ML_Cons_Cell) / sizeof(PolyWord)); 333 DEREFLISTHANDLE(next)->h = pair->Word(); 334 DEREFLISTHANDLE(next)->t =list->Word(); 335 336 taskData->saveVec.reset(saved); 337 list = taskData->saveVec.push(next->Word()); 338 } 339 340 return list; 341} 342 343void handleProfileTrap(TaskData *taskData, SIGNALCONTEXT *context) 344{ 345 if (singleThreadProfile != 0 && singleThreadProfile != taskData) 346 return; 347 348 /* If we are in the garbage-collector add the count to "gc_count" 349 otherwise try to find out where we are. */ 350 if (mainThreadPhase == MTP_USER_CODE) 351 { 352 if (taskData == 0 || ! taskData->AddTimeProfileCount(context)) 353 mainThreadCounts[MTP_USER_CODE]++; 354 // On Mac OS X all virtual timer interrupts seem to be directed to the root thread 355 // so all the counts will be "unknown". 356 } 357 else mainThreadCounts[mainThreadPhase]++; 358} 359 360// Called from the GC when allocation profiling is on. 361void AddObjectProfile(PolyObject *obj) 362{ 363 ASSERT(obj->ContainsNormalLengthWord()); 364 POLYUNSIGNED length = obj->Length(); 365 366 if (obj->IsWordObject() && OBJ_HAS_PROFILE(obj->LengthWord())) 367 { 368 // It has a profile pointer. The last word should point to the 369 // closure or code of the allocating function. Add the size of this to the count. 370 ASSERT(length != 0); 371 PolyWord profWord = obj->Get(length-1); 372 ASSERT(profWord.IsDataPtr()); 373 PolyObject *profObject = profWord.AsObjPtr(); 374 ASSERT(profObject->IsMutable() && profObject->IsByteObject() && profObject->Length() == 1); 375 profObject->Set(0, PolyWord::FromUnsigned(profObject->Get(0).AsUnsigned() + length + 1)); 376 } 377 // If it doesn't have a profile pointer add it to the appropriate count. 378 else if (obj->IsMutable()) 379 { 380 if (obj->IsByteObject()) 381 extraStoreCounts[EST_MUTABLEBYTE] += length+1; 382 else extraStoreCounts[EST_MUTABLE] += length+1; 383 } 384 else if (obj->IsCodeObject()) 385 extraStoreCounts[EST_CODE] += length+1; 386 else if (obj->IsByteObject()) 387 { 388 // Try to separate strings from other byte data. This is only 389 // approximate. 390 if (OBJ_IS_NEGATIVE(obj->LengthWord())) 391 extraStoreCounts[EST_BYTE] += length+1; 392 else 393 { 394 PolyStringObject *possString = (PolyStringObject*)obj; 395 POLYUNSIGNED bytes = length * sizeof(PolyWord); 396 // If the length of the string as given in the first word is sufficient 397 // to fit in the exact number of words then it's probably a string. 398 if (length >= 2 && 399 possString->length <= bytes - sizeof(POLYUNSIGNED) && 400 possString->length > bytes - 2 * sizeof(POLYUNSIGNED)) 401 extraStoreCounts[EST_STRING] += length+1; 402 else 403 { 404 extraStoreCounts[EST_BYTE] += length+1; 405 } 406 } 407 } 408 else extraStoreCounts[EST_WORD] += length+1; 409} 410 411// Called from ML to control profiling. 412static Handle profilerc(TaskData *taskData, Handle mode_handle) 413/* Profiler - generates statistical profiles of the code. 414 The parameter is an integer which determines the value to be profiled. 415 When profiler is called it always resets the profiling and prints out any 416 values which have been accumulated. 417 If the parameter is 0 this is all it does, 418 if the parameter is 1 then it produces time profiling, 419 if the parameter is 2 it produces store profiling. 420 3 - arbitrary precision emulation traps. */ 421{ 422 unsigned mode = get_C_unsigned(taskData, mode_handle->Word()); 423 { 424 // Create any strings we need. We only need to do this once but 425 // it must be done by a non-root thread since it needs a taskData object. 426 // Don't bother locking. At worst we'll create some garbage. 427 for (unsigned k = 0; k < MTP_MAXENTRY; k++) 428 { 429 if (psRTSString[k] == TAGGED(0)) 430 psRTSString[k] = C_string_to_Poly(taskData, mainThreadText[k]); 431 } 432 for (unsigned k = 0; k < EST_MAX_ENTRY; k++) 433 { 434 if (psExtraStrings[k] == TAGGED(0)) 435 psExtraStrings[k] = C_string_to_Poly(taskData, extraStoreText[k]); 436 } 437 if (psGCTotal == TAGGED(0)) 438 psGCTotal = C_string_to_Poly(taskData, "GARBAGE COLLECTION (total)"); 439 } 440 // All these actions are performed by the root thread. Only profile 441 // printing needs to be performed with all the threads stopped but it's 442 // simpler to serialise all requests. 443 ProfileRequest request(mode, taskData); 444 processes->MakeRootRequest(taskData, &request); 445 if (request.errorMessage != 0) raise_exception_string(taskData, EXC_Fail, request.errorMessage); 446 return request.extractAsList(taskData); 447} 448 449POLYUNSIGNED PolyProfiling(PolyObject *threadId, PolyWord mode) 450{ 451 TaskData *taskData = TaskData::FindTaskForId(threadId); 452 ASSERT(taskData != 0); 453 taskData->PreRTSCall(); 454 Handle reset = taskData->saveVec.mark(); 455 Handle pushedMode = taskData->saveVec.push(mode); 456 Handle result = 0; 457 458 try { 459 result = profilerc(taskData, pushedMode); 460 } catch (...) { } // If an ML exception is raised 461 462 taskData->saveVec.reset(reset); 463 taskData->PostRTSCall(); 464 if (result == 0) return TAGGED(0).AsUnsigned(); 465 else return result->Word().AsUnsigned(); 466} 467 468// This is called from the root thread when all the ML threads have been paused. 469void ProfileRequest::Perform() 470{ 471 if (mode != kProfileOff && profileMode != kProfileOff) 472 { 473 // Profiling must be stopped first. 474 errorMessage = "Profiling is currently active"; 475 return; 476 } 477 478 singleThreadProfile = 0; // Unless kProfileTimeThread is given this should be 0 479 480 switch (mode) 481 { 482 case kProfileOff: 483 // Turn off old profiling mechanism and print out accumulated results 484 profileMode = kProfileOff; 485 processes->StopProfiling(); 486 getResults(); 487 // Remove all the bitmaps to free up memory 488 gMem.RemoveProfilingBitmaps(); 489 break; 490 491 case kProfileTimeThread: 492 singleThreadProfile = pCallingThread; 493 // And drop through to kProfileTime 494 495 case kProfileTime: 496 profileMode = kProfileTime; 497 processes->StartProfiling(); 498 break; 499 500 case kProfileStoreAllocation: 501 profileMode = kProfileStoreAllocation; 502 break; 503 504 case kProfileEmulation: 505 profileMode = kProfileEmulation; 506 break; 507 508 case kProfileLiveData: 509 profileMode = kProfileLiveData; 510 break; 511 512 case kProfileLiveMutables: 513 profileMode = kProfileLiveMutables; 514 break; 515 516 case kProfileMutexContention: 517 profileMode = kProfileMutexContention; 518 break; 519 520 default: /* do nothing */ 521 break; 522 } 523 524} 525 526struct _entrypts profilingEPT[] = 527{ 528 // Profiling 529 { "PolyProfiling", (polyRTSFunction)&PolyProfiling}, 530 531 { NULL, NULL} // End of list. 532}; 533 534 535class Profiling: public RtsModule 536{ 537public: 538 virtual void Init(void); 539 virtual void GarbageCollect(ScanAddress *process); 540}; 541 542// Declare this. It will be automatically added to the table. 543static Profiling profileModule; 544 545void Profiling::Init(void) 546{ 547 // Reset profiling counts. 548 profileMode = kProfileOff; 549 for (unsigned k = 0; k < MTP_MAXENTRY; k++) mainThreadCounts[k] = 0; 550} 551 552void Profiling::GarbageCollect(ScanAddress *process) 553{ 554 // Process any strings in the table. 555 for (unsigned k = 0; k < MTP_MAXENTRY; k++) 556 process->ScanRuntimeWord(&psRTSString[k]); 557 for (unsigned k = 0; k < EST_MAX_ENTRY; k++) 558 process->ScanRuntimeWord(&psExtraStrings[k]); 559 process->ScanRuntimeWord(&psGCTotal); 560} 561