//===-- xray_profile_collector.cc ------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This implements the interface for the profileCollectorService.
//
//===----------------------------------------------------------------------===//
#include "xray_profile_collector.h"
#include "sanitizer_common/sanitizer_common.h"
#include "xray_allocator.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <memory>
#include <pthread.h>
#include <utility>

namespace __xray {
namespace profileCollectorService {

namespace {

// Guards all of the file-local mutable state below (the thread-data array,
// its allocators, and the serialized profile buffers).
SpinMutex GlobalMutex;

// Pairs a thread id with raw storage for a FunctionCallTrie.
// NOTE(review): not referenced elsewhere in this file — possibly a leftover
// from an earlier design; confirm before removing.
struct ThreadTrie {
  tid_t TId;
  typename std::aligned_storage<sizeof(FunctionCallTrie)>::type TrieStorage;
};

// One serialized per-thread profile block: a heap buffer (from
// allocateBuffer) and its size in bytes.
struct ProfileBuffer {
  void *Data;
  size_t Size;
};

// Current version of the profile format.
constexpr u64 XRayProfilingVersion = 0x20180424;

// Identifier for XRay profiling files 'xrayprof' in hex.
constexpr u64 XRayMagicBytes = 0x7872617970726f66;

// On-disk file header emitted as the first XRayBuffer by nextBuffer().
// Field order and widths define the serialized layout; do not reorder.
struct XRayProfilingFileHeader {
  const u64 MagicBytes = XRayMagicBytes;
  const u64 Version = XRayProfilingVersion;
  u64 Timestamp = 0; // System time in nanoseconds.
  u64 PID = 0;       // Process ID.
};

// Per-thread block header written ahead of each thread's records by
// serializeRecords(); 16 bytes total (4 + 4 + 8).
struct BlockHeader {
  u32 BlockSize;
  u32 BlockNum;
  u64 ThreadId;
};

// Everything posted by an exiting thread: the queue its buffers came from,
// the buffers themselves, the allocators built on top of them, and the trie.
struct ThreadData {
  BufferQueue *BQ;
  FunctionCallTrie::Allocators::Buffers Buffers;
  FunctionCallTrie::Allocators Allocators;
  FunctionCallTrie FCT;
  tid_t TId;
};

using ThreadDataArray = Array<ThreadData>;
using ThreadDataAllocator = ThreadDataArray::AllocatorType;

// We use a separate buffer queue for the backing store for the allocator used
// by the ThreadData array. This lets us host the buffers, allocators, and tries
// associated with a thread by moving the data into the array instead of
// attempting to copy the data to a separately backed set of tries.
static typename std::aligned_storage<
    sizeof(BufferQueue), alignof(BufferQueue)>::type BufferQueueStorage;
static BufferQueue *BQ = nullptr;
static BufferQueue::Buffer Buffer;
static typename std::aligned_storage<sizeof(ThreadDataAllocator),
                                     alignof(ThreadDataAllocator)>::type
    ThreadDataAllocatorStorage;
static typename std::aligned_storage<sizeof(ThreadDataArray),
                                     alignof(ThreadDataArray)>::type
    ThreadDataArrayStorage;

// Non-null only between a successful reset() and the next reset(); both are
// placement-new'd into the storage above.
static ThreadDataAllocator *TDAllocator = nullptr;
static ThreadDataArray *TDArray = nullptr;

using ProfileBufferArray = Array<ProfileBuffer>;
using ProfileBufferArrayAllocator = typename ProfileBufferArray::AllocatorType;

// These need to be global aligned storage to avoid dynamic initialization. We
// need these to be aligned to allow us to placement new objects into the
// storage, and have pointers to those objects be appropriately aligned.
95static typename std::aligned_storage<sizeof(ProfileBufferArray)>::type 96 ProfileBuffersStorage; 97static typename std::aligned_storage<sizeof(ProfileBufferArrayAllocator)>::type 98 ProfileBufferArrayAllocatorStorage; 99 100static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr; 101static ProfileBufferArray *ProfileBuffers = nullptr; 102 103// Use a global flag to determine whether the collector implementation has been 104// initialized. 105static atomic_uint8_t CollectorInitialized{0}; 106 107} // namespace 108 109void post(BufferQueue *Q, FunctionCallTrie &&T, 110 FunctionCallTrie::Allocators &&A, 111 FunctionCallTrie::Allocators::Buffers &&B, 112 tid_t TId) XRAY_NEVER_INSTRUMENT { 113 DCHECK_NE(Q, nullptr); 114 115 // Bail out early if the collector has not been initialized. 116 if (!atomic_load(&CollectorInitialized, memory_order_acquire)) { 117 T.~FunctionCallTrie(); 118 A.~Allocators(); 119 Q->releaseBuffer(B.NodeBuffer); 120 Q->releaseBuffer(B.RootsBuffer); 121 Q->releaseBuffer(B.ShadowStackBuffer); 122 Q->releaseBuffer(B.NodeIdPairBuffer); 123 B.~Buffers(); 124 return; 125 } 126 127 { 128 SpinMutexLock Lock(&GlobalMutex); 129 DCHECK_NE(TDAllocator, nullptr); 130 DCHECK_NE(TDArray, nullptr); 131 132 if (TDArray->AppendEmplace(Q, std::move(B), std::move(A), std::move(T), 133 TId) == nullptr) { 134 // If we fail to add the data to the array, we should destroy the objects 135 // handed us. 136 T.~FunctionCallTrie(); 137 A.~Allocators(); 138 Q->releaseBuffer(B.NodeBuffer); 139 Q->releaseBuffer(B.RootsBuffer); 140 Q->releaseBuffer(B.ShadowStackBuffer); 141 Q->releaseBuffer(B.NodeIdPairBuffer); 142 B.~Buffers(); 143 } 144 } 145} 146 147// A PathArray represents the function id's representing a stack trace. In this 148// context a path is almost always represented from the leaf function in a call 149// stack to a root of the call trie. 
using PathArray = Array<int32_t>;

struct ProfileRecord {
  using PathAllocator = typename PathArray::AllocatorType;

  // The Path in this record is the function id's from the leaf to the root of
  // the function call stack as represented from a FunctionCallTrie.
  PathArray Path;
  const FunctionCallTrie::Node *Node;
};

namespace {

using ProfileRecordArray = Array<ProfileRecord>;

// Walk a depth-first traversal of each root of the FunctionCallTrie to generate
// the path(s) and the data associated with the path. Appends one ProfileRecord
// per trie node into PRs; returns early (silently truncating the output) if an
// append fails, which the caller tolerates.
static void
populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA,
                const FunctionCallTrie &Trie) XRAY_NEVER_INSTRUMENT {
  // Explicit DFS stack so we never recurse in instrumented builds.
  using StackArray = Array<const FunctionCallTrie::Node *>;
  using StackAllocator = typename StackArray::AllocatorType;
  StackAllocator StackAlloc(profilingFlags()->stack_allocator_max);
  StackArray DFSStack(StackAlloc);
  for (const auto *R : Trie.getRoots()) {
    DFSStack.Append(R);
    while (!DFSStack.empty()) {
      auto *Node = DFSStack.back();
      DFSStack.trim(1);
      if (Node == nullptr)
        continue;
      auto Record = PRs.AppendEmplace(PathArray{PA}, Node);
      if (Record == nullptr)
        return;
      DCHECK_NE(Record, nullptr);

      // Traverse the Node's parents and as we're doing so, get the FIds in
      // the order they appear (leaf first, root last).
      for (auto N = Node; N != nullptr; N = N->Parent)
        Record->Path.Append(N->FId);
      DCHECK(!Record->Path.empty());

      for (const auto C : Node->Callees)
        DFSStack.Append(C.NodePtr);
    }
  }
}

// Write one thread's block into Buffer->Data: the 16-byte BlockHeader, then
// for each record the path FIds, a 4-byte zero sentinel, the call count, and
// the cumulative local time. Buffer->Size must have been pre-computed to match
// exactly (see the sizing loop in serialize()); the trailing DCHECK enforces
// this in debug builds.
static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header,
                             const ProfileRecordArray &ProfileRecords)
    XRAY_NEVER_INSTRUMENT {
  auto NextPtr = static_cast<uint8_t *>(
                     internal_memcpy(Buffer->Data, &Header, sizeof(Header))) +
                 sizeof(Header);
  for (const auto &Record : ProfileRecords) {
    // List of IDs follow:
    for (const auto FId : Record.Path)
      NextPtr =
          static_cast<uint8_t *>(internal_memcpy(NextPtr, &FId, sizeof(FId))) +
          sizeof(FId);

    // Add the sentinel here. memset with 0 writes the same bytes a memcpy of
    // the zero value would.
    constexpr int32_t SentinelFId = 0;
    NextPtr = static_cast<uint8_t *>(
                  internal_memset(NextPtr, SentinelFId, sizeof(SentinelFId))) +
              sizeof(SentinelFId);

    // Add the node data here.
    NextPtr =
        static_cast<uint8_t *>(internal_memcpy(
            NextPtr, &Record.Node->CallCount, sizeof(Record.Node->CallCount))) +
        sizeof(Record.Node->CallCount);
    NextPtr = static_cast<uint8_t *>(
                  internal_memcpy(NextPtr, &Record.Node->CumulativeLocalTime,
                                  sizeof(Record.Node->CumulativeLocalTime))) +
              sizeof(Record.Node->CumulativeLocalTime);
  }

  DCHECK_EQ(NextPtr - static_cast<uint8_t *>(Buffer->Data), Buffer->Size);
}

} // namespace

// Convert every posted per-thread trie in TDArray into a serialized block in
// ProfileBuffers, replacing any previously serialized blocks. Uses two
// scratch arenas (records and paths) that are freed on scope exit; the
// serialized blocks themselves are separately allocated and survive until the
// next serialize() or reset().
void serialize() XRAY_NEVER_INSTRUMENT {
  if (!atomic_load(&CollectorInitialized, memory_order_acquire))
    return;

  SpinMutexLock Lock(&GlobalMutex);

  // Clear out the global ProfileBuffers, if it's not empty.
  for (auto &B : *ProfileBuffers)
    deallocateBuffer(reinterpret_cast<unsigned char *>(B.Data), B.Size);
  ProfileBuffers->trim(ProfileBuffers->size());

  DCHECK_NE(TDArray, nullptr);
  if (TDArray->empty())
    return;

  // Then repopulate the global ProfileBuffers.
  u32 I = 0;
  auto MaxSize = profilingFlags()->global_allocator_max;
  auto ProfileArena = allocateBuffer(MaxSize);
  if (ProfileArena == nullptr)
    return;

  auto ProfileArenaCleanup = at_scope_exit(
      [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(ProfileArena, MaxSize); });

  // NOTE(review): allocated with global_allocator_max but freed with MaxSize;
  // the values are identical, but using MaxSize here too would be clearer.
  auto PathArena = allocateBuffer(profilingFlags()->global_allocator_max);
  if (PathArena == nullptr)
    return;

  auto PathArenaCleanup = at_scope_exit(
      [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(PathArena, MaxSize); });

  for (const auto &ThreadTrie : *TDArray) {
    using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
    // Fresh allocators per thread reuse the same arenas from the top.
    ProfileRecordAllocator PRAlloc(ProfileArena,
                                   profilingFlags()->global_allocator_max);
    ProfileRecord::PathAllocator PathAlloc(
        PathArena, profilingFlags()->global_allocator_max);
    ProfileRecordArray ProfileRecords(PRAlloc);

    // First, we want to compute the amount of space we're going to need. We'll
    // use a local allocator and an __xray::Array<...> to store the intermediary
    // data, then compute the size as we're going along. Then we'll allocate the
    // contiguous space to contain the thread buffer data.
    if (ThreadTrie.FCT.getRoots().empty())
      continue;

    populateRecords(ProfileRecords, PathAlloc, ThreadTrie.FCT);
    DCHECK(!ThreadTrie.FCT.getRoots().empty());
    DCHECK(!ProfileRecords.empty());

    // Go through each record, to compute the sizes.
    //
    // header size = block size (4 bytes)
    //   + block number (4 bytes)
    //   + thread id (8 bytes)
    // record size = path ids (4 bytes * number of ids + sentinel 4 bytes)
    //   + call count (8 bytes)
    //   + local time (8 bytes)
    //   + end of record (8 bytes)
    u32 CumulativeSizes = 0;
    for (const auto &Record : ProfileRecords)
      // 20 = sentinel (4) + call count (8) + local time (8); must stay in
      // sync with serializeRecords().
      CumulativeSizes += 20 + (4 * Record.Path.size());

    // 16 == sizeof(BlockHeader); the block size includes the header itself.
    BlockHeader Header{16 + CumulativeSizes, I++, ThreadTrie.TId};
    // NOTE(review): neither Append({}) nor allocateBuffer failure is handled
    // in release builds here — B could be nullptr and B->Data could be
    // nullptr before serializeRecords writes through them; confirm whether
    // these can fail in practice and guard if so.
    auto B = ProfileBuffers->Append({});
    B->Size = sizeof(Header) + CumulativeSizes;
    B->Data = allocateBuffer(B->Size);
    DCHECK_NE(B->Data, nullptr);
    serializeRecords(B, Header, ProfileRecords);
  }
}

// Tear down all collector state and re-initialize it from scratch. Marks the
// collector uninitialized first (release store) so concurrent post() calls
// bail out, then rebuilds the buffer queue, allocators, and arrays by
// placement-new into the static storage, and finally publishes readiness.
void reset() XRAY_NEVER_INSTRUMENT {
  atomic_store(&CollectorInitialized, 0, memory_order_release);
  SpinMutexLock Lock(&GlobalMutex);

  if (ProfileBuffers != nullptr) {
    // Clear out the profile buffers that have been serialized.
    for (auto &B : *ProfileBuffers)
      deallocateBuffer(reinterpret_cast<uint8_t *>(B.Data), B.Size);
    ProfileBuffers->trim(ProfileBuffers->size());
    ProfileBuffers = nullptr;
  }

  if (TDArray != nullptr) {
    // Release the resources as required.
    for (auto &TD : *TDArray) {
      TD.BQ->releaseBuffer(TD.Buffers.NodeBuffer);
      TD.BQ->releaseBuffer(TD.Buffers.RootsBuffer);
      TD.BQ->releaseBuffer(TD.Buffers.ShadowStackBuffer);
      TD.BQ->releaseBuffer(TD.Buffers.NodeIdPairBuffer);
    }
    // We don't bother destroying the array here because we've already
    // potentially freed the backing store for the array. Instead we're going to
    // reset the pointer to nullptr, and re-use the storage later instead
    // (placement-new'ing into the storage as-is).
    TDArray = nullptr;
  }

  if (TDAllocator != nullptr) {
    TDAllocator->~Allocator();
    TDAllocator = nullptr;
  }

  // Return the ThreadData backing buffer before re-initializing the queue.
  if (Buffer.Data != nullptr) {
    BQ->releaseBuffer(Buffer);
  }

  if (BQ == nullptr) {
    // First-ever reset: construct the queue in its static storage.
    bool Success = false;
    new (&BufferQueueStorage)
        BufferQueue(profilingFlags()->global_allocator_max, 1, Success);
    if (!Success)
      return;
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
  } else {
    // Subsequent resets re-initialize the existing queue in place.
    BQ->finalize();

    if (BQ->init(profilingFlags()->global_allocator_max, 1) !=
        BufferQueue::ErrorCode::Ok)
      return;
  }

  // On any early return above, CollectorInitialized stays 0 and the collector
  // remains disabled.
  if (BQ->getBuffer(Buffer) != BufferQueue::ErrorCode::Ok)
    return;

  new (&ProfileBufferArrayAllocatorStorage)
      ProfileBufferArrayAllocator(profilingFlags()->global_allocator_max);
  ProfileBuffersAllocator = reinterpret_cast<ProfileBufferArrayAllocator *>(
      &ProfileBufferArrayAllocatorStorage);

  new (&ProfileBuffersStorage) ProfileBufferArray(*ProfileBuffersAllocator);
  ProfileBuffers =
      reinterpret_cast<ProfileBufferArray *>(&ProfileBuffersStorage);

  new (&ThreadDataAllocatorStorage)
      ThreadDataAllocator(Buffer.Data, Buffer.Size);
  TDAllocator =
      reinterpret_cast<ThreadDataAllocator *>(&ThreadDataAllocatorStorage);
  new (&ThreadDataArrayStorage) ThreadDataArray(*TDAllocator);
  TDArray = reinterpret_cast<ThreadDataArray *>(&ThreadDataArrayStorage);

  atomic_store(&CollectorInitialized, 1, memory_order_release);
}

// Iterator-style accessor for the serialized profile: pass {nullptr, 0} to
// get the file header, the header buffer to get block 0, and block N to get
// block N+1; returns {nullptr, 0} when exhausted. Identifies the current
// position by reading the BlockNum back out of the caller-supplied buffer.
XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT {
  SpinMutexLock Lock(&GlobalMutex);

  if (ProfileBuffers == nullptr || ProfileBuffers->size() == 0)
    return {nullptr, 0};

  // The file header lives in function-local static storage, constructed
  // exactly once; only the Timestamp/PID fields are refreshed per pass.
  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  static typename std::aligned_storage<sizeof(XRayProfilingFileHeader)>::type
      FileHeaderStorage;
  pthread_once(
      &Once, +[]() XRAY_NEVER_INSTRUMENT {
        new (&FileHeaderStorage) XRayProfilingFileHeader{};
      });

  if (UNLIKELY(B.Data == nullptr)) {
    // The first buffer should always contain the file header information.
    auto &FileHeader =
        *reinterpret_cast<XRayProfilingFileHeader *>(&FileHeaderStorage);
    FileHeader.Timestamp = NanoTime();
    FileHeader.PID = internal_getpid();
    return {&FileHeaderStorage, sizeof(XRayProfilingFileHeader)};
  }

  if (UNLIKELY(B.Data == &FileHeaderStorage))
    return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size};

  BlockHeader Header;
  internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
  auto NextBlock = Header.BlockNum + 1;
  if (NextBlock < ProfileBuffers->size())
    return {(*ProfileBuffers)[NextBlock].Data,
            (*ProfileBuffers)[NextBlock].Size};
  return {nullptr, 0};
}

} // namespace profileCollectorService
} // namespace __xray