// PerfJITEventListener.cpp revision 360660
1139969Simp//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===// 2139969Simp// 3139969Simp// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 41556Srgrimes// See https://llvm.org/LICENSE.txt for license information. 51556Srgrimes// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 61556Srgrimes// 71556Srgrimes//===----------------------------------------------------------------------===// 81556Srgrimes// 91556Srgrimes// This file defines a JITEventListener object that tells perf about JITted 101556Srgrimes// functions, including source line information. 111556Srgrimes// 121556Srgrimes// Documentation for perf jit integration is available at: 131556Srgrimes// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt 141556Srgrimes// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt 151556Srgrimes// 161556Srgrimes//===----------------------------------------------------------------------===// 171556Srgrimes 181556Srgrimes#include "llvm/ADT/Twine.h" 191556Srgrimes#include "llvm/Config/config.h" 201556Srgrimes#include "llvm/DebugInfo/DWARF/DWARFContext.h" 211556Srgrimes#include "llvm/ExecutionEngine/JITEventListener.h" 221556Srgrimes#include "llvm/Object/ObjectFile.h" 231556Srgrimes#include "llvm/Object/SymbolSize.h" 241556Srgrimes#include "llvm/Support/Debug.h" 251556Srgrimes#include "llvm/Support/Errno.h" 261556Srgrimes#include "llvm/Support/FileSystem.h" 271556Srgrimes#include "llvm/Support/MemoryBuffer.h" 281556Srgrimes#include "llvm/Support/Mutex.h" 291556Srgrimes#include "llvm/Support/MutexGuard.h" 301556Srgrimes#include "llvm/Support/Path.h" 311556Srgrimes#include "llvm/Support/Process.h" 321556Srgrimes#include "llvm/Support/Threading.h" 331556Srgrimes#include "llvm/Support/raw_ostream.h" 3417987Speter 3550471Speter#include <sys/mman.h> // mmap() 361556Srgrimes#include 
<sys/types.h> // getpid() 371556Srgrimes#include <time.h> // clock_gettime(), time(), localtime_r() */ 381556Srgrimes#include <unistd.h> // for getpid(), read(), close() 391556Srgrimes 401556Srgrimesusing namespace llvm; 411556Srgrimesusing namespace llvm::object; 421556Srgrimestypedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; 431556Srgrimes 441556Srgrimesnamespace { 451556Srgrimes 461556Srgrimes// language identifier (XXX: should we generate something better from debug 471556Srgrimes// info?) 481556Srgrimes#define JIT_LANG "llvm-IR" 491556Srgrimes#define LLVM_PERF_JIT_MAGIC \ 501556Srgrimes ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ 511556Srgrimes (uint32_t)'D') 521556Srgrimes#define LLVM_PERF_JIT_VERSION 1 531556Srgrimes 541556Srgrimes// bit 0: set if the jitdump file is using an architecture-specific timestamp 551556Srgrimes// clock source 561556Srgrimes#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0) 571556Srgrimes 581556Srgrimesstruct LLVMPerfJitHeader; 591556Srgrimes 601556Srgrimesclass PerfJITEventListener : public JITEventListener { 611556Srgrimespublic: 621556Srgrimes PerfJITEventListener(); 631556Srgrimes ~PerfJITEventListener() { 641556Srgrimes if (MarkerAddr) 651556Srgrimes CloseMarker(); 661556Srgrimes } 671556Srgrimes 681556Srgrimes void notifyObjectLoaded(ObjectKey K, const ObjectFile &Obj, 691556Srgrimes const RuntimeDyld::LoadedObjectInfo &L) override; 701556Srgrimes void notifyFreeingObject(ObjectKey K) override; 711556Srgrimes 721556Srgrimesprivate: 731556Srgrimes bool InitDebuggingDir(); 74 bool OpenMarker(); 75 void CloseMarker(); 76 static bool FillMachine(LLVMPerfJitHeader &hdr); 77 78 void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr, 79 uint64_t CodeSize); 80 void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines); 81 82 // cache lookups 83 pid_t Pid; 84 85 // base directory for output data 86 std::string JitPath; 87 88 // output data stream, closed via Dumpstream 89 int DumpFd = 
-1; 90 91 // output data stream 92 std::unique_ptr<raw_fd_ostream> Dumpstream; 93 94 // prevent concurrent dumps from messing up the output file 95 sys::Mutex Mutex; 96 97 // perf mmap marker 98 void *MarkerAddr = NULL; 99 100 // perf support ready 101 bool SuccessfullyInitialized = false; 102 103 // identifier for functions, primarily to identify when moving them around 104 uint64_t CodeGeneration = 1; 105}; 106 107// The following are POD struct definitions from the perf jit specification 108 109enum LLVMPerfJitRecordType { 110 JIT_CODE_LOAD = 0, 111 JIT_CODE_MOVE = 1, // not emitted, code isn't moved 112 JIT_CODE_DEBUG_INFO = 2, 113 JIT_CODE_CLOSE = 3, // not emitted, unnecessary 114 JIT_CODE_UNWINDING_INFO = 4, // not emitted 115 116 JIT_CODE_MAX 117}; 118 119struct LLVMPerfJitHeader { 120 uint32_t Magic; // characters "JiTD" 121 uint32_t Version; // header version 122 uint32_t TotalSize; // total size of header 123 uint32_t ElfMach; // elf mach target 124 uint32_t Pad1; // reserved 125 uint32_t Pid; 126 uint64_t Timestamp; // timestamp 127 uint64_t Flags; // flags 128}; 129 130// record prefix (mandatory in each record) 131struct LLVMPerfJitRecordPrefix { 132 uint32_t Id; // record type identifier 133 uint32_t TotalSize; 134 uint64_t Timestamp; 135}; 136 137struct LLVMPerfJitRecordCodeLoad { 138 LLVMPerfJitRecordPrefix Prefix; 139 140 uint32_t Pid; 141 uint32_t Tid; 142 uint64_t Vma; 143 uint64_t CodeAddr; 144 uint64_t CodeSize; 145 uint64_t CodeIndex; 146}; 147 148struct LLVMPerfJitDebugEntry { 149 uint64_t Addr; 150 int Lineno; // source line number starting at 1 151 int Discrim; // column discriminator, 0 is default 152 // followed by null terminated filename, \xff\0 if same as previous entry 153}; 154 155struct LLVMPerfJitRecordDebugInfo { 156 LLVMPerfJitRecordPrefix Prefix; 157 158 uint64_t CodeAddr; 159 uint64_t NrEntry; 160 // followed by NrEntry LLVMPerfJitDebugEntry records 161}; 162 163static inline uint64_t timespec_to_ns(const struct timespec *ts) 
{ 164 const uint64_t NanoSecPerSec = 1000000000; 165 return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec; 166} 167 168static inline uint64_t perf_get_timestamp(void) { 169 struct timespec ts; 170 int ret; 171 172 ret = clock_gettime(CLOCK_MONOTONIC, &ts); 173 if (ret) 174 return 0; 175 176 return timespec_to_ns(&ts); 177} 178 179PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) { 180 // check if clock-source is supported 181 if (!perf_get_timestamp()) { 182 errs() << "kernel does not support CLOCK_MONOTONIC\n"; 183 return; 184 } 185 186 if (!InitDebuggingDir()) { 187 errs() << "could not initialize debugging directory\n"; 188 return; 189 } 190 191 std::string Filename; 192 raw_string_ostream FilenameBuf(Filename); 193 FilenameBuf << JitPath << "/jit-" << Pid << ".dump"; 194 195 // Need to open ourselves, because we need to hand the FD to OpenMarker() and 196 // raw_fd_ostream doesn't expose the FD. 197 using sys::fs::openFileForWrite; 198 if (auto EC = 199 openFileForReadWrite(FilenameBuf.str(), DumpFd, 200 sys::fs::CD_CreateNew, sys::fs::OF_None)) { 201 errs() << "could not open JIT dump file " << FilenameBuf.str() << ": " 202 << EC.message() << "\n"; 203 return; 204 } 205 206 Dumpstream = make_unique<raw_fd_ostream>(DumpFd, true); 207 208 LLVMPerfJitHeader Header = {0}; 209 if (!FillMachine(Header)) 210 return; 211 212 // signal this process emits JIT information 213 if (!OpenMarker()) 214 return; 215 216 // emit dumpstream header 217 Header.Magic = LLVM_PERF_JIT_MAGIC; 218 Header.Version = LLVM_PERF_JIT_VERSION; 219 Header.TotalSize = sizeof(Header); 220 Header.Pid = Pid; 221 Header.Timestamp = perf_get_timestamp(); 222 Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header)); 223 224 // Everything initialized, can do profiling now. 
225 if (!Dumpstream->has_error()) 226 SuccessfullyInitialized = true; 227} 228 229void PerfJITEventListener::notifyObjectLoaded( 230 ObjectKey K, const ObjectFile &Obj, 231 const RuntimeDyld::LoadedObjectInfo &L) { 232 233 if (!SuccessfullyInitialized) 234 return; 235 236 OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj); 237 const ObjectFile &DebugObj = *DebugObjOwner.getBinary(); 238 239 // Get the address of the object image for use as a unique identifier 240 std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj); 241 242 // Use symbol info to iterate over functions in the object. 243 for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) { 244 SymbolRef Sym = P.first; 245 std::string SourceFileName; 246 247 Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType(); 248 if (!SymTypeOrErr) { 249 // There's not much we can with errors here 250 consumeError(SymTypeOrErr.takeError()); 251 continue; 252 } 253 SymbolRef::Type SymType = *SymTypeOrErr; 254 if (SymType != SymbolRef::ST_Function) 255 continue; 256 257 Expected<StringRef> Name = Sym.getName(); 258 if (!Name) { 259 consumeError(Name.takeError()); 260 continue; 261 } 262 263 Expected<uint64_t> AddrOrErr = Sym.getAddress(); 264 if (!AddrOrErr) { 265 consumeError(AddrOrErr.takeError()); 266 continue; 267 } 268 uint64_t Size = P.second; 269 object::SectionedAddress Address; 270 Address.Address = *AddrOrErr; 271 272 uint64_t SectionIndex = object::SectionedAddress::UndefSection; 273 if (auto SectOrErr = Sym.getSection()) 274 if (*SectOrErr != Obj.section_end()) 275 SectionIndex = SectOrErr.get()->getIndex(); 276 277 // According to spec debugging info has to come before loading the 278 // corresonding code load. 
279 DILineInfoTable Lines = Context->getLineInfoForAddressRange( 280 {*AddrOrErr, SectionIndex}, Size, FileLineInfoKind::AbsoluteFilePath); 281 282 NotifyDebug(*AddrOrErr, Lines); 283 NotifyCode(Name, *AddrOrErr, Size); 284 } 285 286 Dumpstream->flush(); 287} 288 289void PerfJITEventListener::notifyFreeingObject(ObjectKey K) { 290 // perf currently doesn't have an interface for unloading. But munmap()ing the 291 // code section does, so that's ok. 292} 293 294bool PerfJITEventListener::InitDebuggingDir() { 295 time_t Time; 296 struct tm LocalTime; 297 char TimeBuffer[sizeof("YYYYMMDD")]; 298 SmallString<64> Path; 299 300 // search for location to dump data to 301 if (const char *BaseDir = getenv("JITDUMPDIR")) 302 Path.append(BaseDir); 303 else if (!sys::path::home_directory(Path)) 304 Path = "."; 305 306 // create debug directory 307 Path += "/.debug/jit/"; 308 if (auto EC = sys::fs::create_directories(Path)) { 309 errs() << "could not create jit cache directory " << Path << ": " 310 << EC.message() << "\n"; 311 return false; 312 } 313 314 // create unique directory for dump data related to this process 315 time(&Time); 316 localtime_r(&Time, &LocalTime); 317 strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime); 318 Path += JIT_LANG "-jit-"; 319 Path += TimeBuffer; 320 321 SmallString<128> UniqueDebugDir; 322 323 using sys::fs::createUniqueDirectory; 324 if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) { 325 errs() << "could not create unique jit cache directory " << UniqueDebugDir 326 << ": " << EC.message() << "\n"; 327 return false; 328 } 329 330 JitPath = UniqueDebugDir.str(); 331 332 return true; 333} 334 335bool PerfJITEventListener::OpenMarker() { 336 // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap 337 // is captured either live (perf record running when we mmap) or in deferred 338 // mode, via /proc/PID/maps. 
The MMAP record is used as a marker of a jitdump 339 // file for more meta data info about the jitted code. Perf report/annotate 340 // detect this special filename and process the jitdump file. 341 // 342 // Mapping must be PROT_EXEC to ensure it is captured by perf record 343 // even when not using -d option. 344 MarkerAddr = ::mmap(NULL, sys::Process::getPageSizeEstimate(), 345 PROT_READ | PROT_EXEC, MAP_PRIVATE, DumpFd, 0); 346 347 if (MarkerAddr == MAP_FAILED) { 348 errs() << "could not mmap JIT marker\n"; 349 return false; 350 } 351 return true; 352} 353 354void PerfJITEventListener::CloseMarker() { 355 if (!MarkerAddr) 356 return; 357 358 munmap(MarkerAddr, sys::Process::getPageSizeEstimate()); 359 MarkerAddr = nullptr; 360} 361 362bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) { 363 char id[16]; 364 struct { 365 uint16_t e_type; 366 uint16_t e_machine; 367 } info; 368 369 size_t RequiredMemory = sizeof(id) + sizeof(info); 370 371 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 372 MemoryBuffer::getFileSlice("/proc/self/exe", 373 RequiredMemory, 374 0); 375 376 // This'll not guarantee that enough data was actually read from the 377 // underlying file. Instead the trailing part of the buffer would be 378 // zeroed. Given the ELF signature check below that seems ok though, 379 // it's unlikely that the file ends just after that, and the 380 // consequence would just be that perf wouldn't recognize the 381 // signature. 
382 if (auto EC = MB.getError()) { 383 errs() << "could not open /proc/self/exe: " << EC.message() << "\n"; 384 return false; 385 } 386 387 memcpy(&id, (*MB)->getBufferStart(), sizeof(id)); 388 memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info)); 389 390 // check ELF signature 391 if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') { 392 errs() << "invalid elf signature\n"; 393 return false; 394 } 395 396 hdr.ElfMach = info.e_machine; 397 398 return true; 399} 400 401void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol, 402 uint64_t CodeAddr, uint64_t CodeSize) { 403 assert(SuccessfullyInitialized); 404 405 // 0 length functions can't have samples. 406 if (CodeSize == 0) 407 return; 408 409 LLVMPerfJitRecordCodeLoad rec; 410 rec.Prefix.Id = JIT_CODE_LOAD; 411 rec.Prefix.TotalSize = sizeof(rec) + // debug record itself 412 Symbol->size() + 1 + // symbol name 413 CodeSize; // and code 414 rec.Prefix.Timestamp = perf_get_timestamp(); 415 416 rec.CodeSize = CodeSize; 417 rec.Vma = 0; 418 rec.CodeAddr = CodeAddr; 419 rec.Pid = Pid; 420 rec.Tid = get_threadid(); 421 422 // avoid interspersing output 423 MutexGuard Guard(Mutex); 424 425 rec.CodeIndex = CodeGeneration++; // under lock! 426 427 Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec)); 428 Dumpstream->write(Symbol->data(), Symbol->size() + 1); 429 Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize); 430} 431 432void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr, 433 DILineInfoTable Lines) { 434 assert(SuccessfullyInitialized); 435 436 // Didn't get useful debug info. 
437 if (Lines.empty()) 438 return; 439 440 LLVMPerfJitRecordDebugInfo rec; 441 rec.Prefix.Id = JIT_CODE_DEBUG_INFO; 442 rec.Prefix.TotalSize = sizeof(rec); // will be increased further 443 rec.Prefix.Timestamp = perf_get_timestamp(); 444 rec.CodeAddr = CodeAddr; 445 rec.NrEntry = Lines.size(); 446 447 // compute total size size of record (variable due to filenames) 448 DILineInfoTable::iterator Begin = Lines.begin(); 449 DILineInfoTable::iterator End = Lines.end(); 450 for (DILineInfoTable::iterator It = Begin; It != End; ++It) { 451 DILineInfo &line = It->second; 452 rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry); 453 rec.Prefix.TotalSize += line.FileName.size() + 1; 454 } 455 456 // The debug_entry describes the source line information. It is defined as 457 // follows in order: 458 // * uint64_t code_addr: address of function for which the debug information 459 // is generated 460 // * uint32_t line : source file line number (starting at 1) 461 // * uint32_t discrim : column discriminator, 0 is default 462 // * char name[n] : source file name in ASCII, including null termination 463 464 // avoid interspersing output 465 MutexGuard Guard(Mutex); 466 467 Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec)); 468 469 for (DILineInfoTable::iterator It = Begin; It != End; ++It) { 470 LLVMPerfJitDebugEntry LineInfo; 471 DILineInfo &Line = It->second; 472 473 LineInfo.Addr = It->first; 474 // The function re-created by perf is preceded by a elf 475 // header. Need to adjust for that, otherwise the results are 476 // wrong. 477 LineInfo.Addr += 0x40; 478 LineInfo.Lineno = Line.Line; 479 LineInfo.Discrim = Line.Discriminator; 480 481 Dumpstream->write(reinterpret_cast<const char *>(&LineInfo), 482 sizeof(LineInfo)); 483 Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1); 484 } 485} 486 487// There should be only a single event listener per process, otherwise perf gets 488// confused. 
489llvm::ManagedStatic<PerfJITEventListener> PerfListener; 490 491} // end anonymous namespace 492 493namespace llvm { 494JITEventListener *JITEventListener::createPerfJITEventListener() { 495 return &*PerfListener; 496} 497 498} // namespace llvm 499 500LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void) 501{ 502 return wrap(JITEventListener::createPerfJITEventListener()); 503} 504