1//===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Register objects for access by profilers via the perf JIT interface.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
14
15#include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
16
17#include "llvm/Support/FileSystem.h"
18#include "llvm/Support/MemoryBuffer.h"
19#include "llvm/Support/Path.h"
20#include "llvm/Support/Process.h"
21#include "llvm/Support/Threading.h"
22
23#include <mutex>
24#include <optional>
25
26#ifdef __linux__
27
28#include <sys/mman.h> // mmap()
#include <time.h>     // clock_gettime(), time(), localtime_r()
30#include <unistd.h>   // for read(), close()
31
// Debug category used by the LLVM_DEBUG statements in this file.
#define DEBUG_TYPE "orc"

// Language tag that becomes part of the dump directory name (XXX: should we
// generate something better from debug info?)
#define JIT_LANG "llvm-IR"
// File magic: the characters 'J', 'i', 'T', 'D' packed with 'J' in the most
// significant byte, i.e. the 32-bit value 0x4A695444.
#define LLVM_PERF_JIT_MAGIC                                                    \
  ((static_cast<uint32_t>('J') << 24) | (static_cast<uint32_t>('i') << 16) |   \
   (static_cast<uint32_t>('T') << 8) | static_cast<uint32_t>('D'))
// Version number stored in the dump file header.
#define LLVM_PERF_JIT_VERSION 1
41
42using namespace llvm;
43using namespace llvm::orc;
44
45struct PerfState {
46  // cache lookups
47  uint32_t Pid;
48
49  // base directory for output data
50  std::string JitPath;
51
52  // output data stream, closed via Dumpstream
53  int DumpFd = -1;
54
55  // output data stream
56  std::unique_ptr<raw_fd_ostream> Dumpstream;
57
58  // perf mmap marker
59  void *MarkerAddr = NULL;
60};
61
62// prevent concurrent dumps from messing up the output file
63static std::mutex Mutex;
64static std::optional<PerfState> State;
65
/// Prefix shared by every record written to the dump file. It is the first
/// member of DIR, CLR and UWR, and is written on its own as the closing
/// record. The struct is written to the file byte-for-byte.
struct RecHeader {
  uint32_t Id;        // record type (the sender's Prefix.Id, or JIT_CODE_CLOSE)
  uint32_t TotalSize; // record size as reported by the sender's Prefix
  uint64_t Timestamp; // perf_get_timestamp() value taken when writing
};
71
72struct DIR {
73  RecHeader Prefix;
74  uint64_t CodeAddr;
75  uint64_t NrEntry;
76};
77
/// Fixed-size part of one debug entry. writeDebugRecord writes it and then
/// appends the entry's name bytes directly after it.
struct DIE {
  uint64_t CodeAddr; // taken from the entry's Addr
  uint32_t Line;     // taken from the entry's Lineno
  uint32_t Discrim;  // taken from the entry's Discrim
};
83
84struct CLR {
85  RecHeader Prefix;
86  uint32_t Pid;
87  uint32_t Tid;
88  uint64_t Vma;
89  uint64_t CodeAddr;
90  uint64_t CodeSize;
91  uint64_t CodeIndex;
92};
93
94struct UWR {
95  RecHeader Prefix;
96  uint64_t UnwindDataSize;
97  uint64_t EhFrameHeaderSize;
98  uint64_t MappedSize;
99};
100
/// Collapse a timespec into a single count of nanoseconds.
static inline uint64_t timespec_to_ns(const struct timespec *TS) {
  constexpr uint64_t NanosPerSecond = 1000000000;
  uint64_t Total = static_cast<uint64_t>(TS->tv_sec);
  Total *= NanosPerSecond;
  Total += TS->tv_nsec;
  return Total;
}
105
106static inline uint64_t perf_get_timestamp() {
107  timespec TS;
108  if (clock_gettime(CLOCK_MONOTONIC, &TS))
109    return 0;
110
111  return timespec_to_ns(&TS);
112}
113
114static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {
115  assert(State && "PerfState not initialized");
116  LLVM_DEBUG(dbgs() << "Writing debug record with "
117                    << DebugRecord.Entries.size() << " entries\n");
118  [[maybe_unused]] size_t Written = 0;
119  DIR Dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id),
120                    DebugRecord.Prefix.TotalSize, perf_get_timestamp()},
121          DebugRecord.CodeAddr, DebugRecord.Entries.size()};
122  State->Dumpstream->write(reinterpret_cast<const char *>(&Dir), sizeof(Dir));
123  Written += sizeof(Dir);
124  for (auto &Die : DebugRecord.Entries) {
125    DIE d{Die.Addr, Die.Lineno, Die.Discrim};
126    State->Dumpstream->write(reinterpret_cast<const char *>(&d), sizeof(d));
127    State->Dumpstream->write(Die.Name.data(), Die.Name.size() + 1);
128    Written += sizeof(d) + Die.Name.size() + 1;
129  }
130  LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n");
131}
132
133static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {
134  assert(State && "PerfState not initialized");
135  uint32_t Tid = get_threadid();
136  LLVM_DEBUG(dbgs() << "Writing code record with code size "
137                    << CodeRecord.CodeSize << " and code index "
138                    << CodeRecord.CodeIndex << "\n");
139  CLR Clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id),
140                    CodeRecord.Prefix.TotalSize, perf_get_timestamp()},
141          State->Pid,
142          Tid,
143          CodeRecord.Vma,
144          CodeRecord.CodeAddr,
145          CodeRecord.CodeSize,
146          CodeRecord.CodeIndex};
147  LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr) << " bytes of CLR, "
148                    << CodeRecord.Name.size() + 1 << " bytes of name, "
149                    << CodeRecord.CodeSize << " bytes of code\n");
150  State->Dumpstream->write(reinterpret_cast<const char *>(&Clr), sizeof(Clr));
151  State->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1);
152  State->Dumpstream->write((const char *)CodeRecord.CodeAddr,
153                           CodeRecord.CodeSize);
154}
155
156static void
157writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {
158  assert(State && "PerfState not initialized");
159  dbgs() << "Writing unwind record with unwind data size "
160         << UnwindRecord.UnwindDataSize << " and EH frame header size "
161         << UnwindRecord.EHFrameHdrSize << " and mapped size "
162         << UnwindRecord.MappedSize << "\n";
163  UWR Uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id),
164                    UnwindRecord.Prefix.TotalSize, perf_get_timestamp()},
165          UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize,
166          UnwindRecord.MappedSize};
167  LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, "
168                    << UnwindRecord.EHFrameHdrSize
169                    << " bytes of EH frame header, "
170                    << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize
171                    << " bytes of EH frame\n");
172  State->Dumpstream->write(reinterpret_cast<const char *>(&Uwr), sizeof(Uwr));
173  if (UnwindRecord.EHFrameHdrAddr)
174    State->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr,
175                             UnwindRecord.EHFrameHdrSize);
176  else
177    State->Dumpstream->write(UnwindRecord.EHFrameHdr.data(),
178                             UnwindRecord.EHFrameHdrSize);
179  State->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr,
180                           UnwindRecord.UnwindDataSize -
181                               UnwindRecord.EHFrameHdrSize);
182}
183
184static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) {
185  if (!State)
186    return make_error<StringError>("PerfState not initialized",
187                                   inconvertibleErrorCode());
188
189  // Serialize the batch
190  std::lock_guard<std::mutex> Lock(Mutex);
191  if (Batch.UnwindingRecord.Prefix.TotalSize > 0)
192    writeUnwindRecord(Batch.UnwindingRecord);
193
194  for (const auto &DebugInfo : Batch.DebugInfoRecords)
195    writeDebugRecord(DebugInfo);
196
197  for (const auto &CodeLoad : Batch.CodeLoadRecords)
198    writeCodeRecord(CodeLoad);
199
200  State->Dumpstream->flush();
201
202  return Error::success();
203}
204
/// Header placed at the start of the dump file. FillMachine populates it and
/// registerJITLoaderPerfStartImpl writes it to the file byte-for-byte.
struct Header {
  uint32_t Magic;     // LLVM_PERF_JIT_MAGIC, the characters "JiTD"
  uint32_t Version;   // header version, LLVM_PERF_JIT_VERSION
  uint32_t TotalSize; // total size of this header in bytes
  uint32_t ElfMach;   // ELF machine value of the running executable
  uint32_t Pad1;      // reserved
  uint32_t Pid;       // id of the process that writes the file
  uint64_t Timestamp; // perf_get_timestamp() value at creation
  uint64_t Flags;     // flags
};
215
216static Error OpenMarker(PerfState &State) {
217  // We mmap the jitdump to create an MMAP RECORD in perf.data file.  The mmap
218  // is captured either live (perf record running when we mmap) or in deferred
219  // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
220  // file for more meta data info about the jitted code. Perf report/annotate
221  // detect this special filename and process the jitdump file.
222  //
223  // Mapping must be PROT_EXEC to ensure it is captured by perf record
224  // even when not using -d option.
225  State.MarkerAddr =
226      ::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC,
227             MAP_PRIVATE, State.DumpFd, 0);
228
229  if (State.MarkerAddr == MAP_FAILED)
230    return make_error<llvm::StringError>("could not mmap JIT marker",
231                                         inconvertibleErrorCode());
232
233  return Error::success();
234}
235
236void CloseMarker(PerfState &State) {
237  if (!State.MarkerAddr)
238    return;
239
240  munmap(State.MarkerAddr, sys::Process::getPageSizeEstimate());
241  State.MarkerAddr = nullptr;
242}
243
244static Expected<Header> FillMachine(PerfState &State) {
245  Header Hdr;
246  Hdr.Magic = LLVM_PERF_JIT_MAGIC;
247  Hdr.Version = LLVM_PERF_JIT_VERSION;
248  Hdr.TotalSize = sizeof(Hdr);
249  Hdr.Pid = State.Pid;
250  Hdr.Timestamp = perf_get_timestamp();
251
252  char Id[16];
253  struct {
254    uint16_t e_type;
255    uint16_t e_machine;
256  } Info;
257
258  size_t RequiredMemory = sizeof(Id) + sizeof(Info);
259
260  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
261      MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0);
262
263  // This'll not guarantee that enough data was actually read from the
264  // underlying file. Instead the trailing part of the buffer would be
265  // zeroed. Given the ELF signature check below that seems ok though,
266  // it's unlikely that the file ends just after that, and the
267  // consequence would just be that perf wouldn't recognize the
268  // signature.
269  if (!MB)
270    return make_error<llvm::StringError>("could not open /proc/self/exe",
271                                         MB.getError());
272
273  memcpy(&Id, (*MB)->getBufferStart(), sizeof(Id));
274  memcpy(&Info, (*MB)->getBufferStart() + sizeof(Id), sizeof(Info));
275
276  // check ELF signature
277  if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F')
278    return make_error<llvm::StringError>("invalid ELF signature",
279                                         inconvertibleErrorCode());
280
281  Hdr.ElfMach = Info.e_machine;
282
283  return Hdr;
284}
285
286static Error InitDebuggingDir(PerfState &State) {
287  time_t Time;
288  struct tm LocalTime;
289  char TimeBuffer[sizeof("YYYYMMDD")];
290  SmallString<64> Path;
291
292  // search for location to dump data to
293  if (const char *BaseDir = getenv("JITDUMPDIR"))
294    Path.append(BaseDir);
295  else if (!sys::path::home_directory(Path))
296    Path = ".";
297
298  // create debug directory
299  Path += "/.debug/jit/";
300  if (auto EC = sys::fs::create_directories(Path)) {
301    std::string ErrStr;
302    raw_string_ostream ErrStream(ErrStr);
303    ErrStream << "could not create jit cache directory " << Path << ": "
304              << EC.message() << "\n";
305    return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
306  }
307
308  // create unique directory for dump data related to this process
309  time(&Time);
310  localtime_r(&Time, &LocalTime);
311  strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
312  Path += JIT_LANG "-jit-";
313  Path += TimeBuffer;
314
315  SmallString<128> UniqueDebugDir;
316
317  using sys::fs::createUniqueDirectory;
318  if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
319    std::string ErrStr;
320    raw_string_ostream ErrStream(ErrStr);
321    ErrStream << "could not create unique jit cache directory "
322              << UniqueDebugDir << ": " << EC.message() << "\n";
323    return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
324  }
325
326  State.JitPath = std::string(UniqueDebugDir);
327
328  return Error::success();
329}
330
331static Error registerJITLoaderPerfStartImpl() {
332  PerfState Tentative;
333  Tentative.Pid = sys::Process::getProcessId();
334  // check if clock-source is supported
335  if (!perf_get_timestamp())
336    return make_error<StringError>("kernel does not support CLOCK_MONOTONIC",
337                                   inconvertibleErrorCode());
338
339  if (auto Err = InitDebuggingDir(Tentative))
340    return Err;
341
342  std::string Filename;
343  raw_string_ostream FilenameBuf(Filename);
344  FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump";
345
346  // Need to open ourselves, because we need to hand the FD to OpenMarker() and
347  // raw_fd_ostream doesn't expose the FD.
348  using sys::fs::openFileForWrite;
349  if (auto EC = openFileForReadWrite(FilenameBuf.str(), Tentative.DumpFd,
350                                     sys::fs::CD_CreateNew, sys::fs::OF_None)) {
351    std::string ErrStr;
352    raw_string_ostream ErrStream(ErrStr);
353    ErrStream << "could not open JIT dump file " << FilenameBuf.str() << ": "
354              << EC.message() << "\n";
355    return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
356  }
357
358  Tentative.Dumpstream =
359      std::make_unique<raw_fd_ostream>(Tentative.DumpFd, true);
360
361  auto Header = FillMachine(Tentative);
362  if (!Header)
363    return Header.takeError();
364
365  // signal this process emits JIT information
366  if (auto Err = OpenMarker(Tentative))
367    return Err;
368
369  Tentative.Dumpstream->write(reinterpret_cast<const char *>(&Header.get()),
370                              sizeof(*Header));
371
372  // Everything initialized, can do profiling now.
373  if (Tentative.Dumpstream->has_error())
374    return make_error<StringError>("could not write JIT dump header",
375                                   inconvertibleErrorCode());
376
377  State = std::move(Tentative);
378  return Error::success();
379}
380
381static Error registerJITLoaderPerfEndImpl() {
382  if (!State)
383    return make_error<StringError>("PerfState not initialized",
384                                   inconvertibleErrorCode());
385
386  RecHeader Close;
387  Close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);
388  Close.TotalSize = sizeof(Close);
389  Close.Timestamp = perf_get_timestamp();
390  State->Dumpstream->write(reinterpret_cast<const char *>(&Close),
391                           sizeof(Close));
392  if (State->MarkerAddr)
393    CloseMarker(*State);
394
395  State.reset();
396  return Error::success();
397}
398
399extern "C" llvm::orc::shared::CWrapperFunctionResult
400llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
401  using namespace orc::shared;
402  return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(
403             Data, Size, registerJITLoaderPerfImpl)
404      .release();
405}
406
407extern "C" llvm::orc::shared::CWrapperFunctionResult
408llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
409  using namespace orc::shared;
410  return WrapperFunction<SPSError()>::handle(Data, Size,
411                                             registerJITLoaderPerfStartImpl)
412      .release();
413}
414
415extern "C" llvm::orc::shared::CWrapperFunctionResult
416llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
417  using namespace orc::shared;
418  return WrapperFunction<SPSError()>::handle(Data, Size,
419                                             registerJITLoaderPerfEndImpl)
420      .release();
421}
422
423#else
424
425using namespace llvm;
426using namespace llvm::orc;
427
428static Error badOS() {
429  using namespace llvm;
430  return llvm::make_error<StringError>(
431      "unsupported OS (perf support is only available on linux!)",
432      inconvertibleErrorCode());
433}
434
435static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); }
436
437extern "C" llvm::orc::shared::CWrapperFunctionResult
438llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
439  using namespace shared;
440  return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, Size,
441                                                                  badOSBatch)
442      .release();
443}
444
445extern "C" llvm::orc::shared::CWrapperFunctionResult
446llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
447  using namespace shared;
448  return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
449}
450
451extern "C" llvm::orc::shared::CWrapperFunctionResult
452llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
453  using namespace shared;
454  return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
455}
456
457#endif
458