1//===-- xray_basic_logging.cpp ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of XRay, a dynamic runtime instrumentation system.
10//
11// Implementation of a simple in-memory log of XRay events. This defines a
12// logging function that's compatible with the XRay handler interface, and
13// routines for exporting data to files.
14//
15//===----------------------------------------------------------------------===//
16
17#include <errno.h>
18#include <fcntl.h>
19#include <pthread.h>
20#include <sys/stat.h>
21#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
22#include <sys/syscall.h>
23#endif
24#include <sys/types.h>
25#include <time.h>
26#include <unistd.h>
27
28#include "sanitizer_common/sanitizer_allocator_internal.h"
29#include "sanitizer_common/sanitizer_libc.h"
30#include "xray/xray_records.h"
31#include "xray_recursion_guard.h"
32#include "xray_basic_flags.h"
33#include "xray_basic_logging.h"
34#include "xray_defs.h"
35#include "xray_flags.h"
36#include "xray_interface_internal.h"
37#include "xray_tsc.h"
38#include "xray_utils.h"
39
40namespace __xray {
41
42static SpinMutex LogMutex;
43
44namespace {
45// We use elements of this type to record the entry TSC of every function ID we
46// see as we're tracing a particular thread's execution.
47struct alignas(16) StackEntry {
48  int32_t FuncId;
49  uint16_t Type;
50  uint8_t CPU;
51  uint8_t Padding;
52  uint64_t TSC;
53};
54
55static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry");
56
57struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
58  void *InMemoryBuffer = nullptr;
59  size_t BufferSize = 0;
60  size_t BufferOffset = 0;
61  void *ShadowStack = nullptr;
62  size_t StackSize = 0;
63  size_t StackEntries = 0;
64  __xray::LogWriter *LogWriter = nullptr;
65};
66
67struct BasicLoggingOptions {
68  int DurationFilterMicros = 0;
69  size_t MaxStackDepth = 0;
70  size_t ThreadBufferSize = 0;
71};
72} // namespace
73
74static pthread_key_t PThreadKey;
75
76static atomic_uint8_t BasicInitialized{0};
77
78struct BasicLoggingOptions GlobalOptions;
79
80thread_local atomic_uint8_t Guard{0};
81
82static atomic_uint8_t UseRealTSC{0};
83static atomic_uint64_t ThresholdTicks{0};
84static atomic_uint64_t TicksPerSec{0};
85static atomic_uint64_t CycleFrequency{NanosecondsPerSecond};
86
87static LogWriter *getLog() XRAY_NEVER_INSTRUMENT {
88  LogWriter* LW = LogWriter::Open();
89  if (LW == nullptr)
90    return LW;
91
92  static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT;
93  pthread_once(&DetectOnce, +[] {
94    if (atomic_load(&UseRealTSC, memory_order_acquire))
95      atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release);
96  });
97
98  // Since we're here, we get to write the header. We set it up so that the
99  // header will only be written once, at the start, and let the threads
100  // logging do writes which just append.
101  XRayFileHeader Header;
102  // Version 2 includes tail exit records.
103  // Version 3 includes pid inside records.
104  Header.Version = 3;
105  Header.Type = FileTypes::NAIVE_LOG;
106  Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire);
107
108  // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
109  // before setting the values in the header.
110  Header.ConstantTSC = 1;
111  Header.NonstopTSC = 1;
112  LW->WriteAll(reinterpret_cast<char *>(&Header),
113               reinterpret_cast<char *>(&Header) + sizeof(Header));
114  return LW;
115}
116
117static LogWriter *getGlobalLog() XRAY_NEVER_INSTRUMENT {
118  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
119  static LogWriter *LW = nullptr;
120  pthread_once(&OnceInit, +[] { LW = getLog(); });
121  return LW;
122}
123
124static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
125  thread_local ThreadLocalData TLD;
126  thread_local bool UNUSED TOnce = [] {
127    if (GlobalOptions.ThreadBufferSize == 0) {
128      if (Verbosity())
129        Report("Not initializing TLD since ThreadBufferSize == 0.\n");
130      return false;
131    }
132    pthread_setspecific(PThreadKey, &TLD);
133    TLD.LogWriter = getGlobalLog();
134    TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>(
135        InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize,
136                      nullptr, alignof(XRayRecord)));
137    TLD.BufferSize = GlobalOptions.ThreadBufferSize;
138    TLD.BufferOffset = 0;
139    if (GlobalOptions.MaxStackDepth == 0) {
140      if (Verbosity())
141        Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n");
142      TLD.StackSize = 0;
143      TLD.StackEntries = 0;
144      TLD.ShadowStack = nullptr;
145      return false;
146    }
147    TLD.ShadowStack = reinterpret_cast<StackEntry *>(
148        InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr,
149                      alignof(StackEntry)));
150    TLD.StackSize = GlobalOptions.MaxStackDepth;
151    TLD.StackEntries = 0;
152    return false;
153  }();
154  return TLD;
155}
156
157template <class RDTSC>
158void InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
159                    RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
160  auto &TLD = getThreadLocalData();
161  LogWriter *LW = getGlobalLog();
162  if (LW == nullptr)
163    return;
164
165  // Use a simple recursion guard, to handle cases where we're already logging
166  // and for one reason or another, this function gets called again in the same
167  // thread.
168  RecursionGuard G(Guard);
169  if (!G)
170    return;
171
172  uint8_t CPU = 0;
173  uint64_t TSC = ReadTSC(CPU);
174
175  switch (Type) {
176  case XRayEntryType::ENTRY:
177  case XRayEntryType::LOG_ARGS_ENTRY: {
178    // Short circuit if we've reached the maximum depth of the stack.
179    if (TLD.StackEntries++ >= TLD.StackSize)
180      return;
181
182    // When we encounter an entry event, we keep track of the TSC and the CPU,
183    // and put it in the stack.
184    StackEntry E;
185    E.FuncId = FuncId;
186    E.CPU = CPU;
187    E.Type = Type;
188    E.TSC = TSC;
189    auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
190                         (sizeof(StackEntry) * (TLD.StackEntries - 1));
191    internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry));
192    break;
193  }
194  case XRayEntryType::EXIT:
195  case XRayEntryType::TAIL: {
196    if (TLD.StackEntries == 0)
197      break;
198
199    if (--TLD.StackEntries >= TLD.StackSize)
200      return;
201
202    // When we encounter an exit event, we check whether all the following are
203    // true:
204    //
205    // - The Function ID is the same as the most recent entry in the stack.
206    // - The CPU is the same as the most recent entry in the stack.
207    // - The Delta of the TSCs is less than the threshold amount of time we're
208    //   looking to record.
209    //
210    // If all of these conditions are true, we pop the stack and don't write a
211    // record and move the record offset back.
212    StackEntry StackTop;
213    auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
214                         (sizeof(StackEntry) * TLD.StackEntries);
215    internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry));
216    if (StackTop.FuncId == FuncId && StackTop.CPU == CPU &&
217        StackTop.TSC < TSC) {
218      auto Delta = TSC - StackTop.TSC;
219      if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) {
220        DCHECK(TLD.BufferOffset > 0);
221        TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2;
222        return;
223      }
224    }
225    break;
226  }
227  default:
228    // Should be unreachable.
229    DCHECK(false && "Unsupported XRayEntryType encountered.");
230    break;
231  }
232
233  // First determine whether the delta between the function's enter record and
234  // the exit record is higher than the threshold.
235  XRayRecord R;
236  R.RecordType = RecordTypes::NORMAL;
237  R.CPU = CPU;
238  R.TSC = TSC;
239  R.TId = GetTid();
240  R.PId = internal_getpid();
241  R.Type = Type;
242  R.FuncId = FuncId;
243  auto FirstEntry = reinterpret_cast<XRayRecord *>(TLD.InMemoryBuffer);
244  internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R));
245  if (++TLD.BufferOffset == TLD.BufferSize) {
246    SpinMutexLock Lock(&LogMutex);
247    LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
248                 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
249    TLD.BufferOffset = 0;
250    TLD.StackEntries = 0;
251  }
252}
253
254template <class RDTSC>
255void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1,
256                           RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
257  auto &TLD = getThreadLocalData();
258  auto FirstEntry =
259      reinterpret_cast<XRayArgPayload *>(TLD.InMemoryBuffer);
260  const auto &BuffLen = TLD.BufferSize;
261  LogWriter *LW = getGlobalLog();
262  if (LW == nullptr)
263    return;
264
265  // First we check whether there's enough space to write the data consecutively
266  // in the thread-local buffer. If not, we first flush the buffer before
267  // attempting to write the two records that must be consecutive.
268  if (TLD.BufferOffset + 2 > BuffLen) {
269    SpinMutexLock Lock(&LogMutex);
270    LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
271                 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
272    TLD.BufferOffset = 0;
273    TLD.StackEntries = 0;
274  }
275
276  // Then we write the "we have an argument" record.
277  InMemoryRawLog(FuncId, Type, ReadTSC);
278
279  RecursionGuard G(Guard);
280  if (!G)
281    return;
282
283  // And, from here on write the arg payload.
284  XRayArgPayload R;
285  R.RecordType = RecordTypes::ARG_PAYLOAD;
286  R.FuncId = FuncId;
287  R.TId = GetTid();
288  R.PId = internal_getpid();
289  R.Arg = Arg1;
290  internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R));
291  if (++TLD.BufferOffset == BuffLen) {
292    SpinMutexLock Lock(&LogMutex);
293    LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
294                 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
295    TLD.BufferOffset = 0;
296    TLD.StackEntries = 0;
297  }
298}
299
300void basicLoggingHandleArg0RealTSC(int32_t FuncId,
301                                   XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
302  InMemoryRawLog(FuncId, Type, readTSC);
303}
304
305void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type)
306    XRAY_NEVER_INSTRUMENT {
307  InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
308    timespec TS;
309    int result = clock_gettime(CLOCK_REALTIME, &TS);
310    if (result != 0) {
311      Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno));
312      TS = {0, 0};
313    }
314    CPU = 0;
315    return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
316  });
317}
318
319void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type,
320                                   uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
321  InMemoryRawLogWithArg(FuncId, Type, Arg1, readTSC);
322}
323
324void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type,
325                                      uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
326  InMemoryRawLogWithArg(
327      FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
328        timespec TS;
329        int result = clock_gettime(CLOCK_REALTIME, &TS);
330        if (result != 0) {
331          Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno));
332          TS = {0, 0};
333        }
334        CPU = 0;
335        return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
336      });
337}
338
339static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT {
340  ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P);
341  auto ExitGuard = at_scope_exit([&TLD] {
342    // Clean up dynamic resources.
343    if (TLD.InMemoryBuffer)
344      InternalFree(TLD.InMemoryBuffer);
345    if (TLD.ShadowStack)
346      InternalFree(TLD.ShadowStack);
347    if (Verbosity())
348      Report("Cleaned up log for TID: %llu\n", GetTid());
349  });
350
351  if (TLD.LogWriter == nullptr || TLD.BufferOffset == 0) {
352    if (Verbosity())
353      Report("Skipping buffer for TID: %llu; Offset = %zu\n", GetTid(),
354             TLD.BufferOffset);
355    return;
356  }
357
358  {
359    SpinMutexLock L(&LogMutex);
360    TLD.LogWriter->WriteAll(reinterpret_cast<char *>(TLD.InMemoryBuffer),
361                            reinterpret_cast<char *>(TLD.InMemoryBuffer) +
362                            (sizeof(XRayRecord) * TLD.BufferOffset));
363  }
364
365  // Because this thread's exit could be the last one trying to write to
366  // the file and that we're not able to close out the file properly, we
367  // sync instead and hope that the pending writes are flushed as the
368  // thread exits.
369  TLD.LogWriter->Flush();
370}
371
372XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize,
373                                   UNUSED size_t BufferMax, void *Options,
374                                   size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
375  uint8_t Expected = 0;
376  if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1,
377                                      memory_order_acq_rel)) {
378    if (Verbosity())
379      Report("Basic logging already initialized.\n");
380    return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
381  }
382
383  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
384  pthread_once(&OnceInit, +[] {
385    pthread_key_create(&PThreadKey, TLDDestructor);
386    atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release);
387    // Initialize the global TicksPerSec value.
388    atomic_store(&TicksPerSec,
389                 probeRequiredCPUFeatures() ? getTSCFrequency()
390                                            : NanosecondsPerSecond,
391                 memory_order_release);
392    if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity())
393      Report("WARNING: Required CPU features missing for XRay instrumentation, "
394             "using emulation instead.\n");
395  });
396
397  FlagParser P;
398  BasicFlags F;
399  F.setDefaults();
400  registerXRayBasicFlags(&P, &F);
401  P.ParseString(useCompilerDefinedBasicFlags());
402  auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS");
403  if (EnvOpts == nullptr)
404    EnvOpts = "";
405
406  P.ParseString(EnvOpts);
407
408  // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options
409  // set through XRAY_OPTIONS instead.
410  if (internal_strlen(EnvOpts) == 0) {
411    F.func_duration_threshold_us =
412        flags()->xray_naive_log_func_duration_threshold_us;
413    F.max_stack_depth = flags()->xray_naive_log_max_stack_depth;
414    F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size;
415  }
416
417  P.ParseString(static_cast<const char *>(Options));
418  GlobalOptions.ThreadBufferSize = F.thread_buffer_size;
419  GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us;
420  GlobalOptions.MaxStackDepth = F.max_stack_depth;
421  *basicFlags() = F;
422
423  atomic_store(&ThresholdTicks,
424               atomic_load(&TicksPerSec, memory_order_acquire) *
425                   GlobalOptions.DurationFilterMicros / 1000000,
426               memory_order_release);
427  __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire)
428                              ? basicLoggingHandleArg1RealTSC
429                              : basicLoggingHandleArg1EmulateTSC);
430  __xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire)
431                         ? basicLoggingHandleArg0RealTSC
432                         : basicLoggingHandleArg0EmulateTSC);
433
434  // TODO: Implement custom event and typed event handling support in Basic
435  // Mode.
436  __xray_remove_customevent_handler();
437  __xray_remove_typedevent_handler();
438
439  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
440}
441
442XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT {
443  uint8_t Expected = 0;
444  if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0,
445                                      memory_order_acq_rel) &&
446      Verbosity())
447    Report("Basic logging already finalized.\n");
448
449  // Nothing really to do aside from marking state of the global to be
450  // uninitialized.
451
452  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
453}
454
455XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT {
456  // This really does nothing, since flushing the logs happen at the end of a
457  // thread's lifetime, or when the buffers are full.
458  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
459}
460
461// This is a handler that, effectively, does nothing.
462void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT {
463}
464
465bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
466  XRayLogImpl Impl{
467      basicLoggingInit,
468      basicLoggingFinalize,
469      basicLoggingHandleArg0Empty,
470      basicLoggingFlush,
471  };
472  auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl);
473  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
474      Verbosity())
475    Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n",
476           RegistrationResult);
477  if (flags()->xray_naive_log ||
478      !internal_strcmp(flags()->xray_mode, "xray-basic")) {
479    auto SelectResult = __xray_log_select_mode("xray-basic");
480    if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
481      if (Verbosity())
482        Report("Failed selecting XRay Basic Mode; error = %d\n", SelectResult);
483      return false;
484    }
485
486    // We initialize the implementation using the data we get from the
487    // XRAY_BASIC_OPTIONS environment variable, at this point of the
488    // implementation.
489    auto *Env = GetEnv("XRAY_BASIC_OPTIONS");
490    auto InitResult =
491        __xray_log_init_mode("xray-basic", Env == nullptr ? "" : Env);
492    if (InitResult != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
493      if (Verbosity())
494        Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult);
495      return false;
496    }
497
498    // At this point we know that we've successfully initialized Basic mode
499    // tracing, and the only chance we're going to get for the current thread to
500    // clean-up may be at thread/program exit. To ensure that we're going to get
501    // the cleanup even without calling the finalization routines, we're
502    // registering a program exit function that will do the cleanup.
503    static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT;
504    pthread_once(&DynamicOnce, +[] {
505      static void *FakeTLD = nullptr;
506      FakeTLD = &getThreadLocalData();
507      Atexit(+[] { TLDDestructor(FakeTLD); });
508    });
509  }
510  return true;
511}
512
513} // namespace __xray
514
515static auto UNUSED Unused = __xray::basicLogDynamicInitializer();
516