1//===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of XRay, a dynamic runtime instrumentation system.
10//
11// Here we implement the Flight Data Recorder mode for XRay, where we use
12// compact structures to store records in memory as well as when writing out the
13// data to files.
14//
15//===----------------------------------------------------------------------===//
16#include "xray_fdr_logging.h"
17#include <cassert>
18#include <errno.h>
19#include <limits>
20#include <memory>
21#include <pthread.h>
22#include <sys/time.h>
23#include <time.h>
24#include <unistd.h>
25
26#include "sanitizer_common/sanitizer_allocator_internal.h"
27#include "sanitizer_common/sanitizer_atomic.h"
28#include "sanitizer_common/sanitizer_common.h"
29#include "xray/xray_interface.h"
30#include "xray/xray_records.h"
31#include "xray_allocator.h"
32#include "xray_buffer_queue.h"
33#include "xray_defs.h"
34#include "xray_fdr_controller.h"
35#include "xray_fdr_flags.h"
36#include "xray_fdr_log_writer.h"
37#include "xray_flags.h"
38#include "xray_recursion_guard.h"
39#include "xray_tsc.h"
40#include "xray_utils.h"
41
42namespace __xray {
43
44static atomic_sint32_t LoggingStatus = {
45    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
46
47namespace {
48
49// Group together thread-local-data in a struct, then hide it behind a function
50// call so that it can be initialized on first use instead of as a global. We
51// force the alignment to 64-bytes for x86 cache line alignment, as this
52// structure is used in the hot path of implementation.
53struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
54  BufferQueue::Buffer Buffer{};
55  BufferQueue *BQ = nullptr;
56
57  using LogWriterStorage =
58      typename std::aligned_storage<sizeof(FDRLogWriter),
59                                    alignof(FDRLogWriter)>::type;
60
61  LogWriterStorage LWStorage;
62  FDRLogWriter *Writer = nullptr;
63
64  using ControllerStorage =
65      typename std::aligned_storage<sizeof(FDRController<>),
66                                    alignof(FDRController<>)>::type;
67  ControllerStorage CStorage;
68  FDRController<> *Controller = nullptr;
69};
70
71} // namespace
72
73static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
74              "ThreadLocalData must be trivially destructible");
75
76// Use a global pthread key to identify thread-local data for logging.
77static pthread_key_t Key;
78
79// Global BufferQueue.
80static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage;
81static BufferQueue *BQ = nullptr;
82
83// Global thresholds for function durations.
84static atomic_uint64_t ThresholdTicks{0};
85
86// Global for ticks per second.
87static atomic_uint64_t TicksPerSec{0};
88
89static atomic_sint32_t LogFlushStatus = {
90    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
91
92// This function will initialize the thread-local data structure used by the FDR
93// logging implementation and return a reference to it. The implementation
94// details require a bit of care to maintain.
95//
96// First, some requirements on the implementation in general:
97//
98//   - XRay handlers should not call any memory allocation routines that may
99//     delegate to an instrumented implementation. This means functions like
100//     malloc() and free() should not be called while instrumenting.
101//
102//   - We would like to use some thread-local data initialized on first-use of
103//     the XRay instrumentation. These allow us to implement unsynchronized
104//     routines that access resources associated with the thread.
105//
106// The implementation here uses a few mechanisms that allow us to provide both
107// the requirements listed above. We do this by:
108//
109//   1. Using a thread-local aligned storage buffer for representing the
110//      ThreadLocalData struct. This data will be uninitialized memory by
111//      design.
112//
113//   2. Not requiring a thread exit handler/implementation, keeping the
114//      thread-local as purely a collection of references/data that do not
115//      require cleanup.
116//
117// We're doing this to avoid using a `thread_local` object that has a
118// non-trivial destructor, because the C++ runtime might call std::malloc(...)
119// to register calls to destructors. Deadlocks may arise when, for example, an
120// externally provided malloc implementation is XRay instrumented, and
121// initializing the thread-locals involves calling into malloc. A malloc
122// implementation that does global synchronization might be holding a lock for a
123// critical section, calling a function that might be XRay instrumented (and
124// thus in turn calling into malloc by virtue of registration of the
125// thread_local's destructor).
126#if XRAY_HAS_TLS_ALIGNAS
127static_assert(alignof(ThreadLocalData) >= 64,
128              "ThreadLocalData must be cache line aligned.");
129#endif
130static ThreadLocalData &getThreadLocalData() {
131  thread_local typename std::aligned_storage<
132      sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{};
133
134  if (pthread_getspecific(Key) == NULL) {
135    new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{};
136    pthread_setspecific(Key, &TLDStorage);
137  }
138
139  return *reinterpret_cast<ThreadLocalData *>(&TLDStorage);
140}
141
142static XRayFileHeader &fdrCommonHeaderInfo() {
143  static std::aligned_storage<sizeof(XRayFileHeader)>::type HStorage;
144  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
145  static bool TSCSupported = true;
146  static uint64_t CycleFrequency = NanosecondsPerSecond;
147  pthread_once(
148      &OnceInit, +[] {
149        XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage);
150        // Version 2 of the log writes the extents of the buffer, instead of
151        // relying on an end-of-buffer record.
152        // Version 3 includes PID metadata record.
153        // Version 4 includes CPU data in the custom event records.
154        // Version 5 uses relative deltas for custom and typed event records,
155        // and removes the CPU data in custom event records (similar to how
156        // function records use deltas instead of full TSCs and rely on other
157        // metadata records for TSC wraparound and CPU migration).
158        H.Version = 5;
159        H.Type = FileTypes::FDR_LOG;
160
161        // Test for required CPU features and cache the cycle frequency
162        TSCSupported = probeRequiredCPUFeatures();
163        if (TSCSupported)
164          CycleFrequency = getTSCFrequency();
165        H.CycleFrequency = CycleFrequency;
166
167        // FIXME: Actually check whether we have 'constant_tsc' and
168        // 'nonstop_tsc' before setting the values in the header.
169        H.ConstantTSC = 1;
170        H.NonstopTSC = 1;
171      });
172  return reinterpret_cast<XRayFileHeader &>(HStorage);
173}
174
175// This is the iterator implementation, which knows how to handle FDR-mode
176// specific buffers. This is used as an implementation of the iterator function
177// needed by __xray_set_buffer_iterator(...). It maintains a global state of the
178// buffer iteration for the currently installed FDR mode buffers. In particular:
179//
180//   - If the argument represents the initial state of XRayBuffer ({nullptr, 0})
181//     then the iterator returns the header information.
182//   - If the argument represents the header information ({address of header
183//     info, size of the header info}) then it returns the first FDR buffer's
184//     address and extents.
185//   - It will keep returning the next buffer and extents as there are more
186//     buffers to process. When the input represents the last buffer, it will
187//     return the initial state to signal completion ({nullptr, 0}).
188//
189// See xray/xray_log_interface.h for more details on the requirements for the
190// implementations of __xray_set_buffer_iterator(...) and
191// __xray_log_process_buffers(...).
192XRayBuffer fdrIterator(const XRayBuffer B) {
193  DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0);
194  DCHECK(BQ->finalizing());
195
196  if (BQ == nullptr || !BQ->finalizing()) {
197    if (Verbosity())
198      Report(
199          "XRay FDR: Failed global buffer queue is null or not finalizing!\n");
200    return {nullptr, 0};
201  }
202
203  // We use a global scratch-pad for the header information, which only gets
204  // initialized the first time this function is called. We'll update one part
205  // of this information with some relevant data (in particular the number of
206  // buffers to expect).
207  static std::aligned_storage<sizeof(XRayFileHeader)>::type HeaderStorage;
208  static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT;
209  pthread_once(
210      &HeaderOnce, +[] {
211        reinterpret_cast<XRayFileHeader &>(HeaderStorage) =
212            fdrCommonHeaderInfo();
213      });
214
215  // We use a convenience alias for code referring to Header from here on out.
216  auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage);
217  if (B.Data == nullptr && B.Size == 0) {
218    Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
219    return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)};
220  }
221
222  static BufferQueue::const_iterator It{};
223  static BufferQueue::const_iterator End{};
224  static uint8_t *CurrentBuffer{nullptr};
225  static size_t SerializedBufferSize = 0;
226  if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
227    // From this point on, we provide raw access to the raw buffer we're getting
228    // from the BufferQueue. We're relying on the iterators from the current
229    // Buffer queue.
230    It = BQ->cbegin();
231    End = BQ->cend();
232  }
233
234  if (CurrentBuffer != nullptr) {
235    deallocateBuffer(CurrentBuffer, SerializedBufferSize);
236    CurrentBuffer = nullptr;
237  }
238
239  if (It == End)
240    return {nullptr, 0};
241
242  // Set up the current buffer to contain the extents like we would when writing
243  // out to disk. The difference here would be that we still write "empty"
244  // buffers, or at least go through the iterators faithfully to let the
245  // handlers see the empty buffers in the queue.
246  //
247  // We need this atomic fence here to ensure that writes happening to the
248  // buffer have been committed before we load the extents atomically. Because
249  // the buffer is not explicitly synchronised across threads, we rely on the
250  // fence ordering to ensure that writes we expect to have been completed
251  // before the fence are fully committed before we read the extents.
252  atomic_thread_fence(memory_order_acquire);
253  auto BufferSize = atomic_load(It->Extents, memory_order_acquire);
254  SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
255  CurrentBuffer = allocateBuffer(SerializedBufferSize);
256  if (CurrentBuffer == nullptr)
257    return {nullptr, 0};
258
259  // Write out the extents as a Metadata Record into the CurrentBuffer.
260  MetadataRecord ExtentsRecord;
261  ExtentsRecord.Type = uint8_t(RecordType::Metadata);
262  ExtentsRecord.RecordKind =
263      uint8_t(MetadataRecord::RecordKinds::BufferExtents);
264  internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize));
265  auto AfterExtents =
266      static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord,
267                                          sizeof(MetadataRecord))) +
268      sizeof(MetadataRecord);
269  internal_memcpy(AfterExtents, It->Data, BufferSize);
270
271  XRayBuffer Result;
272  Result.Data = CurrentBuffer;
273  Result.Size = SerializedBufferSize;
274  ++It;
275  return Result;
276}
277
278// Must finalize before flushing.
279XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
280  if (atomic_load(&LoggingStatus, memory_order_acquire) !=
281      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
282    if (Verbosity())
283      Report("Not flushing log, implementation is not finalized.\n");
284    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
285  }
286
287  s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
288  if (!atomic_compare_exchange_strong(&LogFlushStatus, &Result,
289                                      XRayLogFlushStatus::XRAY_LOG_FLUSHING,
290                                      memory_order_release)) {
291    if (Verbosity())
292      Report("Not flushing log, implementation is still finalizing.\n");
293    return static_cast<XRayLogFlushStatus>(Result);
294  }
295
296  if (BQ == nullptr) {
297    if (Verbosity())
298      Report("Cannot flush when global buffer queue is null.\n");
299    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
300  }
301
302  // We wait a number of milliseconds to allow threads to see that we've
303  // finalised before attempting to flush the log.
304  SleepForMillis(fdrFlags()->grace_period_ms);
305
306  // At this point, we're going to uninstall the iterator implementation, before
307  // we decide to do anything further with the global buffer queue.
308  __xray_log_remove_buffer_iterator();
309
310  // Once flushed, we should set the global status of the logging implementation
311  // to "uninitialized" to allow for FDR-logging multiple runs.
312  auto ResetToUnitialized = at_scope_exit([] {
313    atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
314                 memory_order_release);
315  });
316
317  auto CleanupBuffers = at_scope_exit([] {
318    auto &TLD = getThreadLocalData();
319    if (TLD.Controller != nullptr)
320      TLD.Controller->flush();
321  });
322
323  if (fdrFlags()->no_file_flush) {
324    if (Verbosity())
325      Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");
326
327    atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
328                 memory_order_release);
329    return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
330  }
331
332  // We write out the file in the following format:
333  //
334  //   1) We write down the XRay file header with version 1, type FDR_LOG.
335  //   2) Then we use the 'apply' member of the BufferQueue that's live, to
336  //      ensure that at this point in time we write down the buffers that have
337  //      been released (and marked "used") -- we dump the full buffer for now
338  //      (fixed-sized) and let the tools reading the buffers deal with the data
339  //      afterwards.
340  //
341  LogWriter *LW = LogWriter::Open();
342  if (LW == nullptr) {
343    auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
344    atomic_store(&LogFlushStatus, Result, memory_order_release);
345    return Result;
346  }
347
348  XRayFileHeader Header = fdrCommonHeaderInfo();
349  Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
350  LW->WriteAll(reinterpret_cast<char *>(&Header),
351               reinterpret_cast<char *>(&Header) + sizeof(Header));
352
353  // Release the current thread's buffer before we attempt to write out all the
354  // buffers. This ensures that in case we had only a single thread going, that
355  // we are able to capture the data nonetheless.
356  auto &TLD = getThreadLocalData();
357  if (TLD.Controller != nullptr)
358    TLD.Controller->flush();
359
360  BQ->apply([&](const BufferQueue::Buffer &B) {
361    // Starting at version 2 of the FDR logging implementation, we only write
362    // the records identified by the extents of the buffer. We use the Extents
363    // from the Buffer and write that out as the first record in the buffer.  We
364    // still use a Metadata record, but fill in the extents instead for the
365    // data.
366    MetadataRecord ExtentsRecord;
367    auto BufferExtents = atomic_load(B.Extents, memory_order_acquire);
368    DCHECK(BufferExtents <= B.Size);
369    ExtentsRecord.Type = uint8_t(RecordType::Metadata);
370    ExtentsRecord.RecordKind =
371        uint8_t(MetadataRecord::RecordKinds::BufferExtents);
372    internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents));
373    if (BufferExtents > 0) {
374      LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord),
375                   reinterpret_cast<char *>(&ExtentsRecord) +
376                       sizeof(MetadataRecord));
377      LW->WriteAll(reinterpret_cast<char *>(B.Data),
378                   reinterpret_cast<char *>(B.Data) + BufferExtents);
379    }
380  });
381
382  atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
383               memory_order_release);
384  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
385}
386
387XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
388  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
389  if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
390                                      XRayLogInitStatus::XRAY_LOG_FINALIZING,
391                                      memory_order_release)) {
392    if (Verbosity())
393      Report("Cannot finalize log, implementation not initialized.\n");
394    return static_cast<XRayLogInitStatus>(CurrentStatus);
395  }
396
397  // Do special things to make the log finalize itself, and not allow any more
398  // operations to be performed until re-initialized.
399  if (BQ == nullptr) {
400    if (Verbosity())
401      Report("Attempting to finalize an uninitialized global buffer!\n");
402  } else {
403    BQ->finalize();
404  }
405
406  atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
407               memory_order_release);
408  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
409}
410
411struct TSCAndCPU {
412  uint64_t TSC = 0;
413  unsigned char CPU = 0;
414};
415
416static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
417  // We want to get the TSC as early as possible, so that we can check whether
418  // we've seen this CPU before. We also do it before we load anything else,
419  // to allow for forward progress with the scheduling.
420  TSCAndCPU Result;
421
422  // Test once for required CPU features
423  static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT;
424  static bool TSCSupported = true;
425  pthread_once(
426      &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); });
427
428  if (TSCSupported) {
429    Result.TSC = __xray::readTSC(Result.CPU);
430  } else {
431    // FIXME: This code needs refactoring as it appears in multiple locations
432    timespec TS;
433    int result = clock_gettime(CLOCK_REALTIME, &TS);
434    if (result != 0) {
435      Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
436      TS = {0, 0};
437    }
438    Result.CPU = 0;
439    Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
440  }
441  return Result;
442}
443
444thread_local atomic_uint8_t Running{0};
445
446static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT {
447  // Check if we're finalizing, before proceeding.
448  {
449    auto Status = atomic_load(&LoggingStatus, memory_order_acquire);
450    if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
451        Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) {
452      if (TLD.Controller != nullptr) {
453        TLD.Controller->flush();
454        TLD.Controller = nullptr;
455      }
456      return false;
457    }
458  }
459
460  if (UNLIKELY(TLD.Controller == nullptr)) {
461    // Set up the TLD buffer queue.
462    if (UNLIKELY(BQ == nullptr))
463      return false;
464    TLD.BQ = BQ;
465
466    // Check that we have a valid buffer.
467    if (TLD.Buffer.Generation != BQ->generation() &&
468        TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
469      return false;
470
471    // Set up a buffer, before setting up the log writer. Bail out on failure.
472    if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
473      return false;
474
475    // Set up the Log Writer for this thread.
476    if (UNLIKELY(TLD.Writer == nullptr)) {
477      auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage);
478      new (LWStorage) FDRLogWriter(TLD.Buffer);
479      TLD.Writer = LWStorage;
480    } else {
481      TLD.Writer->resetRecord();
482    }
483
484    auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage);
485    new (CStorage)
486        FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime,
487                        atomic_load_relaxed(&ThresholdTicks));
488    TLD.Controller = CStorage;
489  }
490
491  DCHECK_NE(TLD.Controller, nullptr);
492  return true;
493}
494
495void fdrLoggingHandleArg0(int32_t FuncId,
496                          XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
497  auto TC = getTimestamp();
498  auto &TSC = TC.TSC;
499  auto &CPU = TC.CPU;
500  RecursionGuard Guard{Running};
501  if (!Guard)
502    return;
503
504  auto &TLD = getThreadLocalData();
505  if (!setupTLD(TLD))
506    return;
507
508  switch (Entry) {
509  case XRayEntryType::ENTRY:
510  case XRayEntryType::LOG_ARGS_ENTRY:
511    TLD.Controller->functionEnter(FuncId, TSC, CPU);
512    return;
513  case XRayEntryType::EXIT:
514    TLD.Controller->functionExit(FuncId, TSC, CPU);
515    return;
516  case XRayEntryType::TAIL:
517    TLD.Controller->functionTailExit(FuncId, TSC, CPU);
518    return;
519  case XRayEntryType::CUSTOM_EVENT:
520  case XRayEntryType::TYPED_EVENT:
521    break;
522  }
523}
524
525void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
526                          uint64_t Arg) XRAY_NEVER_INSTRUMENT {
527  auto TC = getTimestamp();
528  auto &TSC = TC.TSC;
529  auto &CPU = TC.CPU;
530  RecursionGuard Guard{Running};
531  if (!Guard)
532    return;
533
534  auto &TLD = getThreadLocalData();
535  if (!setupTLD(TLD))
536    return;
537
538  switch (Entry) {
539  case XRayEntryType::ENTRY:
540  case XRayEntryType::LOG_ARGS_ENTRY:
541    TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg);
542    return;
543  case XRayEntryType::EXIT:
544    TLD.Controller->functionExit(FuncId, TSC, CPU);
545    return;
546  case XRayEntryType::TAIL:
547    TLD.Controller->functionTailExit(FuncId, TSC, CPU);
548    return;
549  case XRayEntryType::CUSTOM_EVENT:
550  case XRayEntryType::TYPED_EVENT:
551    break;
552  }
553}
554
555void fdrLoggingHandleCustomEvent(void *Event,
556                                 std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
557  auto TC = getTimestamp();
558  auto &TSC = TC.TSC;
559  auto &CPU = TC.CPU;
560  RecursionGuard Guard{Running};
561  if (!Guard)
562    return;
563
564  // Complain when we ever get at least one custom event that's larger than what
565  // we can possibly support.
566  if (EventSize >
567      static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
568    static pthread_once_t Once = PTHREAD_ONCE_INIT;
569    pthread_once(
570        &Once, +[] {
571          Report("Custom event size too large; truncating to %d.\n",
572                 std::numeric_limits<int32_t>::max());
573        });
574  }
575
576  auto &TLD = getThreadLocalData();
577  if (!setupTLD(TLD))
578    return;
579
580  int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
581  TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize);
582}
583
584void fdrLoggingHandleTypedEvent(
585    uint16_t EventType, const void *Event,
586    std::size_t EventSize) noexcept XRAY_NEVER_INSTRUMENT {
587  auto TC = getTimestamp();
588  auto &TSC = TC.TSC;
589  auto &CPU = TC.CPU;
590  RecursionGuard Guard{Running};
591  if (!Guard)
592    return;
593
594  // Complain when we ever get at least one typed event that's larger than what
595  // we can possibly support.
596  if (EventSize >
597      static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
598    static pthread_once_t Once = PTHREAD_ONCE_INIT;
599    pthread_once(
600        &Once, +[] {
601          Report("Typed event size too large; truncating to %d.\n",
602                 std::numeric_limits<int32_t>::max());
603        });
604  }
605
606  auto &TLD = getThreadLocalData();
607  if (!setupTLD(TLD))
608    return;
609
610  int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
611  TLD.Controller->typedEvent(TSC, CPU, EventType, Event, ReducedEventSize);
612}
613
614XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options,
615                                 size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
616  if (Options == nullptr)
617    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
618
619  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
620  if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
621                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING,
622                                      memory_order_release)) {
623    if (Verbosity())
624      Report("Cannot initialize already initialized implementation.\n");
625    return static_cast<XRayLogInitStatus>(CurrentStatus);
626  }
627
628  if (Verbosity())
629    Report("Initializing FDR mode with options: %s\n",
630           static_cast<const char *>(Options));
631
632  // TODO: Factor out the flags specific to the FDR mode implementation. For
633  // now, use the global/single definition of the flags, since the FDR mode
634  // flags are already defined there.
635  FlagParser FDRParser;
636  FDRFlags FDRFlags;
637  registerXRayFDRFlags(&FDRParser, &FDRFlags);
638  FDRFlags.setDefaults();
639
640  // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided
641  // options until we migrate everyone to use the XRAY_FDR_OPTIONS
642  // compiler-provided options.
643  FDRParser.ParseString(useCompilerDefinedFlags());
644  FDRParser.ParseString(useCompilerDefinedFDRFlags());
645  auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS");
646  if (EnvOpts == nullptr)
647    EnvOpts = "";
648  FDRParser.ParseString(EnvOpts);
649
650  // FIXME: Remove this when we fully remove the deprecated flags.
651  if (internal_strlen(EnvOpts) == 0) {
652    FDRFlags.func_duration_threshold_us =
653        flags()->xray_fdr_log_func_duration_threshold_us;
654    FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms;
655  }
656
657  // The provided options should always override the compiler-provided and
658  // environment-variable defined options.
659  FDRParser.ParseString(static_cast<const char *>(Options));
660  *fdrFlags() = FDRFlags;
661  auto BufferSize = FDRFlags.buffer_size;
662  auto BufferMax = FDRFlags.buffer_max;
663
664  if (BQ == nullptr) {
665    bool Success = false;
666    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
667    new (BQ) BufferQueue(BufferSize, BufferMax, Success);
668    if (!Success) {
669      Report("BufferQueue init failed.\n");
670      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
671    }
672  } else {
673    if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) {
674      if (Verbosity())
675        Report("Failed to re-initialize global buffer queue. Init failed.\n");
676      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
677    }
678  }
679
680  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
681  pthread_once(
682      &OnceInit, +[] {
683        atomic_store(&TicksPerSec,
684                     probeRequiredCPUFeatures() ? getTSCFrequency()
685                                                : __xray::NanosecondsPerSecond,
686                     memory_order_release);
687        pthread_key_create(
688            &Key, +[](void *TLDPtr) {
689              if (TLDPtr == nullptr)
690                return;
691              auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
692              if (TLD.BQ == nullptr)
693                return;
694              if (TLD.Buffer.Data == nullptr)
695                return;
696              auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
697              if (EC != BufferQueue::ErrorCode::Ok)
698                Report("At thread exit, failed to release buffer at %p; "
699                       "error=%s\n",
700                       TLD.Buffer.Data, BufferQueue::getErrorString(EC));
701            });
702      });
703
704  atomic_store(&ThresholdTicks,
705               atomic_load_relaxed(&TicksPerSec) *
706                   fdrFlags()->func_duration_threshold_us / 1000000,
707               memory_order_release);
708  // Arg1 handler should go in first to avoid concurrent code accidentally
709  // falling back to arg0 when it should have ran arg1.
710  __xray_set_handler_arg1(fdrLoggingHandleArg1);
711  // Install the actual handleArg0 handler after initialising the buffers.
712  __xray_set_handler(fdrLoggingHandleArg0);
713  __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
714  __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent);
715
716  // Install the buffer iterator implementation.
717  __xray_log_set_buffer_iterator(fdrIterator);
718
719  atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
720               memory_order_release);
721
722  if (Verbosity())
723    Report("XRay FDR init successful.\n");
724  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
725}
726
727bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
728  XRayLogImpl Impl{
729      fdrLoggingInit,
730      fdrLoggingFinalize,
731      fdrLoggingHandleArg0,
732      fdrLoggingFlush,
733  };
734  auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl);
735  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
736      Verbosity()) {
737    Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n",
738           RegistrationResult);
739    return false;
740  }
741
742  if (flags()->xray_fdr_log ||
743      !internal_strcmp(flags()->xray_mode, "xray-fdr")) {
744    auto SelectResult = __xray_log_select_mode("xray-fdr");
745    if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
746        Verbosity()) {
747      Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n",
748             SelectResult);
749      return false;
750    }
751  }
752  return true;
753}
754
755} // namespace __xray
756
757static auto UNUSED Unused = __xray::fdrLogDynamicInitializer();
758